123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- package main
- import (
- "encoding/json"
- "sort"
- "log"
- "github.com/boltdb/bolt"
- "github.com/xuri/excelize/v2"
- )
- type (
- //SpiderDB 爬虫库,这里模拟真实数据库
- SpiderDb struct {
- db *bolt.DB
- }
- )
- var (
- loginState bool = false
- currentSpiderConfig *SpiderConfig
- )
- // NewSpiderDb
- func NewSpiderDb(dbfile string) *SpiderDb {
- db, err := bolt.Open(dbfile, 0600, nil)
- if err != nil {
- log.Fatal(err)
- }
- err = db.Update(func(tx *bolt.Tx) error {
- _, err := tx.CreateBucketIfNotExists([]byte("myBucket"))
- return err
- })
- if err != nil {
- log.Fatal(err)
- }
- return &SpiderDb{
- db,
- }
- }
- // Close
- func (s *SpiderDb) Close() {
- s.db.Close()
- }
// CopyAttribute writes the first non-empty string among value1 and value2
// (checked in that order) into *dst. When both are empty, *dst is left
// untouched and dst is not dereferenced.
func CopyAttribute(dst *string, value1, value2 string) {
	switch {
	case value1 != "":
		*dst = value1
	case value2 != "":
		*dst = value2
	}
}
- // MergeSpiderConfig 合并
- func MergeSpiderConfig(src1, src2 *SpiderConfig) *SpiderConfig {
- nsc := new(SpiderConfig)
- CopyAttribute(&nsc.Code, src2.Code, src1.Code)
- CopyAttribute(&nsc.Site, src2.Site, src1.Site)
- CopyAttribute(&nsc.Channel, src2.Channel, src1.Channel)
- CopyAttribute(&nsc.Url, src2.Url, src1.Url)
- CopyAttribute(&nsc.Author, src2.Author, src1.Author)
- CopyAttribute(&nsc.ListItemCss, src2.ListItemCss, src1.ListItemCss)
- CopyAttribute(&nsc.ListLinkCss, src2.ListLinkCss, src1.ListLinkCss)
- CopyAttribute(&nsc.ListPubtimeCss, src2.ListPubtimeCss, src1.ListPubtimeCss)
- CopyAttribute(&nsc.ListNextPageCss, src2.ListNextPageCss, src1.ListNextPageCss)
- CopyAttribute(&nsc.TitleCss, src2.TitleCss, src1.TitleCss)
- CopyAttribute(&nsc.PublishTimeCss, src2.PublishTimeCss, src1.PublishTimeCss)
- CopyAttribute(&nsc.PublishUnitCss, src2.PublishUnitCss, src1.PublishUnitCss)
- CopyAttribute(&nsc.ContentCss, src2.ContentCss, src1.ContentCss)
- CopyAttribute(&nsc.AttachCss, src2.AttachCss, src1.AttachCss)
- CopyAttribute(&nsc.ListJSCode, src2.ListJSCode, src1.ListJSCode)
- CopyAttribute(&nsc.ContentJSCode, src2.ContentJSCode, src1.ContentJSCode)
- CopyAttribute(&nsc.AttachJSCode, src2.AttachJSCode, src1.AttachJSCode)
- return nsc
- }
- // Load
- func (s *SpiderDb) Load(code string) *SpiderConfig {
- var req *SpiderConfig = new(SpiderConfig)
- err := s.db.View(func(tx *bolt.Tx) error {
- bucket := tx.Bucket([]byte("myBucket"))
- value := bucket.Get([]byte(code))
- if value != nil && len(value) > 0 {
- _ = json.Unmarshal(value, req)
- }
- return nil
- })
- if err != nil {
- log.Fatal(err)
- }
- return req
- }
- // SaveOrUpdate
- func (s *SpiderDb) SaveOrUpdate(sc *SpiderConfig) {
- //加载原始数据
- var sc1 *SpiderConfig = new(SpiderConfig)
- var sc2 *SpiderConfig
- err := s.db.View(func(tx *bolt.Tx) error {
- bucket := tx.Bucket([]byte("myBucket"))
- value := bucket.Get([]byte(sc.Code))
- if value != nil && len(value) > 0 {
- _ = json.Unmarshal(value, sc1)
- }
- return nil
- })
- if err != nil {
- log.Println(err.Error())
- return
- }
- //更新
- if sc1 != nil {
- sc2 = MergeSpiderConfig(sc1, sc)
- value, _ := json.Marshal(sc2)
- err = s.db.Update(func(tx *bolt.Tx) error {
- bucket := tx.Bucket([]byte("myBucket"))
- err := bucket.Put([]byte(sc.Code), value)
- return err
- })
- if err != nil {
- log.Println(err.Error())
- return
- }
- }
- }
- // LoadAll,默认按照代码排序
- func (s *SpiderDb) LoadAll() SpiderConfiges {
- ret := make(SpiderConfiges, 0)
- // 开始读取事务
- err := s.db.View(func(tx *bolt.Tx) error {
- // 遍历数据库中的所有桶
- bucket := tx.Bucket([]byte("myBucket"))
- // 遍历桶中的所有键/值对
- return bucket.ForEach(func(k, v []byte) error {
- var sf *SpiderConfig = new(SpiderConfig)
- json.Unmarshal(v, sf)
- if sf != nil {
- ret = append(ret, sf)
- }
- return nil
- })
- })
- sort.Sort(ret)
- if err != nil {
- log.Println(err.Error())
- }
- return ret
- }
- // 切换当前默认爬虫配置
- func (s *SpiderDb) Switch(code string) {
- if sc := s.Load(code); sc != nil {
- currentSpiderConfig = sc
- }
- }
- // Delete
- func (s *SpiderDb) Delete(code string) {
- err := s.db.Update(func(tx *bolt.Tx) error {
- bucket := tx.Bucket([]byte("myBucket"))
- err := bucket.Delete([]byte(code))
- return err
- })
- if err != nil {
- log.Println(err.Error())
- return
- }
- }
- // 批量导入
- func (s *SpiderDb) BatchImport(filepath string) error {
- f, err := excelize.OpenFile(filepath)
- if err != nil {
- return err
- }
- defer f.Close()
- for _, sheetName := range f.GetSheetList() {
- // 获取工作表的所有行
- rows, err := f.GetRows(sheetName)
- if err != nil {
- continue
- }
- //
- for index, row := range rows {
- if index == 0 || len(row) < 5 || row[0] == "" || row[3] == "" {
- continue
- }
- sc := &SpiderConfig{
- Code: row[0],
- Site: row[1],
- Channel: row[2],
- Url: row[3],
- Author: row[4],
- }
- value, _ := json.Marshal(sc)
- err = s.db.Update(func(tx *bolt.Tx) error {
- bucket := tx.Bucket([]byte("myBucket"))
- err := bucket.Put([]byte(sc.Code), value)
- return err
- })
- if err != nil {
- continue
- }
- }
- }
- return nil
- }
|