package main

import (
	"encoding/json"
	"fmt"
	"log"
	"sort"

	"github.com/boltdb/bolt"
	"github.com/xuri/excelize/v2"
)

// spiderBucketName is the single bolt bucket that holds every spider
// configuration. Records are stored as JSON under the configuration's Code.
const spiderBucketName = "myBucket"

type (
	// SpiderDb is the spider (crawler) configuration store. It wraps a bolt
	// database file and stands in for a real database here.
	SpiderDb struct {
		db *bolt.DB
	}
)

var (
	// loginState is not referenced in this file.
	// NOTE(review): presumably tracks whether the user is logged in; confirm
	// against the code that reads it.
	loginState bool

	// currentSpiderConfig is the configuration most recently selected through
	// SpiderDb.Switch.
	currentSpiderConfig *SpiderConfig
)

// spiderBucket returns the configuration bucket for tx, or an error when the
// bucket does not exist, so callers never dereference a nil bucket.
func spiderBucket(tx *bolt.Tx) (*bolt.Bucket, error) {
	bucket := tx.Bucket([]byte(spiderBucketName))
	if bucket == nil {
		return nil, fmt.Errorf("bucket %q not found", spiderBucketName)
	}
	return bucket, nil
}

// NewSpiderDb opens (creating it if necessary) the bolt database at dbfile and
// makes sure the configuration bucket exists. Any failure terminates the
// process through log.Fatal, because the signature has no error result.
func NewSpiderDb(dbfile string) *SpiderDb {
	db, err := bolt.Open(dbfile, 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	err = db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists([]byte(spiderBucketName))
		return err
	})
	if err != nil {
		log.Fatal(err)
	}
	return &SpiderDb{
		db,
	}
}

// Close closes the underlying bolt database. A close failure is logged, since
// the method has no error result to report it through.
func (s *SpiderDb) Close() {
	if err := s.db.Close(); err != nil {
		log.Println(err.Error())
	}
}

// CopyAttribute stores the first non-empty of value1 and value2 in *dst.
// When both are empty, *dst is left untouched.
func CopyAttribute(dst *string, value1, value2 string) {
	switch {
	case value1 != "":
		*dst = value1
	case value2 != "":
		*dst = value2
	}
}

// MergeSpiderConfig returns a new SpiderConfig built from src1 and src2. For
// each field listed below, the value from src2 wins when it is non-empty;
// otherwise the value from src1 is used. Neither input is modified.
//
// Only the fields copied here are carried over; both arguments must be non-nil.
func MergeSpiderConfig(src1, src2 *SpiderConfig) *SpiderConfig {
	nsc := new(SpiderConfig)
	CopyAttribute(&nsc.Code, src2.Code, src1.Code)
	CopyAttribute(&nsc.Site, src2.Site, src1.Site)
	CopyAttribute(&nsc.Channel, src2.Channel, src1.Channel)
	CopyAttribute(&nsc.Url, src2.Url, src1.Url)
	CopyAttribute(&nsc.Author, src2.Author, src1.Author)
	CopyAttribute(&nsc.ListItemCss, src2.ListItemCss, src1.ListItemCss)
	CopyAttribute(&nsc.ListLinkCss, src2.ListLinkCss, src1.ListLinkCss)
	CopyAttribute(&nsc.ListPubtimeCss, src2.ListPubtimeCss, src1.ListPubtimeCss)
	CopyAttribute(&nsc.ListNextPageCss, src2.ListNextPageCss, src1.ListNextPageCss)
	CopyAttribute(&nsc.TitleCss, src2.TitleCss, src1.TitleCss)
	CopyAttribute(&nsc.PublishTimeCss, src2.PublishTimeCss, src1.PublishTimeCss)
	CopyAttribute(&nsc.PublishUnitCss, src2.PublishUnitCss, src1.PublishUnitCss)
	CopyAttribute(&nsc.ContentCss, src2.ContentCss, src1.ContentCss)
	CopyAttribute(&nsc.AttachCss, src2.AttachCss, src1.AttachCss)
	CopyAttribute(&nsc.ListJSCode, src2.ListJSCode, src1.ListJSCode)
	CopyAttribute(&nsc.ContentJSCode, src2.ContentJSCode, src1.ContentJSCode)
	CopyAttribute(&nsc.AttachJSCode, src2.AttachJSCode, src1.AttachJSCode)
	return nsc
}

// Load returns the configuration stored under code. The result is never nil:
// when no record exists, or the read fails, an empty SpiderConfig is returned.
// Read and decode failures are logged rather than terminating the process,
// matching how the other methods of SpiderDb report errors.
func (s *SpiderDb) Load(code string) *SpiderConfig {
	cfg := new(SpiderConfig)
	err := s.db.View(func(tx *bolt.Tx) error {
		bucket, err := spiderBucket(tx)
		if err != nil {
			return err
		}
		raw := bucket.Get([]byte(code))
		if len(raw) == 0 {
			return nil
		}
		// The slice returned by Get is only valid inside the transaction, so
		// decode it here.
		return json.Unmarshal(raw, cfg)
	})
	if err != nil {
		log.Println(err.Error())
	}
	return cfg
}

// SaveOrUpdate merges sc over the record currently stored under sc.Code
// (non-empty fields of sc win, see MergeSpiderConfig) and writes the result
// back. A nil sc is ignored. Failures are logged.
func (s *SpiderDb) SaveOrUpdate(sc *SpiderConfig) {
	if sc == nil {
		return
	}
	// Read, merge and write inside one read-write transaction so a concurrent
	// writer cannot slip in between the read and the write.
	err := s.db.Update(func(tx *bolt.Tx) error {
		bucket, err := spiderBucket(tx)
		if err != nil {
			return err
		}
		key := []byte(sc.Code)

		// Load the existing record, if any.
		stored := new(SpiderConfig)
		if raw := bucket.Get(key); len(raw) > 0 {
			if err := json.Unmarshal(raw, stored); err != nil {
				// Keep going with whatever was decoded; the record is
				// rewritten below.
				log.Printf("save or update: decoding stored record %q: %v", sc.Code, err)
			}
		}

		// Merge and persist.
		value, err := json.Marshal(MergeSpiderConfig(stored, sc))
		if err != nil {
			return err
		}
		return bucket.Put(key, value)
	})
	if err != nil {
		log.Println(err.Error())
	}
}

// LoadAll returns every stored configuration, ordered by sort.Sort over
// SpiderConfiges (by code, according to the original author's note). Records
// that cannot be decoded are logged and skipped. The result is never nil.
func (s *SpiderDb) LoadAll() SpiderConfiges {
	ret := make(SpiderConfiges, 0)
	err := s.db.View(func(tx *bolt.Tx) error {
		bucket, err := spiderBucket(tx)
		if err != nil {
			return err
		}
		// Walk every key/value pair in the bucket.
		return bucket.ForEach(func(k, v []byte) error {
			sf := new(SpiderConfig)
			if err := json.Unmarshal(v, sf); err != nil {
				log.Printf("load all: skipping undecodable record %q: %v", k, err)
				return nil
			}
			ret = append(ret, sf)
			return nil
		})
	})
	if err != nil {
		log.Println(err.Error())
	}
	sort.Sort(ret)
	return ret
}

// Switch makes the configuration stored under code the current default
// (currentSpiderConfig).
//
// NOTE(review): Load never returns nil, so the check below always passes and
// an unknown code switches to an empty configuration; confirm whether that is
// intended.
func (s *SpiderDb) Switch(code string) {
	if sc := s.Load(code); sc != nil {
		currentSpiderConfig = sc
	}
}

// Delete removes the record stored under code. Failures are logged.
func (s *SpiderDb) Delete(code string) {
	err := s.db.Update(func(tx *bolt.Tx) error {
		bucket, err := spiderBucket(tx)
		if err != nil {
			return err
		}
		return bucket.Delete([]byte(code))
	})
	if err != nil {
		log.Println(err.Error())
	}
}

// BatchImport reads every sheet of the Excel workbook at filepath and stores
// one configuration per data row, keyed by its code. Existing records with the
// same code are overwritten, not merged.
//
// Columns are read as: 0 Code, 1 Site, 2 Channel, 3 Url, 4 Author. The first
// row of each sheet is treated as a header. Rows with fewer than five cells,
// an empty Code or an empty Url are skipped.
//
// The import is best-effort: only a failure to open the workbook is returned.
// Unreadable sheets and rows that cannot be stored are logged and skipped.
func (s *SpiderDb) BatchImport(filepath string) error {
	f, err := excelize.OpenFile(filepath)
	if err != nil {
		return err
	}
	defer f.Close()

	for _, sheetName := range f.GetSheetList() {
		// Fetch all rows of the sheet.
		rows, err := f.GetRows(sheetName)
		if err != nil {
			log.Printf("batch import: skipping sheet %q: %v", sheetName, err)
			continue
		}

		// One read-write transaction per sheet instead of one per row: every
		// bolt Update commits to disk, so per-row transactions are needlessly
		// slow for bulk loads.
		err = s.db.Update(func(tx *bolt.Tx) error {
			bucket, err := spiderBucket(tx)
			if err != nil {
				return err
			}
			for index, row := range rows {
				if index == 0 || len(row) < 5 || row[0] == "" || row[3] == "" {
					continue
				}
				sc := &SpiderConfig{
					Code:    row[0],
					Site:    row[1],
					Channel: row[2],
					Url:     row[3],
					Author:  row[4],
				}
				value, err := json.Marshal(sc)
				if err != nil {
					log.Printf("batch import: sheet %q row %d: %v", sheetName, index+1, err)
					continue
				}
				// A rejected row must not abort the rest of the sheet, so log
				// it and keep the transaction going.
				if err := bucket.Put([]byte(sc.Code), value); err != nil {
					log.Printf("batch import: sheet %q row %d: %v", sheetName, index+1, err)
				}
			}
			return nil
		})
		if err != nil {
			log.Printf("batch import: sheet %q: %v", sheetName, err)
		}
	}
	return nil
}