package db

import (
	"container/list"
	"encoding/json"
	"fmt"
	"log"
	"os"
	"sort"
	"strconv"
	"strings"

	"github.com/bmaupin/go-epub"
	"github.com/boltdb/bolt"
	"github.com/xuri/excelize/v2"
	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	be "spider_creator/backend"
)

// spiderConfigBucket is the name of the bolt bucket in which every spider
// configuration is stored, keyed by the spider code.
const spiderConfigBucket = "myBucket"

// Load returns the spider configuration stored under code.
//
// The result is never nil. When the bucket or the key does not exist an empty
// configuration is returned. When the stored JSON cannot be decoded cleanly
// the error is logged and whatever could be decoded is returned. A database
// error is logged instead of terminating the process.
func (s *SpiderDb) Load(code string) *be.SpiderConfig {
	cfg := new(be.SpiderConfig)
	err := s.db.View(func(tx *bolt.Tx) error {
		bucket := tx.Bucket([]byte(spiderConfigBucket))
		if bucket == nil {
			// Tx.Bucket returns nil for a missing bucket; calling Get on it
			// would panic.
			return nil
		}
		raw := bucket.Get([]byte(code))
		if len(raw) == 0 {
			return nil
		}
		if err := json.Unmarshal(raw, cfg); err != nil {
			return fmt.Errorf("decode spider config: %w", err)
		}
		return nil
	})
	if err != nil {
		log.Printf("load spider config %q: %v", code, err)
	}
	return cfg
}

// SaveOrUpdate merges sc into the configuration already stored under sc.Code
// (an empty configuration when nothing is stored yet) and writes the merged
// result back.
//
// The read, merge and write run inside a single read-write transaction, so
// the stored value cannot change between the read and the write. Failures are
// reported through qu.Debug; a nil sc is ignored.
func (s *SpiderDb) SaveOrUpdate(sc *be.SpiderConfig) {
	if sc == nil {
		return
	}
	err := s.db.Update(func(tx *bolt.Tx) error {
		bucket, err := tx.CreateBucketIfNotExists([]byte(spiderConfigBucket))
		if err != nil {
			return err
		}
		key := []byte(sc.Code)

		// Load the previously stored configuration, if any.
		stored := new(be.SpiderConfig)
		if raw := bucket.Get(key); len(raw) > 0 {
			if err := json.Unmarshal(raw, stored); err != nil {
				// Best effort: keep whatever was decoded and still merge.
				qu.Debug(err.Error())
			}
		}

		// Merge and persist.
		merged, err := json.Marshal(be.MergeSpiderConfig(stored, sc))
		if err != nil {
			return err
		}
		return bucket.Put(key, merged)
	})
	if err != nil {
		qu.Debug(err.Error())
	}
}

// LoadSpiderConfigAll returns every stored spider configuration, ordered by
// the sort.Interface implementation of be.SpiderConfiges.
//
// The returned slice is never nil. A missing bucket yields an empty slice.
// Entries whose JSON cannot be decoded cleanly are still returned with
// whatever could be decoded, and the decode error is reported through
// qu.Debug.
func (s *SpiderDb) LoadSpiderConfigAll() be.SpiderConfiges {
	configs := make(be.SpiderConfiges, 0)
	err := s.db.View(func(tx *bolt.Tx) error {
		bucket := tx.Bucket([]byte(spiderConfigBucket))
		if bucket == nil {
			return nil
		}
		// Visit every key/value pair in the bucket.
		return bucket.ForEach(func(k, v []byte) error {
			cfg := new(be.SpiderConfig)
			if err := json.Unmarshal(v, cfg); err != nil {
				qu.Debug("decode spider config " + string(k) + ": " + err.Error())
			}
			configs = append(configs, cfg)
			return nil
		})
	})
	if err != nil {
		qu.Debug(err.Error())
	}
	sort.Sort(configs)
	return configs
}

// DeleteSpiderConfig removes the configuration stored under code. Deleting
// from a missing bucket is a no-op. Failures are reported through qu.Debug.
func (s *SpiderDb) DeleteSpiderConfig(code string) {
	err := s.db.Update(func(tx *bolt.Tx) error {
		bucket := tx.Bucket([]byte(spiderConfigBucket))
		if bucket == nil {
			return nil
		}
		return bucket.Delete([]byte(code))
	})
	if err != nil {
		qu.Debug(err.Error())
	}
}

// BatchImport reads spider configurations from the Excel workbook at filepath
// and stores each one under its code, replacing any existing entry.
//
// Every sheet is scanned. The first row of each sheet is skipped, as are rows
// with fewer than five cells or with an empty first (code) or fourth (href)
// cell. The columns are, in order: code, site, channel, href, modify user.
//
// Sheets that cannot be read and rows that cannot be encoded or stored are
// skipped and reported through qu.Debug. All accepted rows are written in one
// transaction. An error is returned when the workbook cannot be opened or the
// transaction fails.
func (s *SpiderDb) BatchImport(filepath string) error {
	f, err := excelize.OpenFile(filepath)
	if err != nil {
		return err
	}
	defer f.Close()

	var configs []*be.SpiderConfig
	for _, sheet := range f.GetSheetList() {
		// Fetch all rows of the sheet.
		rows, err := f.GetRows(sheet)
		if err != nil {
			qu.Debug("read sheet " + sheet + ": " + err.Error())
			continue
		}
		for i, row := range rows {
			if i == 0 || len(row) < 5 || row[0] == "" || row[3] == "" {
				continue
			}
			configs = append(configs, &be.SpiderConfig{
				Code:       row[0],
				Site:       row[1],
				Channel:    row[2],
				Href:       row[3],
				ModifyUser: row[4],
			})
		}
	}
	if len(configs) == 0 {
		return nil
	}

	// One transaction for the whole import instead of one commit per row.
	return s.db.Update(func(tx *bolt.Tx) error {
		bucket, err := tx.CreateBucketIfNotExists([]byte(spiderConfigBucket))
		if err != nil {
			return err
		}
		for _, cfg := range configs {
			raw, err := json.Marshal(cfg)
			if err != nil {
				qu.Debug("encode spider config " + cfg.Code + ": " + err.Error())
				continue
			}
			if err := bucket.Put([]byte(cfg.Code), raw); err != nil {
				// Keep the import best-effort: one bad row must not roll
				// back the rows that were accepted.
				qu.Debug("store spider config " + cfg.Code + ": " + err.Error())
			}
		}
		return nil
	})
}

// ExportEpubFile writes the items of currentResult to an EPUB file at
// filepath, one section per item, using bookname as title and description.
//
// List elements that are not a non-nil *be.ResultItem are skipped; a nil list
// produces a book without sections. An error is returned when a section
// cannot be added or when the file cannot be created, written or closed. A
// file-creation failure is additionally dispatched as a "debug_event".
func (s *SpiderDb) ExportEpubFile(bookname, filepath string, currentResult *list.List) error {
	book := epub.NewEpub(bookname)
	book.SetTitle(bookname)
	book.SetDescription(bookname)
	book.SetAuthor("unknow")

	if currentResult != nil {
		// The counter is incremented before use, so the first section file is
		// 000002.xhtml, matching the numbering this method has always used.
		section := 1
		for el := currentResult.Front(); el != nil; el = el.Next() {
			art, ok := el.Value.(*be.ResultItem)
			if !ok || art == nil {
				continue
			}
			section++
			// NOTE(review): the separators are blank-line strings, exactly as
			// they appear in the reviewed source. If XHTML markup (heading or
			// paragraph tags) was intended here, restore it - TODO confirm.
			body := "\n\n" + art.Title + "\n\n" +
				strings.ReplaceAll(art.Content, "\n", "\n\n") + "\n\n"
			name := fmt.Sprintf("%06d.xhtml", section)
			if _, err := book.AddSection(body, art.Title, name, ""); err != nil {
				return fmt.Errorf("add epub section %s: %w", name, err)
			}
		}
	}

	fo, err := os.Create(filepath)
	if err != nil {
		s.enf.Dispatch("debug_event", err.Error())
		return err
	}
	if _, err := book.WriteTo(fo); err != nil {
		fo.Close()
		return fmt.Errorf("write epub %q: %w", filepath, err)
	}
	// Close can report a delayed write failure, so its error is returned.
	return fo.Close()
}

// ExportExcelFile writes the items of currentResult to a new Excel workbook
// saved at filepath.
//
// Row 1 of "Sheet1" holds the column headers; each following row holds one
// item: site, channel, title, link, publishing unit, publish time, content
// and the attachment links encoded as JSON (left empty when there are none or
// they cannot be encoded). List elements that are not a non-nil
// *be.ResultItem are skipped without leaving a blank row; a nil list produces
// a workbook containing only the header row.
func (s *SpiderDb) ExportExcelFile(filepath, site, channel string, currentResult *list.List) error {
	const sheet = "Sheet1"

	f := excelize.NewFile()
	defer f.Close()

	headers := []string{"站点", "栏目", "标题", "链接", "发布单位", "发布时间", "正文", "附件"}
	if err := writeExcelRow(f, sheet, 1, headers); err != nil {
		return err
	}

	if currentResult != nil {
		row := 2
		for el := currentResult.Front(); el != nil; el = el.Next() {
			r, ok := el.Value.(*be.ResultItem)
			if !ok || r == nil {
				continue
			}
			attach := ""
			if len(r.AttachLinks) > 0 {
				if bs, err := json.Marshal(r.AttachLinks); err == nil {
					attach = string(bs)
				}
			}
			cells := []string{site, channel, r.Title, r.Href, r.PublishUnit, r.ListPubTime, r.Content, attach}
			if err := writeExcelRow(f, sheet, row, cells); err != nil {
				return err
			}
			row++
		}
	}
	return f.SaveAs(filepath)
}

// writeExcelRow stores values as string cells in consecutive columns of the
// given row, starting at column A. It supports at most 26 values.
func writeExcelRow(f *excelize.File, sheet string, row int, values []string) error {
	rowStr := strconv.Itoa(row)
	for col, v := range values {
		cell := string(rune('A'+col)) + rowStr
		if err := f.SetCellStr(sheet, cell, v); err != nil {
			return fmt.Errorf("write cell %s: %w", cell, err)
		}
	}
	return nil
}