db.go 4.8 KB


  1. package main
  2. import (
  3. "encoding/json"
  4. "sort"
  5. "log"
  6. "github.com/boltdb/bolt"
  7. "github.com/xuri/excelize/v2"
  8. )
  9. type (
  10. //SpiderDB 爬虫库,这里模拟真实数据库
  11. SpiderDb struct {
  12. db *bolt.DB
  13. }
  14. )
  15. var (
  16. loginState bool = false
  17. currentSpiderConfig *SpiderConfig
  18. )
  19. // NewSpiderDb
  20. func NewSpiderDb(dbfile string) *SpiderDb {
  21. db, err := bolt.Open(dbfile, 0600, nil)
  22. if err != nil {
  23. log.Fatal(err)
  24. }
  25. err = db.Update(func(tx *bolt.Tx) error {
  26. _, err := tx.CreateBucketIfNotExists([]byte("myBucket"))
  27. return err
  28. })
  29. if err != nil {
  30. log.Fatal(err)
  31. }
  32. return &SpiderDb{
  33. db,
  34. }
  35. }
  36. // Close
  37. func (s *SpiderDb) Close() {
  38. s.db.Close()
  39. }
  // CopyAttribute stores the first non-empty candidate into *dst: value1 wins
  // when it is non-empty, otherwise value2 is used when it is non-empty. When
  // both candidates are empty, *dst is left untouched.
  //
  // dst is dereferenced only when one of the candidates is non-empty.
  func CopyAttribute(dst *string, value1, value2 string) {
  	switch {
  	case value1 != "":
  		*dst = value1
  	case value2 != "":
  		*dst = value2
  	}
  }
  48. // MergeSpiderConfig 合并
  49. func MergeSpiderConfig(src1, src2 *SpiderConfig) *SpiderConfig {
  50. nsc := new(SpiderConfig)
  51. CopyAttribute(&nsc.Code, src2.Code, src1.Code)
  52. CopyAttribute(&nsc.Site, src2.Site, src1.Site)
  53. CopyAttribute(&nsc.Channel, src2.Channel, src1.Channel)
  54. CopyAttribute(&nsc.Url, src2.Url, src1.Url)
  55. CopyAttribute(&nsc.Author, src2.Author, src1.Author)
  56. CopyAttribute(&nsc.ListItemCss, src2.ListItemCss, src1.ListItemCss)
  57. CopyAttribute(&nsc.ListLinkCss, src2.ListLinkCss, src1.ListLinkCss)
  58. CopyAttribute(&nsc.ListPubtimeCss, src2.ListPubtimeCss, src1.ListPubtimeCss)
  59. CopyAttribute(&nsc.ListNextPageCss, src2.ListNextPageCss, src1.ListNextPageCss)
  60. CopyAttribute(&nsc.TitleCss, src2.TitleCss, src1.TitleCss)
  61. CopyAttribute(&nsc.PublishTimeCss, src2.PublishTimeCss, src1.PublishTimeCss)
  62. CopyAttribute(&nsc.PublishUnitCss, src2.PublishUnitCss, src1.PublishUnitCss)
  63. CopyAttribute(&nsc.ContentCss, src2.ContentCss, src1.ContentCss)
  64. CopyAttribute(&nsc.AttachCss, src2.AttachCss, src1.AttachCss)
  65. CopyAttribute(&nsc.ListJSCode, src2.ListJSCode, src1.ListJSCode)
  66. CopyAttribute(&nsc.ContentJSCode, src2.ContentJSCode, src1.ContentJSCode)
  67. CopyAttribute(&nsc.AttachJSCode, src2.AttachJSCode, src1.AttachJSCode)
  68. return nsc
  69. }
  70. // Load
  71. func (s *SpiderDb) Load(code string) *SpiderConfig {
  72. var req *SpiderConfig = new(SpiderConfig)
  73. err := s.db.View(func(tx *bolt.Tx) error {
  74. bucket := tx.Bucket([]byte("myBucket"))
  75. value := bucket.Get([]byte(code))
  76. if value != nil && len(value) > 0 {
  77. _ = json.Unmarshal(value, req)
  78. }
  79. return nil
  80. })
  81. if err != nil {
  82. log.Fatal(err)
  83. }
  84. return req
  85. }
  86. // SaveOrUpdate
  87. func (s *SpiderDb) SaveOrUpdate(sc *SpiderConfig) {
  88. //加载原始数据
  89. var sc1 *SpiderConfig = new(SpiderConfig)
  90. var sc2 *SpiderConfig
  91. err := s.db.View(func(tx *bolt.Tx) error {
  92. bucket := tx.Bucket([]byte("myBucket"))
  93. value := bucket.Get([]byte(sc.Code))
  94. if value != nil && len(value) > 0 {
  95. _ = json.Unmarshal(value, sc1)
  96. }
  97. return nil
  98. })
  99. if err != nil {
  100. log.Println(err.Error())
  101. return
  102. }
  103. //更新
  104. if sc1 != nil {
  105. sc2 = MergeSpiderConfig(sc1, sc)
  106. value, _ := json.Marshal(sc2)
  107. err = s.db.Update(func(tx *bolt.Tx) error {
  108. bucket := tx.Bucket([]byte("myBucket"))
  109. err := bucket.Put([]byte(sc.Code), value)
  110. return err
  111. })
  112. if err != nil {
  113. log.Println(err.Error())
  114. return
  115. }
  116. }
  117. }
  118. // LoadAll,默认按照代码排序
  119. func (s *SpiderDb) LoadAll() SpiderConfiges {
  120. ret := make(SpiderConfiges, 0)
  121. // 开始读取事务
  122. err := s.db.View(func(tx *bolt.Tx) error {
  123. // 遍历数据库中的所有桶
  124. bucket := tx.Bucket([]byte("myBucket"))
  125. // 遍历桶中的所有键/值对
  126. return bucket.ForEach(func(k, v []byte) error {
  127. var sf *SpiderConfig = new(SpiderConfig)
  128. json.Unmarshal(v, sf)
  129. if sf != nil {
  130. ret = append(ret, sf)
  131. }
  132. return nil
  133. })
  134. })
  135. sort.Sort(ret)
  136. if err != nil {
  137. log.Println(err.Error())
  138. }
  139. return ret
  140. }
  141. // 切换当前默认爬虫配置
  142. func (s *SpiderDb) Switch(code string) {
  143. if sc := s.Load(code); sc != nil {
  144. currentSpiderConfig = sc
  145. }
  146. }
  147. // Delete
  148. func (s *SpiderDb) Delete(code string) {
  149. err := s.db.Update(func(tx *bolt.Tx) error {
  150. bucket := tx.Bucket([]byte("myBucket"))
  151. err := bucket.Delete([]byte(code))
  152. return err
  153. })
  154. if err != nil {
  155. log.Println(err.Error())
  156. return
  157. }
  158. }
  159. // 批量导入
  160. func (s *SpiderDb) BatchImport(filepath string) error {
  161. f, err := excelize.OpenFile(filepath)
  162. if err != nil {
  163. return err
  164. }
  165. defer f.Close()
  166. for _, sheetName := range f.GetSheetList() {
  167. // 获取工作表的所有行
  168. rows, err := f.GetRows(sheetName)
  169. if err != nil {
  170. continue
  171. }
  172. //
  173. for index, row := range rows {
  174. if index == 0 || len(row) < 5 || row[0] == "" || row[3] == "" {
  175. continue
  176. }
  177. sc := &SpiderConfig{
  178. Code: row[0],
  179. Site: row[1],
  180. Channel: row[2],
  181. Url: row[3],
  182. Author: row[4],
  183. }
  184. value, _ := json.Marshal(sc)
  185. err = s.db.Update(func(tx *bolt.Tx) error {
  186. bucket := tx.Bucket([]byte("myBucket"))
  187. err := bucket.Put([]byte(sc.Code), value)
  188. return err
  189. })
  190. if err != nil {
  191. continue
  192. }
  193. }
  194. }
  195. return nil
  196. }