bind4spider.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. // 爬虫调试绑定
  2. package main
  3. import (
  4. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "log"
  6. "sort"
  7. be "spider_creator/backend"
  8. bdb "spider_creator/backend/db"
  9. )
  10. // DebugSpider 调试爬虫
  11. func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool,
  12. showImage bool, threads int, cssMark map[string]interface{}) {
  13. exitCh = make(chan bool, 1)
  14. qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads)
  15. qu.Debug(cssMark)
  16. if maxPages == 1 && threads == 1 {
  17. vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
  18. } else { //多页下载强制使用多线程模式
  19. vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark)
  20. }
  21. }
  22. // VerifySpiderConfig 验证
  23. func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) (*be.SpiderConfigVerifyResult, int, string) {
  24. var errtype int
  25. sc, err := be.NewSpiderConfig(cssMark)
  26. if err != nil {
  27. qu.Debug("CssMark Marshal Error:", err)
  28. return nil, errtype, "验证失败"
  29. }
  30. ret, err := vm.VerifySpiderConfig(sc) //验证
  31. if err != nil {
  32. qu.Debug("验证脚本配置失败::", err.Error())
  33. return ret, errtype, "验证脚本配置失败"
  34. }
  35. be.VerifyResults[sc.Code] = ret
  36. return ret, 1, "验证脚本配置成功"
  37. }
  38. // StopDebugSpider 停止调试
  39. func (a *App) StopDebugSpider() string {
  40. defer func() {
  41. if err := recover(); err != nil {
  42. log.Println(err)
  43. }
  44. }()
  45. exitCh <- true
  46. return "ok"
  47. }
  48. // ViewResultItemAll 查看所有结果,只显示最近的50条
  49. func (a *App) ViewResultItemAll(code string) be.ResultItems {
  50. ret := make(be.ResultItems, 0)
  51. index := 0
  52. if dataResult := be.DataResults[code]; dataResult != nil {
  53. for el := dataResult.Back(); el != nil; el = el.Prev() {
  54. if index > 50 {
  55. break
  56. }
  57. index += 1
  58. v, _ := el.Value.(*be.ResultItem)
  59. ret = append(ret, v)
  60. }
  61. }
  62. return ret
  63. }
  64. // ExportEpubFile
  65. func (a *App) ExportEpubFile(bookname, filepath string) string {
  66. log.Println("EPUB 文件存储:", bookname, filepath)
  67. db.ExportEpubFile(bookname, filepath, currentResults)
  68. return "ok"
  69. }
  70. // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置
  71. func (a *App) ImportSpiderConfigByExcelFile(filepath string) string {
  72. db.BatchImport(filepath)
  73. return "ok"
  74. }
  75. // CountYestodayArts
  76. func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) {
  77. exitCh = make(chan bool, 1)
  78. vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig)
  79. }
  80. // ExportExcelFile
  81. func (a *App) ExportExcelFile(filepath string) string {
  82. if err := db.ExportExcelFile(filepath, currentSpiderConfig.Site, currentSpiderConfig.Channel, currentResults); err == nil {
  83. return "ok"
  84. } else {
  85. return err.Error()
  86. }
  87. }
  88. // LoadAllJobs 加载我的所有作业
  89. func (a *App) LoadAllJobs() be.Jobs {
  90. rs, err := bdb.LoadEntities[be.Job]("jobs")
  91. if err != nil {
  92. return make(be.Jobs, 0)
  93. }
  94. jobs := be.Jobs(rs)
  95. sort.Sort(jobs)
  96. return jobs
  97. }
  98. // SaveJob
  99. func (a *App) SaveJob(job *be.Job) string {
  100. err := bdb.SaveEntity[be.Job]("jobs", job.Code, job)
  101. if err != nil {
  102. return err.Error()
  103. }
  104. return "ok"
  105. }
  106. // DeleteJob
  107. func (a *App) DeleteJob(code string) string {
  108. err := bdb.DeleteEntity[be.Job]("jobs", code)
  109. if err != nil {
  110. return err.Error()
  111. }
  112. return "ok"
  113. }
  114. // LoadJob
  115. func (a *App) LoadJob(code string) *be.Job {
  116. job, _ := bdb.LoadEntity[be.Job]("jobs", code)
  117. return job
  118. }
  119. // RunJob
  120. func (a *App) RunJob(code string) string {
  121. go vm.RunJob(code)
  122. return "ok"
  123. }
  124. func (a *App) StopJob(code string) string {
  125. vm.StopJob(code)
  126. return "ok"
  127. }
  128. // ExportJobResult
  129. func (a *App) ExportJobResult(code string, filePath string) string {
  130. vm.ExportJobResult(code, filePath)
  131. return "ok"
  132. }