bind4spider.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // 爬虫调试绑定
  2. package main
  3. import (
  4. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "sort"
  6. be "spider_creator/backend"
  7. bdb "spider_creator/backend/db"
  8. bew "spider_creator/backend/webservice"
  9. )
  10. // DebugSpider 调试爬虫
  11. func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool,
  12. showImage bool, threads int, cssMark map[string]interface{}) {
  13. exitCh = make(chan bool, 1)
  14. qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads)
  15. qu.Debug("cssMark---", cssMark)
  16. vm.RunSpiderTmp(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
  17. //if maxPages == 1 && threads == 1 {
  18. // vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
  19. //} else { //多页下载强制使用多线程模式
  20. // vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark)
  21. //}
  22. }
  23. // VerifySpiderConfig 验证
  24. func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) map[string]interface{} {
  25. var errtype int
  26. qu.Debug(cssMark)
  27. sc, err := be.NewSpiderConfig(cssMark)
  28. if err != nil {
  29. qu.Debug("CssMark Marshal Error:", err)
  30. return map[string]interface{}{
  31. "ret": nil,
  32. "err": errtype,
  33. "msg": "标注信息传输失败!",
  34. }
  35. }
  36. ret, err := vm.VerifySpiderConfig(sc) //验证
  37. if err != nil {
  38. qu.Debug("验证脚本配置失败::", err.Error())
  39. return map[string]interface{}{
  40. "ret": ret,
  41. "err": errtype,
  42. "msg": "验证脚本配置失败!",
  43. }
  44. }
  45. be.VerifyResults[sc.Code] = ret
  46. qu.Debug("验证结果---", *ret)
  47. return map[string]interface{}{
  48. "ret": ret,
  49. "err": 1,
  50. "msg": "验证脚本配置成功",
  51. }
  52. }
  53. // StopDebugSpider 停止调试
  54. func (a *App) StopDebugSpider() string {
  55. defer func() {
  56. if err := recover(); err != nil {
  57. qu.Debug(err)
  58. }
  59. }()
  60. exitCh <- true
  61. return "ok"
  62. }
  63. // ViewResultItemAll 查看所有结果,只显示最近的50条
  64. func (a *App) ViewResultItemAll(code string) be.ResultItems {
  65. ret := make(be.ResultItems, 0)
  66. index := 0
  67. if dataResult := be.DataResults[code]; dataResult != nil {
  68. for el := dataResult.Back(); el != nil; el = el.Prev() {
  69. if index > 50 {
  70. break
  71. }
  72. index += 1
  73. v, _ := el.Value.(*be.ResultItem)
  74. ret = append(ret, v)
  75. }
  76. }
  77. return ret
  78. }
  79. // ServerActionCurrentOpenTab 记录当前编辑code的cssmark,高亮回显使用
  80. func (a *App) ServerActionCurrentOpenTab(cssMark map[string]interface{}) {
  81. bew.SetCurrentTabCssMark(cssMark)
  82. }
  83. // ExportEpubFile epub导出
  84. func (a *App) ExportEpubFile(bookname, filepath, code string) map[string]interface{} {
  85. sc := be.DataResults[code]
  86. qu.Debug("结果导出:", bookname, filepath, code)
  87. qu.Debug("EPUB 文件存储:", bookname, filepath)
  88. var msg string
  89. var errType int
  90. if sc != nil {
  91. if err := a.RunExportEpubFile(bookname, filepath, sc); err != nil {
  92. msg = err.Error()
  93. } else {
  94. msg = "导出成功"
  95. errType = 1
  96. }
  97. } else {
  98. msg = "导出失败,无法获取结果"
  99. }
  100. return map[string]interface{}{"err": errType, "msg": msg}
  101. }
  102. // ExportJsonFile json导出
  103. func (a *App) ExportJsonFile(filepath, code string) map[string]interface{} {
  104. sc := be.DataResults[code]
  105. qu.Debug("结果导出:", filepath, code)
  106. var msg string
  107. var errType int
  108. if sc != nil {
  109. if err := a.RunExportJsonFile(filepath, code, sc); err != nil {
  110. msg = err.Error()
  111. } else {
  112. msg = "导出成功"
  113. errType = 1
  114. }
  115. } else {
  116. msg = "导出失败,无法获取结果"
  117. }
  118. return map[string]interface{}{"err": errType, "msg": msg}
  119. }
  120. // ExportExcelFile excel导出
  121. func (a *App) ExportExcelFile(filepath, code string) map[string]interface{} {
  122. sc := be.DataResults[code]
  123. qu.Debug("结果导出:", filepath, code)
  124. var msg string
  125. var errType int
  126. if sc != nil {
  127. if err := a.RunExportExcelFile(filepath, code, sc); err != nil {
  128. msg = err.Error()
  129. } else {
  130. msg = "导出成功"
  131. errType = 1
  132. }
  133. } else {
  134. msg = "导出失败,无法获取结果"
  135. }
  136. return map[string]interface{}{"err": errType, "msg": msg}
  137. }
  138. // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置
  139. func (a *App) ImportSpiderConfigByExcelFile(filepath string) string {
  140. db.BatchImport(filepath)
  141. return "ok"
  142. }
  143. // CountYestodayArts
  144. func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) {
  145. exitCh = make(chan bool, 1)
  146. vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig)
  147. }
  148. // LoadAllJobs 加载我的所有作业
  149. func (a *App) LoadAllJobs() be.Jobs {
  150. rs, err := bdb.LoadEntities[be.Job]("jobs")
  151. if err != nil {
  152. return make(be.Jobs, 0)
  153. }
  154. jobs := be.Jobs(rs)
  155. sort.Sort(jobs)
  156. return jobs
  157. }
  158. // SaveJob
  159. func (a *App) SaveJob(job *be.Job) string {
  160. err := bdb.SaveEntity[be.Job]("jobs", job.Code, job)
  161. if err != nil {
  162. return err.Error()
  163. }
  164. return "ok"
  165. }
  166. // DeleteJob
  167. func (a *App) DeleteJob(code string) string {
  168. err := bdb.DeleteEntity[be.Job]("jobs", code)
  169. if err != nil {
  170. return err.Error()
  171. }
  172. return "ok"
  173. }
  174. // LoadJob
  175. func (a *App) LoadJob(code string) *be.Job {
  176. job, _ := bdb.LoadEntity[be.Job]("jobs", code)
  177. return job
  178. }
  179. // RunJob
  180. func (a *App) RunJob(code string) string {
  181. go vm.RunJob(code)
  182. return "ok"
  183. }
  184. func (a *App) StopJob(code string) string {
  185. vm.StopJob(code)
  186. return "ok"
  187. }
  188. // ExportJobResult
  189. func (a *App) ExportJobResult(code string, filePath string) string {
  190. vm.ExportJobResult(code, filePath)
  191. return "ok"
  192. }