bind4spider.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. // 爬虫调试绑定
  2. package main
  3. import (
  4. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "sort"
  6. be "spider_creator/backend"
  7. bdb "spider_creator/backend/db"
  8. bew "spider_creator/backend/webservice"
  9. "strings"
  10. )
  11. // DebugSpider 调试爬虫
  12. func (a *App) DebugSpider(url string, proxyServe bool, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool,
  13. showImage bool, threads int, cssMark map[string]interface{}) {
  14. exitCh = make(chan bool, 1)
  15. qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads)
  16. qu.Debug("cssMark---", cssMark)
  17. //vm.RunSpiderTmp(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
  18. if maxPages == 1 && threads == 1 {
  19. vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
  20. } else { //多页下载强制使用多线程模式
  21. vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark)
  22. }
  23. }
  24. // VerifySpiderConfig 验证
  25. func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) map[string]interface{} {
  26. var errtype int
  27. qu.Debug("cssMark---", cssMark)
  28. sc, err := be.NewSpiderConfig(cssMark)
  29. if err != nil {
  30. qu.Debug("CssMark Marshal Error:", err)
  31. return map[string]interface{}{
  32. "ret": nil,
  33. "err": errtype,
  34. "msg": "标注信息传输失败!",
  35. }
  36. }
  37. ret, err, msgArr := vm.VerifySpiderConfig(sc) //验证
  38. if err != nil {
  39. qu.Debug("验证脚本配置失败::", err.Error())
  40. return map[string]interface{}{
  41. "ret": ret,
  42. "err": errtype,
  43. "msg": "验证脚本配置失败!",
  44. }
  45. }
  46. be.VerifyResults[sc.Code] = ret
  47. qu.Debug("验证结果---", *ret)
  48. var msg string
  49. if len(msgArr) > 0 {
  50. msg = strings.Join(msgArr, "\n")
  51. }
  52. return map[string]interface{}{
  53. "ret": ret,
  54. "err": 1,
  55. "msg": msg,
  56. }
  57. }
  58. // StopDebugSpider 停止调试
  59. func (a *App) StopDebugSpider() string {
  60. defer func() {
  61. if err := recover(); err != nil {
  62. qu.Debug(err)
  63. }
  64. }()
  65. exitCh <- true
  66. return "ok"
  67. }
  68. // ViewResultItemAll 查看所有结果,只显示最近的50条
  69. func (a *App) ViewResultItemAll(code string) be.ResultItems {
  70. ret := make(be.ResultItems, 0)
  71. index := 0
  72. if dataResult := be.DataResults[code]; dataResult != nil {
  73. for el := dataResult.Back(); el != nil; el = el.Prev() {
  74. if index > 50 {
  75. break
  76. }
  77. index += 1
  78. v, _ := el.Value.(*be.ResultItem)
  79. ret = append(ret, v)
  80. }
  81. }
  82. return ret
  83. }
  84. // ServerActionCurrentOpenTab 记录当前编辑code的cssmark,高亮回显使用
  85. func (a *App) ServerActionCurrentOpenTab(cssMark map[string]interface{}) {
  86. bew.SetCurrentTabCssMark(cssMark)
  87. }
  88. // ExportEpubFile epub导出
  89. func (a *App) ExportEpubFile(bookname, filepath, code string) map[string]interface{} {
  90. sc := be.DataResults[code]
  91. qu.Debug("结果导出:", bookname, filepath, code)
  92. qu.Debug("EPUB 文件存储:", bookname, filepath)
  93. var msg string
  94. var errType int
  95. if sc != nil {
  96. if err := a.RunExportEpubFile(bookname, filepath, sc); err != nil {
  97. msg = err.Error()
  98. } else {
  99. msg = "导出成功"
  100. errType = 1
  101. }
  102. } else {
  103. msg = "导出失败,无法获取结果"
  104. }
  105. return map[string]interface{}{"err": errType, "msg": msg}
  106. }
  107. // ExportJsonFile json导出
  108. func (a *App) ExportJsonFile(filepath, code string) map[string]interface{} {
  109. sc := be.DataResults[code]
  110. qu.Debug("结果导出:", filepath, code)
  111. var msg string
  112. var errType int
  113. if sc != nil {
  114. if err := a.RunExportJsonFile(filepath, code, sc); err != nil {
  115. msg = err.Error()
  116. } else {
  117. msg = "导出成功"
  118. errType = 1
  119. }
  120. } else {
  121. msg = "导出失败,无法获取结果"
  122. }
  123. return map[string]interface{}{"err": errType, "msg": msg}
  124. }
  125. // ExportExcelFile excel导出
  126. func (a *App) ExportExcelFile(filepath, code string) map[string]interface{} {
  127. sc := be.DataResults[code]
  128. qu.Debug("结果导出:", filepath, code)
  129. var msg string
  130. var errType int
  131. if sc != nil {
  132. if err := a.RunExportExcelFile(filepath, code, sc); err != nil {
  133. msg = err.Error()
  134. } else {
  135. msg = "导出成功"
  136. errType = 1
  137. }
  138. } else {
  139. msg = "导出失败,无法获取结果"
  140. }
  141. return map[string]interface{}{"err": errType, "msg": msg}
  142. }
  143. // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置
  144. func (a *App) ImportSpiderConfigByExcelFile(filepath string) string {
  145. db.BatchImport(filepath)
  146. return "ok"
  147. }
  148. // CountYestodayArts
  149. func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) {
  150. exitCh = make(chan bool, 1)
  151. vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig)
  152. }
  153. // LoadAllJobs 加载我的所有作业
  154. func (a *App) LoadAllJobs() be.Jobs {
  155. rs, err := bdb.LoadEntities[be.Job]("jobs")
  156. if err != nil {
  157. return make(be.Jobs, 0)
  158. }
  159. jobs := be.Jobs(rs)
  160. sort.Sort(jobs)
  161. return jobs
  162. }
  163. func (a *App) SaveJob(job *be.Job) string {
  164. err := bdb.SaveEntity[be.Job]("jobs", job.Code, job)
  165. if err != nil {
  166. return err.Error()
  167. }
  168. return "ok"
  169. }
  170. func (a *App) DeleteJob(code string) string {
  171. err := bdb.DeleteEntity[be.Job]("jobs", code)
  172. if err != nil {
  173. return err.Error()
  174. }
  175. return "ok"
  176. }
  177. func (a *App) LoadJob(code string) *be.Job {
  178. job, _ := bdb.LoadEntity[be.Job]("jobs", code)
  179. return job
  180. }
  181. func (a *App) RunJob(code string) string {
  182. go vm.RunJob(code)
  183. return "ok"
  184. }
  185. func (a *App) StopJob(code string) string {
  186. vm.StopJob(code)
  187. return "ok"
  188. }
  189. // ExportJobResult
  190. func (a *App) ExportJobResult(code string, filePath string) string {
  191. vm.ExportJobResult(code, filePath)
  192. return "ok"
  193. }