// 爬虫调试绑定 package main import ( qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "sort" be "spider_creator/backend" bdb "spider_creator/backend/db" bew "spider_creator/backend/webservice" ) // DebugSpider 调试爬虫 func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool, showImage bool, threads int, cssMark map[string]interface{}) { exitCh = make(chan bool, 1) qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads) qu.Debug("cssMark---", cssMark) if maxPages == 1 && threads == 1 { vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark) } else { //多页下载强制使用多线程模式 vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark) } } // VerifySpiderConfig 验证 func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) map[string]interface{} { var errtype int qu.Debug(cssMark) sc, err := be.NewSpiderConfig(cssMark) if err != nil { qu.Debug("CssMark Marshal Error:", err) return map[string]interface{}{ "ret": nil, "err": errtype, "msg": "标注信息传输失败!", } } ret, err := vm.VerifySpiderConfig(sc) //验证 if err != nil { qu.Debug("验证脚本配置失败::", err.Error()) return map[string]interface{}{ "ret": ret, "err": errtype, "msg": "验证脚本配置失败!", } } be.VerifyResults[sc.Code] = ret qu.Debug("验证结果---", *ret) return map[string]interface{}{ "ret": ret, "err": 1, "msg": "验证脚本配置成功", } } // StopDebugSpider 停止调试 func (a *App) StopDebugSpider() string { defer func() { if err := recover(); err != nil { qu.Debug(err) } }() exitCh <- true return "ok" } // ViewResultItemAll 查看所有结果,只显示最近的50条 func (a *App) ViewResultItemAll(code string) be.ResultItems { ret := make(be.ResultItems, 0) index := 0 if dataResult := be.DataResults[code]; dataResult != nil { for el := dataResult.Back(); el != nil; el = el.Prev() { if index > 50 { break } index += 1 v, _ := el.Value.(*be.ResultItem) ret = append(ret, v) } } return ret } // ServerActionCurrentOpenTab 记录当前编辑code的cssmark,高亮回显使用 func (a *App) ServerActionCurrentOpenTab(cssMark map[string]interface{}) { qu.Debug("----------------") sc, err := be.NewSpiderConfig(cssMark) if err != nil { qu.Debug("标注信息传输失败!") } bew.SetCurrentTabCssMark(sc) } // ExportEpubFile func (a *App) ExportEpubFile(bookname, filepath string) string { qu.Debug("EPUB 文件存储:", bookname, filepath) db.ExportEpubFile(bookname, filepath, currentResults) return "ok" } // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置 func (a *App) ImportSpiderConfigByExcelFile(filepath string) string { db.BatchImport(filepath) return "ok" } // CountYestodayArts func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) { exitCh = make(chan bool, 1) vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig) } // ExportExcelFile func (a *App) ExportExcelFile(filepath string) string { if err := db.ExportExcelFile(filepath, currentSpiderConfig.Site, currentSpiderConfig.Channel, currentResults); err == nil { return "ok" } else { return err.Error() } } // LoadAllJobs 加载我的所有作业 func (a *App) LoadAllJobs() be.Jobs { rs, err := bdb.LoadEntities[be.Job]("jobs") if err != nil { return make(be.Jobs, 0) } jobs := be.Jobs(rs) sort.Sort(jobs) return jobs } // SaveJob func (a *App) SaveJob(job *be.Job) string { err := bdb.SaveEntity[be.Job]("jobs", job.Code, job) if err != nil { return err.Error() } return "ok" } // DeleteJob func (a *App) DeleteJob(code string) string { err := bdb.DeleteEntity[be.Job]("jobs", code) if err != nil { return err.Error() } return "ok" } // LoadJob func (a *App) LoadJob(code string) *be.Job { job, _ := bdb.LoadEntity[be.Job]("jobs", code) return job } // RunJob func (a *App) RunJob(code string) string { go vm.RunJob(code) return "ok" } func (a *App) StopJob(code string) string { vm.StopJob(code) return "ok" } // ExportJobResult func (a *App) ExportJobResult(code string, filePath string) string { vm.ExportJobResult(code, filePath) return "ok" }