// 爬虫调试绑定 package main import ( qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "log" "sort" be "spider_creator/backend" bdb "spider_creator/backend/db" ) // DebugSpider 调试爬虫 func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool, showImage bool, threads int, cssMark map[string]interface{}) { exitCh = make(chan bool, 1) qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads) qu.Debug(cssMark) if maxPages == 1 && threads == 1 { vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark) } else { //多页下载强制使用多线程模式 vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark) } } // VerifySpiderConfig 验证 func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) (*be.SpiderConfigVerifyResult, int, string) { var errtype int sc, err := be.NewSpiderConfig(cssMark) if err != nil { qu.Debug("CssMark Marshal Error:", err) return nil, errtype, "验证失败" } ret, err := vm.VerifySpiderConfig(sc) //验证 if err != nil { qu.Debug("验证脚本配置失败::", err.Error()) return ret, errtype, "验证脚本配置失败" } be.VerifyResults[sc.Code] = ret return ret, 1, "验证脚本配置成功" } // StopDebugSpider 停止调试 func (a *App) StopDebugSpider() string { defer func() { if err := recover(); err != nil { log.Println(err) } }() exitCh <- true return "ok" } // ViewResultItemAll 查看所有结果,只显示最近的50条 func (a *App) ViewResultItemAll(code string) be.ResultItems { ret := make(be.ResultItems, 0) index := 0 if dataResult := be.DataResults[code]; dataResult != nil { for el := dataResult.Back(); el != nil; el = el.Prev() { if index > 50 { break } index += 1 v, _ := el.Value.(*be.ResultItem) ret = append(ret, v) } } return ret } // ExportEpubFile func (a *App) ExportEpubFile(bookname, filepath string) string { log.Println("EPUB 文件存储:", bookname, filepath) db.ExportEpubFile(bookname, filepath, currentResults) return "ok" } // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置 func (a *App) ImportSpiderConfigByExcelFile(filepath string) string { db.BatchImport(filepath) return "ok" } // CountYestodayArts func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) { exitCh = make(chan bool, 1) vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig) } // ExportExcelFile func (a *App) ExportExcelFile(filepath string) string { if err := db.ExportExcelFile(filepath, currentSpiderConfig.Site, currentSpiderConfig.Channel, currentResults); err == nil { return "ok" } else { return err.Error() } } // LoadAllJobs 加载我的所有作业 func (a *App) LoadAllJobs() be.Jobs { rs, err := bdb.LoadEntities[be.Job]("jobs") if err != nil { return make(be.Jobs, 0) } jobs := be.Jobs(rs) sort.Sort(jobs) return jobs } // SaveJob func (a *App) SaveJob(job *be.Job) string { err := bdb.SaveEntity[be.Job]("jobs", job.Code, job) if err != nil { return err.Error() } return "ok" } // DeleteJob func (a *App) DeleteJob(code string) string { err := bdb.DeleteEntity[be.Job]("jobs", code) if err != nil { return err.Error() } return "ok" } // LoadJob func (a *App) LoadJob(code string) *be.Job { job, _ := bdb.LoadEntity[be.Job]("jobs", code) return job } // RunJob func (a *App) RunJob(code string) string { go vm.RunJob(code) return "ok" } func (a *App) StopJob(code string) string { vm.StopJob(code) return "ok" } // ExportJobResult func (a *App) ExportJobResult(code string, filePath string) string { vm.ExportJobResult(code, filePath) return "ok" }