123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
// Spider debugging bindings.
- package main
- import (
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "sort"
- be "spider_creator/backend"
- bdb "spider_creator/backend/db"
- bew "spider_creator/backend/webservice"
- )
- // DebugSpider 调试爬虫
- func (a *App) DebugSpider(url string, proxyServe bool, maxPages int, listDealy int64, trunPageDelay int64, contentDelay int64, headless bool,
- showImage bool, threads int, cssMark map[string]interface{}) {
- exitCh = make(chan bool, 1)
- qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads)
- qu.Debug("cssMark---", cssMark)
- //vm.RunSpiderTmp(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
- if maxPages == 1 && threads == 1 {
- vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
- } else { //多页下载强制使用多线程模式
- vm.RunSpiderMulThreads(url, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, proxyServe, threads, exitCh, cssMark)
- }
- }
- // VerifySpiderConfig 验证
- func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) map[string]interface{} {
- var errtype int
- qu.Debug("cssMark---", cssMark)
- sc, err := be.NewSpiderConfig(cssMark)
- if err != nil {
- qu.Debug("CssMark Marshal Error:", err)
- return map[string]interface{}{
- "ret": nil,
- "err": errtype,
- "msg": "标注信息传输失败!",
- }
- }
- ret, err := vm.VerifySpiderConfig(sc) //验证
- if err != nil {
- qu.Debug("验证脚本配置失败::", err.Error())
- return map[string]interface{}{
- "ret": ret,
- "err": errtype,
- "msg": "验证脚本配置失败!",
- }
- }
- be.VerifyResults[sc.Code] = ret
- qu.Debug("验证结果---", *ret)
- return map[string]interface{}{
- "ret": ret,
- "err": 1,
- "msg": "验证脚本配置成功",
- }
- }
- // StopDebugSpider 停止调试
- func (a *App) StopDebugSpider() string {
- defer func() {
- if err := recover(); err != nil {
- qu.Debug(err)
- }
- }()
- exitCh <- true
- return "ok"
- }
- // ViewResultItemAll 查看所有结果,只显示最近的50条
- func (a *App) ViewResultItemAll(code string) be.ResultItems {
- ret := make(be.ResultItems, 0)
- index := 0
- if dataResult := be.DataResults[code]; dataResult != nil {
- for el := dataResult.Back(); el != nil; el = el.Prev() {
- if index > 50 {
- break
- }
- index += 1
- v, _ := el.Value.(*be.ResultItem)
- ret = append(ret, v)
- }
- }
- return ret
- }
- // ServerActionCurrentOpenTab 记录当前编辑code的cssmark,高亮回显使用
- func (a *App) ServerActionCurrentOpenTab(cssMark map[string]interface{}) {
- bew.SetCurrentTabCssMark(cssMark)
- }
- // ExportEpubFile epub导出
- func (a *App) ExportEpubFile(bookname, filepath, code string) map[string]interface{} {
- sc := be.DataResults[code]
- qu.Debug("结果导出:", bookname, filepath, code)
- qu.Debug("EPUB 文件存储:", bookname, filepath)
- var msg string
- var errType int
- if sc != nil {
- if err := a.RunExportEpubFile(bookname, filepath, sc); err != nil {
- msg = err.Error()
- } else {
- msg = "导出成功"
- errType = 1
- }
- } else {
- msg = "导出失败,无法获取结果"
- }
- return map[string]interface{}{"err": errType, "msg": msg}
- }
- // ExportJsonFile json导出
- func (a *App) ExportJsonFile(filepath, code string) map[string]interface{} {
- sc := be.DataResults[code]
- qu.Debug("结果导出:", filepath, code)
- var msg string
- var errType int
- if sc != nil {
- if err := a.RunExportJsonFile(filepath, code, sc); err != nil {
- msg = err.Error()
- } else {
- msg = "导出成功"
- errType = 1
- }
- } else {
- msg = "导出失败,无法获取结果"
- }
- return map[string]interface{}{"err": errType, "msg": msg}
- }
- // ExportExcelFile excel导出
- func (a *App) ExportExcelFile(filepath, code string) map[string]interface{} {
- sc := be.DataResults[code]
- qu.Debug("结果导出:", filepath, code)
- var msg string
- var errType int
- if sc != nil {
- if err := a.RunExportExcelFile(filepath, code, sc); err != nil {
- msg = err.Error()
- } else {
- msg = "导出成功"
- errType = 1
- }
- } else {
- msg = "导出失败,无法获取结果"
- }
- return map[string]interface{}{"err": errType, "msg": msg}
- }
- // ImportSpiderConfigByExcelFile 通过excel文件导入爬虫配置
- func (a *App) ImportSpiderConfigByExcelFile(filepath string) string {
- db.BatchImport(filepath)
- return "ok"
- }
- // CountYestodayArts
- func (a *App) CountYestodayArts(url string, listDealy int64, trunPageDelay int64, headless bool, showImage bool) {
- exitCh = make(chan bool, 1)
- vm.CountYestodayArts(url, listDealy, trunPageDelay, headless, showImage, exitCh, currentSpiderConfig)
- }
- // LoadAllJobs 加载我的所有作业
- func (a *App) LoadAllJobs() be.Jobs {
- rs, err := bdb.LoadEntities[be.Job]("jobs")
- if err != nil {
- return make(be.Jobs, 0)
- }
- jobs := be.Jobs(rs)
- sort.Sort(jobs)
- return jobs
- }
- func (a *App) SaveJob(job *be.Job) string {
- err := bdb.SaveEntity[be.Job]("jobs", job.Code, job)
- if err != nil {
- return err.Error()
- }
- return "ok"
- }
- func (a *App) DeleteJob(code string) string {
- err := bdb.DeleteEntity[be.Job]("jobs", code)
- if err != nil {
- return err.Error()
- }
- return "ok"
- }
- func (a *App) LoadJob(code string) *be.Job {
- job, _ := bdb.LoadEntity[be.Job]("jobs", code)
- return job
- }
- func (a *App) RunJob(code string) string {
- go vm.RunJob(code)
- return "ok"
- }
- func (a *App) StopJob(code string) string {
- vm.StopJob(code)
- return "ok"
- }
- // ExportJobResult
- func (a *App) ExportJobResult(code string, filePath string) string {
- vm.ExportJobResult(code, filePath)
- return "ok"
- }
|