Jelajahi Sumber

结构功能修改

mxs 10 bulan lalu
induk
melakukan
0f1bf94854

+ 6 - 6
app.go

@@ -3,7 +3,7 @@ package main
 import (
 	"fmt"
 	"golang.org/x/net/context"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"os"
 	rt "runtime"
 	bdb "spider_creator/backend/db"
@@ -54,7 +54,7 @@ func (a *App) startup(ctx context.Context) {
 		os.MkdirAll(attachesDir, 0777)
 	}
 	var dbfile = baseDir + "/spider.dat"
-	log.Println("db file:", dbfile)
+	qu.Debug("db file:", dbfile)
 
 	db = bdb.NewSpiderDb(dbfile, a)
 	bdb.Db = db
@@ -114,7 +114,7 @@ func (a *App) destory(ctx context.Context) {
 
 // SwitchSpiderConfig
 //func (a *App) SwitchSpiderConfig(code string) string {
-//	log.Println("切换当前默认爬虫配置:", code)
+//	qu.Debug("切换当前默认爬虫配置:", code)
 //	db.Switch(code)
 //	return "ok"
 //}
@@ -145,7 +145,7 @@ func (a *App) destory(ctx context.Context) {
 //func (a *App) StopDebugSpider() string {
 //	defer func() {
 //		if err := recover(); err != nil {
-//			log.Println(err)
+//			qu.Debug(err)
 //		}
 //	}()
 //	exitCh <- true
@@ -171,7 +171,7 @@ func (a *App) destory(ctx context.Context) {
 //		{Pattern: "*.json", DisplayName: "json file *.json"},
 //	}})
 //	if err != nil {
-//		log.Println(err.Error())
+//		qu.Debug(err.Error())
 //		return ""
 //	}
 //	return path
@@ -183,7 +183,7 @@ func (a *App) destory(ctx context.Context) {
 //		{Pattern: "*.xlsx", DisplayName: "excel file *.xlsx"},
 //	}})
 //	if err != nil {
-//		log.Println(err.Error())
+//		qu.Debug(err.Error())
 //		return ""
 //	}
 //	return path

+ 2 - 2
backend/ai/ai.go

@@ -49,8 +49,8 @@ func UpdateResultDateStr(rs be.ResultItems) (err error) {
 	obj, _ := choices[0].(map[string]interface{})
 	message, _ := obj["message"].(map[string]interface{})
 	value, _ := message["content"].(string)
-	// log.Println("提示语", prompt)
-	// log.Println("AI调用结果", value)
+	// qu.Debug("提示语", prompt)
+	// qu.Debug("AI调用结果", value)
 	results := strings.Split(value, "\n")
 	if len(results) < len(rs) {
 		err = errors.New("调用大模型失败")

+ 2 - 2
backend/browser.go

@@ -81,8 +81,8 @@ func NewBrowser(headless bool, showImage bool, proxyServe string) (
 		chromedp.Flag("no-sandbox", true),
 		chromedp.Flag("disable-dev-shm-usage", false),
 		chromedp.Flag("default-browser-check", false),
-		chromedp.Flag("ignore-certificate-errors", true), //忽略错误
-		chromedp.Flag("disable-web-security", true),      //禁用网络安全标志
+		chromedp.Flag("ignore-certificate-errors", false), //忽略错误
+		chromedp.Flag("disable-web-security", true),       //禁用网络安全标志
 		chromedp.Flag("mute-audio", false),
 		chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
 	)

+ 4 - 5
backend/db/db.go

@@ -3,10 +3,9 @@ package db
 import (
 	"encoding/json"
 	"errors"
-	"log"
-	be "spider_creator/backend"
-
 	"github.com/boltdb/bolt"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	be "spider_creator/backend"
 )
 
 // 在新增表的时候,需要在这里增加实体清单
@@ -32,7 +31,7 @@ type (
 func NewSpiderDb(dbfile string, enf be.EventNotifyFace) *SpiderDb {
 	db, err := bolt.Open(dbfile, 0600, nil)
 	if err != nil {
-		log.Println("db error", err.Error())
+		qu.Debug("db error", err.Error())
 	}
 	for _, t := range DB_TABLES {
 		err = db.Update(func(tx *bolt.Tx) error {
@@ -40,7 +39,7 @@ func NewSpiderDb(dbfile string, enf be.EventNotifyFace) *SpiderDb {
 			return err
 		})
 		if err != nil {
-			log.Println("db error", err.Error())
+			qu.Debug("db error", err.Error())
 		}
 	}
 	return &SpiderDb{

+ 5 - 4
backend/db/spider.go

@@ -4,6 +4,7 @@ import (
 	"container/list"
 	"encoding/json"
 	"fmt"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"log"
 	"os"
 	"sort"
@@ -48,7 +49,7 @@ func (s *SpiderDb) SaveOrUpdate(sc *be.SpiderConfig) {
 		return nil
 	})
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		return
 	}
 	//更新
@@ -61,7 +62,7 @@ func (s *SpiderDb) SaveOrUpdate(sc *be.SpiderConfig) {
 			return err
 		})
 		if err != nil {
-			log.Println(err.Error())
+			qu.Debug(err.Error())
 			return
 		}
 	}
@@ -86,7 +87,7 @@ func (s *SpiderDb) LoadSpiderConfigAll() be.SpiderConfiges {
 	})
 	sort.Sort(ret)
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 	}
 	return ret
 }
@@ -99,7 +100,7 @@ func (s *SpiderDb) DeleteSpiderConfig(code string) {
 		return err
 	})
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		return
 	}
 }

+ 4 - 2
backend/vm/check.go

@@ -2,7 +2,7 @@ package vm
 
 import (
 	"container/list"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
 	"time"
 
@@ -11,6 +11,7 @@ import (
 
 // VerifySpiderConfig 验证爬虫配置,支持翻页,列表项数据只提取2条
 func (vm *VM) VerifySpiderConfig(sc *be.SpiderConfig) (*be.SpiderConfigVerifyResult, error) {
+	qu.Debug("sc---", *sc)
 	verifyResult := list.New()
 	ret := &be.SpiderConfigVerifyResult{true, true, true, true, true, true, true}
 	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(false, false, "")    //列表页使用
@@ -45,7 +46,7 @@ T:
 			chromedp.Evaluate(listRunJs, &listResult),
 		})
 		if err != nil {
-			log.Println("执行JS代码失败", err.Error())
+			qu.Debug("执行JS代码失败", err.Error())
 			continue
 		}
 		//TODO 5.操作详情页
@@ -113,6 +114,7 @@ T:
 			ret.Attaches = len(r.AttachLinks) > 0
 		}
 	}
+	qu.Debug(verifyResult.Len())
 	if ret.ListItems {
 		ret.ListItems = verifyResult.Len() > 2
 	}

+ 10 - 10
backend/vm/jobs.go

@@ -8,7 +8,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
 	bdb "spider_creator/backend/db"
 	"strconv"
@@ -32,7 +32,7 @@ func (vm *VM) RunJob(code string) {
 	job, err := bdb.LoadEntity[be.Job]("jobs", code)
 	var state *be.JobRunningState
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "执行作业失败:" + err.Error()})
 		return
 	}
@@ -68,7 +68,7 @@ func (vm *VM) RunJob(code string) {
 		vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: fmt.Sprintf("作业执行结束,结果:%d", state.ResultCache.Len())})
 		close(state.ExitCh)
 	}()
-	log.Println("共有子爬虫数量:", len(job.Items))
+	qu.Debug("共有子爬虫数量:", len(job.Items))
 	var totalPages, downloadedPages float32 = 0, 0
 	for _, item := range job.Items {
 		totalPages += float32(item.MaxPages)
@@ -80,11 +80,11 @@ L:
 		// TODO 加载单个爬虫采集配置
 		sf, err := bdb.LoadEntity[be.SpiderConfig]("myBucket", item.SpiderCode)
 		if err != nil {
-			log.Println("加载爬虫配置参数失败:", err.Error())
+			qu.Debug("加载爬虫配置参数失败:", err.Error())
 			continue
 		}
-		log.Println(*sf)
-		log.Println(*item)
+		qu.Debug(*sf)
+		qu.Debug(*item)
 		listRunJs, contentRunJs := sf.ListJSCode, sf.ContentJSCode
 
 		//TODO 2. 执行JS代码,获取列表页信息
@@ -114,19 +114,19 @@ L:
 				chromedp.Evaluate(listRunJs, &listResult),
 			})
 			if err != nil {
-				log.Println("执行JS代码失败", err.Error())
+				qu.Debug("执行JS代码失败", err.Error())
 				vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
 					Act: be.JOB_RUNNING_EVENT_DEBUG,
 					Msg: "执行列表页JS代码失败"})
 				continue
 			}
-			log.Println("加载当前列表页,长度:", len(listResult))
+			qu.Debug("加载当前列表页,长度:", len(listResult))
 			vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
 				Act: be.JOB_RUNNING_EVENT_DEBUG,
 				Msg: "获取列表完成"})
 			//TODO 5.操作详情页
 			for _, r := range listResult {
-				log.Println("详情页", r.Title, r.Href)
+				qu.Debug("详情页", r.Title, r.Href)
 				select {
 				case <-state.ExitCh:
 					break L
@@ -188,7 +188,7 @@ L:
 func (vm *VM) StopJob(code string) {
 	defer func() {
 		if err := recover(); err != nil {
-			log.Println(err)
+			qu.Debug(err)
 		}
 	}()
 	if v, ok := runningJobs[code]; ok {

+ 16 - 15
backend/vm/single.go

@@ -5,7 +5,7 @@ import (
 	_ "embed"
 	"fmt"
 	"github.com/chromedp/chromedp"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
 	ai "spider_creator/backend/ai"
 	"strconv"
@@ -22,8 +22,9 @@ func NewVM(attachesDir string, dnf be.EventNotifyFace) *VM {
 // RunSpider
 func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay int64, headless bool, showImage bool, proxyServe string, exit chan bool, cssMark map[string]interface{}) {
 	sc, err := be.NewSpiderConfig(cssMark)
+	qu.Debug("sc---", *sc)
 	if err != nil {
-		log.Println("标注信息传输失败!")
+		qu.Debug("标注信息传输失败!")
 		vm.dnf.Dispatch("debug_event", "标注信息传输失败!")
 		return
 	}
@@ -31,12 +32,12 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 		sc.Href = url
 	}
 	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
-	log.Println("1浏览器打开", *sc)
+	qu.Debug("1浏览器打开", *sc)
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {
 		cancel()
 		baseCancel()
-		log.Println("0浏览器已经销毁")
+		qu.Debug("0浏览器已经销毁")
 		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
 		close(exit)
 	}()
@@ -46,24 +47,24 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond), //列表页等待
 	})
 	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
-	log.Println("2页面打开")
+	qu.Debug("2页面打开")
 	var runJs string = sc.ListJSCode
 	listResult := make(be.ResultItems, 0)
 	//TODO 2. 执行JS代码,获取列表页信息
 	if runJs == "" {
 		runJs = renderJavascriptCoder(loadListItemsJS, sc)
 	}
-	//log.Println("execute list jscode", runJs)
+	//qu.Debug("execute list jscode", runJs)
 	err = chromedp.Run(ctx, chromedp.Tasks{
 		chromedp.Evaluate(runJs, &listResult),
 	})
 	if err != nil {
-		log.Println("执行JS代码失败", err.Error())
+		qu.Debug("执行JS代码失败", err.Error())
 		vm.dnf.Dispatch("debug_event", "2 执行JS代码失败")
 		return
 	}
 	vm.dnf.Dispatch("debug_event", "3 获取列表完成")
-	log.Println("3获取列表完成")
+	qu.Debug("3获取列表完成")
 
 	//TODO 3. 打开详情页 ,最多打开10条
 	runJs = sc.ContentJSCode
@@ -72,7 +73,7 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 	}
 	currentResult := list.New()
 	be.DataResults[sc.Code] = currentResult
-	//log.Println("execute content js", runJs)
+	//qu.Debug("execute content js", runJs)
 	for _, v := range listResult {
 		select {
 		case <-exit:
@@ -87,7 +88,7 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 				chromedp.Evaluate(runJs, v),
 			})
 			if err != nil {
-				log.Println("执行JS代码失败", err.Error())
+				qu.Debug("执行JS代码失败", err.Error())
 			}
 			if len(v.AttachLinks) > 0 { //有附件
 				vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. 下载附件"))
@@ -103,7 +104,7 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 		}
 	}
 	vm.dnf.Dispatch("debug_event", "5 采集测试完成")
-	log.Println("5采集测试完成")
+	qu.Debug("5采集测试完成")
 }
 
 // CountYestodayArts 统计昨日信息发布量
@@ -111,12 +112,12 @@ func (vm *VM) CountYestodayArts(url string, listDealy int64, trunPageDelay int64
 	headless bool, showImage bool, exit chan bool, currentSpiderConfig *be.SpiderConfig) (count int) {
 	sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Href: url})
 	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, "")
-	log.Println("1浏览器打开")
+	qu.Debug("1浏览器打开")
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {
 		cancel()
 		baseCancel()
-		log.Println("0浏览器已经销毁")
+		qu.Debug("0浏览器已经销毁")
 		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
 		vm.dnf.Dispatch("debug_event", fmt.Sprintf("99 昨日信息发布量:%d ", count))
 		close(exit)
@@ -135,7 +136,7 @@ func (vm *VM) CountYestodayArts(url string, listDealy int64, trunPageDelay int64
 		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond),
 	})
 	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
-	log.Println("2页面打开")
+	qu.Debug("2页面打开")
 	//TODO 2. 执行JS代码,获取列表页信息
 	runJs := renderJavascriptCoder(loadListItemsJS, sc)
 	tmp := map[string]bool{}
@@ -151,7 +152,7 @@ func (vm *VM) CountYestodayArts(url string, listDealy int64, trunPageDelay int64
 				chromedp.Evaluate(runJs, &listResult),
 			})
 			if err != nil {
-				log.Println("执行JS代码失败", err.Error())
+				qu.Debug("执行JS代码失败", err.Error())
 				vm.dnf.Dispatch("debug_event", "3 执行JS代码失败")
 				return
 			}

+ 17 - 14
backend/vm/vm.go

@@ -7,7 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"io/ioutil"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"math/rand"
 	"net/http"
 	"os"
@@ -52,13 +52,13 @@ var (
 func renderJavascriptCoder(tpl string, sc *be.SpiderConfig) string {
 	t, err := template.New("").Parse(tpl)
 	if err != nil {
-		log.Println("创建JS代码模板失败", err.Error())
+		qu.Debug("创建JS代码模板失败", err.Error())
 		return ""
 	}
 	buf := new(bytes.Buffer)
 	err = t.Execute(buf, sc)
 	if err != nil {
-		log.Println("执行JS代码模板失败", err.Error())
+		qu.Debug("执行JS代码模板失败", err.Error())
 		return ""
 	}
 	return buf.String()
@@ -70,21 +70,21 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
 		Timeout: 30 * time.Second,
 	}
 	for _, attach := range v.AttachLinks {
-		log.Println("准备下载附件,", attach.Href, attach.Title)
+		qu.Debug("准备下载附件,", attach.Href, attach.Title)
 		req, err := http.NewRequest("GET", attach.Href, nil)
 		if err != nil {
-			log.Println(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
+			qu.Debug(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
 			continue
 		}
 		req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
 		resp, err := client.Do(req)
 		if err != nil {
-			log.Println(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
+			qu.Debug(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
 			continue
 		}
 		bs, err := ioutil.ReadAll(resp.Body)
 		if err != nil {
-			log.Println(" 下载附件 下载 出错:", attach.Href, attach.FileName, err.Error())
+			qu.Debug(" 下载附件 下载 出错:", attach.Href, attach.FileName, err.Error())
 			continue
 		}
 		resp.Body.Close()
@@ -99,7 +99,7 @@ func downloadAttaches(v *be.ResultItem, attachesDir string) {
 		save2File := attachesDir + "/" + fileName
 		fo, err := os.Create(save2File)
 		if err != nil {
-			log.Println(" 下载附件 生成文件 出错:", attach.Href, attach.FileName, save2File, err.Error())
+			qu.Debug(" 下载附件 生成文件 出错:", attach.Href, attach.FileName, save2File, err.Error())
 			continue
 		}
 		fo.Write(bs)
@@ -128,18 +128,20 @@ func trunPage(sc *be.SpiderConfig, delay int64, ctx context.Context) error {
 	if runJs == "" {
 		runJs = fmt.Sprintf(`var link=document.querySelector("%s");if(link)link.click();""`, sc.ListNextPageCss)
 	}
-	log.Println("将要执行翻页的JS代码,", runJs)
+	qu.Debug("将要执行翻页的JS代码,", runJs)
 	//TODO 1. 获取当前列表当前页的内容快照,以便与翻页后的结果对比
 	var result1, result2 string
 	var checkRunJs = fmt.Sprintf(`document.querySelector("%s").outerText`, sc.ListBodyCss)
-	log.Println("检查翻页是否成功,执行的JS", checkRunJs)
+	qu.Debug("检查翻页是否成功,执行的JS", checkRunJs)
 	err := chromedp.Run(ctx, chromedp.Tasks{
 		chromedp.Evaluate(checkRunJs, &result1),
 	})
 	if err != nil {
-		log.Println("翻页检查1失败,", checkRunJs)
+		qu.Debug("翻页检查1失败,", checkRunJs)
 		return err
 	}
+	qu.Debug("第一页:", result1)
+	qu.Debug("runJs:", runJs, delay)
 	if runJs != "" {
 		//可能就没有分页
 		err = chromedp.Run(ctx, chromedp.Tasks{
@@ -147,18 +149,19 @@ func trunPage(sc *be.SpiderConfig, delay int64, ctx context.Context) error {
 			chromedp.Sleep(time.Duration(delay) * time.Millisecond),
 		})
 		if err != nil {
-			log.Println("翻页操作失败,", runJs)
+			qu.Debug("翻页操作失败,", runJs)
 			return err
 		}
 	} else {
 		return errors.New("trun page error ")
 	}
+	qu.Debug("--------------------------")
 	err = chromedp.Run(ctx, chromedp.Tasks{
 		chromedp.Evaluate(checkRunJs, &result2),
 	})
-
+	qu.Debug("第二页:", result2)
 	if err != nil {
-		log.Println("翻页检查2失败,", checkRunJs)
+		qu.Debug("翻页检查2失败,", checkRunJs)
 		return err
 	}
 	if result1 == "" || result2 == "" || result1 == result2 {

+ 14 - 14
backend/vm/worker.go

@@ -3,7 +3,7 @@ package vm
 import (
 	"container/list"
 	"fmt"
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
 	"sync"
 	"time"
@@ -48,7 +48,7 @@ func (w *Worker) Run(v *be.ResultItem, ch chan *Worker, wg *sync.WaitGroup) {
 		chromedp.Evaluate(w.js, v),
 	})
 	if err != nil {
-		log.Println("执行JS代码失败_详情", err.Error())
+		qu.Debug("执行JS代码失败_详情", err.Error())
 	}
 	if len(v.AttachLinks) > 0 { //有附件
 		w.vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. 下载附件"))
@@ -68,7 +68,7 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 	//sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Href: url})
 	sc, err := be.NewSpiderConfig(cssMark)
 	if err != nil {
-		log.Println("标注信息传输失败!")
+		qu.Debug("标注信息传输失败!")
 		vm.dnf.Dispatch("debug_event", "标注信息传输失败!")
 		return
 	}
@@ -76,12 +76,12 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 		sc.Href = url
 	}
 	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
-	log.Println("1浏览器打开")
+	qu.Debug("1浏览器打开")
 	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
 	defer func() {
 		cancel()
 		baseCancel()
-		log.Println("0浏览器已经销毁")
+		qu.Debug("0浏览器已经销毁")
 		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
 		close(exit)
 	}()
@@ -92,18 +92,18 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 	if runContentJs == "" {
 		runContentJs = renderJavascriptCoder(loadContentJS, sc)
 	}
-	log.Println("获取列表JS代码", runListJs)
-	ws := make([]*Worker, threads)
+	qu.Debug("获取列表JS代码", runListJs)
+	wts := make([]*Worker, threads)
 	ch := make(chan *Worker, threads)
 	wg := new(sync.WaitGroup)
 	for i := 0; i < threads; i++ {
 		w := NewWorker(headless, showImage, proxyServe, contentDelay, runContentJs, vm)
-		ws = append(ws, w)
+		wts = append(wts, w)
 		ch <- w
 	}
 	//批量销毁
 	defer func() {
-		for _, w := range ws {
+		for _, w := range wts {
 			if w != nil {
 				w.Destory()
 			}
@@ -118,7 +118,7 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond),
 	})
 	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
-	log.Println("2页面打开")
+	qu.Debug("2页面打开")
 	currentResult := list.New()
 	be.DataResults[sc.Code] = currentResult
 	for i := 0; i < maxPages; i++ {
@@ -128,12 +128,12 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 			chromedp.Evaluate(runListJs, &listResult),
 		})
 		if err != nil {
-			log.Println("执行JS代码失败_列表", err.Error())
+			qu.Debug("执行JS代码失败_列表", err.Error())
 			vm.dnf.Dispatch("debug_event", "2 列表-执行JS代码失败")
 			return
 		}
 		vm.dnf.Dispatch("debug_event", "3 获取列表完成")
-		log.Println("3获取列表完成")
+		qu.Debug("3获取列表完成")
 
 		//TODO 3. 打开详情页 ,支持多线程
 		for _, v := range listResult {
@@ -152,12 +152,12 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 		wg.Wait()
 		vm.dnf.Dispatch("debug_event", "4 当前页采集完成,准备执行翻页逻辑//"+sc.ListNextPageCss)
 		if err = trunPage(sc, trunPageDelay, ctx); err != nil {
-			log.Println("翻页失败", err.Error())
+			qu.Debug("翻页失败", err.Error())
 			vm.dnf.Dispatch("debug_event", "6 翻页失败: "+err.Error())
 			time.Sleep(3 * time.Second)
 			break
 		}
 	}
 	vm.dnf.Dispatch("debug_event", "6 采集测试完成")
-	log.Println("6 采集测试完成")
+	qu.Debug("6 采集测试完成")
 }

+ 33 - 15
backend/webservice/webservice.go

@@ -6,6 +6,7 @@ import (
 	_ "embed"
 	"encoding/json"
 	"fmt"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"log"
 	"net/http"
 	be "spider_creator/backend"
@@ -20,6 +21,7 @@ type (
 	SpiderConfigItem struct {
 		Key string `json:"key"`
 		Css string `json:"css"`
+		Url string `json:"url"`
 	}
 	WebService struct {
 		db                  *bdb.SpiderDb
@@ -32,7 +34,8 @@ var (
 	//go:embed cert.pem
 	certBytes []byte
 	//go:embed key.pem
-	keyBytes []byte
+	keyBytes               []byte
+	currentTabSpiderConfig = &be.SpiderConfig{}
 )
 
 // NewWebService
@@ -46,7 +49,7 @@ func (ws *WebService) RunHttpServe() {
 	// 解析证书
 	cert, err := tls.X509KeyPair(certBytes, keyBytes)
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		return
 	}
 	// 创建一个TLS配置
@@ -65,28 +68,28 @@ func (ws *WebService) RunHttpServe() {
 	mux.HandleFunc("/save", ws.SaveSpiderConfig)
 	mux.HandleFunc("/load", ws.LoadSpiderConfig)
 	//
-	log.Println("Starting HTTPS server on ", LISTEN_ADDR)
+	qu.Debug("Starting HTTPS server on ", LISTEN_ADDR)
 	err = server.ListenAndServeTLS("", "")
 	if err != nil {
-		log.Println("Failed to start server:  ", err.Error())
+		qu.Debug("Failed to start server:  ", err.Error())
 		return
 	}
 }
 
-// LoadCurrentSpiderConfig,json处理
+// SaveSpiderConfig decodes a JSON selector annotation (key, css, url) from the request and notifies the dev tool of the change.
 func (ws *WebService) SaveSpiderConfig(w http.ResponseWriter, r *http.Request) {
-	log.Println("保存设置")
+	qu.Debug("保存设置")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 	w.Header().Set("Content-Type", "application/json")
 	var req = new(SpiderConfigItem)
 	err := json.NewDecoder(r.Body).Decode(req)
 	if err != nil {
-		log.Println("序列化失败")
+		qu.Debug("序列化失败")
 		http.Error(w, err.Error(), http.StatusBadRequest)
 		return
 	}
-	log.Println("CSS", req.Key, req.Css)
-	//TODO 业务操作
+	qu.Debug("CSS", req.Key, req.Css, req.Url)
+	/*//TODO 业务操作
 	switch req.Key {
 	case "listItemCss":
 		ws.currentSpiderConfig.ListItemCss = req.Css
@@ -110,20 +113,35 @@ func (ws *WebService) SaveSpiderConfig(w http.ResponseWriter, r *http.Request) {
 		ws.currentSpiderConfig.AttachCss = req.Css
 	}
 	fmt.Fprint(w, "{'code':200}")
-	ws.db.SaveOrUpdate(ws.currentSpiderConfig)
+	ws.db.SaveOrUpdate(ws.currentSpiderConfig)*/
+	fmt.Fprint(w, "{'code':200}")
 	//TODO 通知开发工具端,CSS选择器有变动
-	ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": req.Key, "sc": ws.currentSpiderConfig})
+	ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": req.Key, "css": req.Css, "url": req.Url})
 }
 
-// LoadCurrentSpiderConfig,加载,返回当前配置项
+// LoadSpiderConfig decodes the JSON request and responds with the current tab's spider config encoded as JSON.
 func (ws *WebService) LoadSpiderConfig(w http.ResponseWriter, r *http.Request) {
-	log.Println("加载当前配置项")
+	qu.Debug("加载当前配置项")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 	w.Header().Set("Content-Type", "application/json")
-	err := json.NewEncoder(w).Encode(ws.currentSpiderConfig)
+	var req = new(SpiderConfigItem)
+	err := json.NewDecoder(r.Body).Decode(req)
 	if err != nil {
-		log.Println("反向序列化失败")
+		qu.Debug("序列化失败")
 		http.Error(w, err.Error(), http.StatusBadRequest)
 		return
+	} else {
+		qu.Debug("高亮:", req.Url, *currentTabSpiderConfig)
+		err = json.NewEncoder(w).Encode(currentTabSpiderConfig)
+		if err != nil {
+			log.Println("反向序列化失败")
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
 	}
 }
+
+func SetCurrentTabCssMark(sc *be.SpiderConfig) { // SetCurrentTabCssMark replaces the package-level config that LoadSpiderConfig encodes into its response.
+	currentTabSpiderConfig = sc // stored as-is, no nil check; NOTE(review): LoadSpiderConfig dereferences this pointer — confirm callers never pass nil
+	qu.Debug("当前编辑爬虫链接:", currentTabSpiderConfig)
+}

+ 4 - 5
bind4comm.go

@@ -2,9 +2,8 @@
 package main
 
 import (
-	"log"
-
 	"github.com/wailsapp/wails/v2/pkg/runtime"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 )
 
 // Greet returns a greeting for the given name
@@ -15,7 +14,7 @@ func (a *App) Dispatch(event string, data interface{}) error {
 
 // SelectSaveFilePath
 func (a *App) SelectSaveFilePath(defaultDirectory, defaultFileName string) string {
-	log.Println("导出文件位置:", defaultDirectory, defaultFileName)
+	qu.Debug("导出文件位置:", defaultDirectory, defaultFileName)
 	path, err := runtime.SaveFileDialog(a.ctx, runtime.SaveDialogOptions{Filters: []runtime.FileFilter{
 		{Pattern: "*.epub", DisplayName: "epub file *.epub"},
 		{Pattern: "*.xlsx", DisplayName: "excel file *.xlsx"},
@@ -25,7 +24,7 @@ func (a *App) SelectSaveFilePath(defaultDirectory, defaultFileName string) strin
 		DefaultDirectory: defaultDirectory,
 	})
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		return ""
 	}
 	return path
@@ -37,7 +36,7 @@ func (a *App) SelectOpenFilePath() string {
 		{Pattern: "*.xlsx", DisplayName: "excel file *.xlsx"},
 	}})
 	if err != nil {
-		log.Println(err.Error())
+		qu.Debug(err.Error())
 		return ""
 	}
 	return path

+ 32 - 8
bind4spider.go

@@ -3,10 +3,10 @@ package main
 
 import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
-	"log"
 	"sort"
 	be "spider_creator/backend"
 	bdb "spider_creator/backend/db"
+	bew "spider_creator/backend/webservice"
 )
 
 // DebugSpider 调试爬虫
@@ -14,7 +14,7 @@ func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy
 	showImage bool, threads int, cssMark map[string]interface{}) {
 	exitCh = make(chan bool, 1)
 	qu.Debug(url, proxyServe, maxPages, listDealy, trunPageDelay, contentDelay, headless, showImage, threads)
-	qu.Debug(cssMark)
+	qu.Debug("cssMark---", cssMark)
 	if maxPages == 1 && threads == 1 {
 		vm.RunSpider(url, maxPages, listDealy, contentDelay, headless, showImage, proxyServe, exitCh, cssMark)
 	} else { //多页下载强制使用多线程模式
@@ -23,27 +23,41 @@ func (a *App) DebugSpider(url string, proxyServe string, maxPages int, listDealy
 }
 
 // VerifySpiderConfig 验证
-func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) (*be.SpiderConfigVerifyResult, int, string) {
+func (a *App) VerifySpiderConfig(cssMark map[string]interface{}) map[string]interface{} {
 	var errtype int
+	qu.Debug(cssMark)
 	sc, err := be.NewSpiderConfig(cssMark)
 	if err != nil {
 		qu.Debug("CssMark Marshal Error:", err)
-		return nil, errtype, "验证失败"
+		return map[string]interface{}{
+			"ret": nil,
+			"err": errtype,
+			"msg": "标注信息传输失败!",
+		}
 	}
 	ret, err := vm.VerifySpiderConfig(sc) //验证
 	if err != nil {
 		qu.Debug("验证脚本配置失败::", err.Error())
-		return ret, errtype, "验证脚本配置失败"
+		return map[string]interface{}{
+			"ret": ret,
+			"err": errtype,
+			"msg": "验证脚本配置失败!",
+		}
 	}
 	be.VerifyResults[sc.Code] = ret
-	return ret, 1, "验证脚本配置成功"
+	qu.Debug("验证结果---", *ret)
+	return map[string]interface{}{
+		"ret": ret,
+		"err": 1,
+		"msg": "验证脚本配置成功",
+	}
 }
 
 // StopDebugSpider 停止调试
 func (a *App) StopDebugSpider() string {
 	defer func() {
 		if err := recover(); err != nil {
-			log.Println(err)
+			qu.Debug(err)
 		}
 	}()
 	exitCh <- true
@@ -67,9 +81,19 @@ func (a *App) ViewResultItemAll(code string) be.ResultItems {
 	return ret
 }
 
+// ServerActionCurrentOpenTab records the CSS marks of the spider currently being edited, so they can be echoed back for highlighting.
+func (a *App) ServerActionCurrentOpenTab(cssMark map[string]interface{}) {
+	qu.Debug("----------------")
+	sc, err := be.NewSpiderConfig(cssMark)
+	if err != nil {
+		qu.Debug("标注信息传输失败!") // NOTE(review): only logged, no return — sc is still handed on below; confirm it is non-nil when err != nil
+	}
+	bew.SetCurrentTabCssMark(sc)
+}
+
 // ExportEpubFile
 func (a *App) ExportEpubFile(bookname, filepath string) string {
-	log.Println("EPUB 文件存储:", bookname, filepath)
+	qu.Debug("EPUB 文件存储:", bookname, filepath)
 	db.ExportEpubFile(bookname, filepath, currentResults)
 	return "ok"
 }

+ 2 - 2
bind4spidercfg.go

@@ -2,7 +2,7 @@
 package main
 
 import (
-	"log"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
 )
 
@@ -19,7 +19,7 @@ func (a *App) SaveOrUpdateSpiderConfig(sc *be.SpiderConfig) string {
 
 // SwitchSpiderConfig
 func (a *App) SwitchSpiderConfig(code string) string {
-	log.Println("切换当前默认爬虫配置:", code)
+	qu.Debug("切换当前默认爬虫配置:", code)
 	csf := db.Load(code)
 	be.CopySpiderConfig(currentSpiderConfig, csf)
 	return "ok"

+ 4 - 0
frontend/src/components/spider/jscodetpl.js

@@ -64,5 +64,9 @@ ret
     AttachJsCode:`
 //附件下载以及提交
     
+`,
+    ListTurnPageJsCode: `
+//列表页翻页代码
 `
+
 }

+ 21 - 15
frontend/src/views/CodeList.vue

@@ -65,11 +65,11 @@
                 <el-table-column prop="modifyuser" label="维护人" width="80" show-overflow-tooltip></el-table-column>
                 <el-table-column label="操作" width="160">
                     <template #default="scope">
-                        <el-tooltip content="标注" placement="top">
+                  <!-- <el-tooltip content="标注" placement="top">
                             <el-button size="small" :class="{ active: scope.row._action_clicked_mark }" @click="tableEvents.handleDataTag(scope.row)">
                                 <el-icon><Link /></el-icon>
                             </el-button>
-                        </el-tooltip>
+                        </el-tooltip>-->
                         <el-tooltip content="编辑" placement="top">
                             <el-button size="small" :class="{ active: scope.row._action_clicked_edit }" @click="tableEvents.handleEdit(scope.$index, scope.row)">
                                 <el-icon><Edit /></el-icon>
@@ -514,6 +514,10 @@ const getLuaParams = row => {
         site: row.site,
         channel: row.channel,
         modifyuser: row.modifyuser,
+        claimtime: row.claimtime,
+        recovertime: row.recovertime,
+        priority: row.priority,
+        spiderimportant: row.spiderimportant,
         modifytime: row.modifytime,
     }
     return baseInfo
@@ -526,8 +530,6 @@ const editDialogMarkClick = (row) => {
         showClose: true,
         duration: 3000,
     });
-    const mark = getMarkWithRow(row)
-    ServerActionCurrentOpenTab(mark)
     BrowserOpenURL(row.href)
 }
 
@@ -547,6 +549,8 @@ const tableEvents = {
             showClose: true,
             duration: 3000,
         });
+        const mark = getMarkWithRow(row)
+        ServerActionCurrentOpenTab(mark)
         openEditDialog(row)
     },
     handleDebug(index, row) {
@@ -608,7 +612,7 @@ const tableEvents = {
     handleRollback(index, row) {
         onlyClickHighlight(row, '_action_clicked_rollback')
         const lua = getLuaParams(row)
-        lua.state = 0
+        lua.state = 12
         ServerActionUpdateCodeState({ lua: lua }).then(r => {
             if (r.err === 1) {
                 ElMessage({
@@ -630,18 +634,20 @@ const tableEvents = {
 
 //Wails事件绑定
 EventsOn("spiderConfigChange", data => {
+    console.log(data)
     const { key, css, url } = data
+    refreshAndAsyncEditDialog(key, css)
     // 判断标注url和编辑url是否相同
-    const editUrl = currentEditRow.href
-    if (url === editUrl) {
-        refreshAndAsyncEditDialog(key, css[key])
-    } else {
-        ElMessage({
-            message: `标注url和编辑url不匹配,此次更新取消。当前标注url: ${url},当前编辑url: ${editUrl}`,
-            type: 'warn',
-            duration: 4000,
-        })
-    }
+    // const editUrl = currentEditRow.href
+    // if (url === editUrl) {
+    //     refreshAndAsyncEditDialog(key, css[key])
+    // } else {
+    //     ElMessage({
+    //         message: `标注url和编辑url不匹配,此次更新取消。当前标注url: ${url},当前编辑url: ${editUrl}`,
+    //         type: 'warn',
+    //         duration: 4000,
+    //     })
+    // }
 })
 </script>
 

+ 51 - 30
server.go

@@ -8,6 +8,7 @@ import (
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"net/http"
 	"reflect"
+	be "spider_creator/backend"
 	"time"
 )
 
@@ -29,7 +30,8 @@ func (a *App) ServerActionCodeList(param map[string]interface{}) *Result {
 	qu.Debug("param---", param)
 	data := &Result{}
 	if User != nil {
-		formatUser(param)
+		//formatUser(param)
+		param["user"] = User
 		qu.Debug("param---", param)
 		getResult(param, data, "getcodes")
 	} else {
@@ -39,49 +41,68 @@ func (a *App) ServerActionCodeList(param map[string]interface{}) *Result {
 	return data
 }
 
-// ServerActionGetModifyUsers 获取爬虫开发人员列表
-func (a *App) ServerActionGetModifyUsers() *Result {
+// ServerActionClaimCodes 爬虫认领
+func (a *App) ServerActionClaimCodes() *Result {
 	data := &Result{}
-	if User != nil && User.Auth > 1 { //禁止开发查询
-		getResult(nil, data, "getmodifyusers")
-		if len(User.Ids) > 0 && User.Identity == 0 { //外包审核员或管理员只能查看外包开发人员信息
-			resultUsers := []map[string]interface{}{}
-			for _, user := range data.Data.List {
-				userid := qu.ObjToString(user["_id"])
-				for _, id := range User.Ids {
-					if userid == id {
-						resultUsers = append(resultUsers, user)
-						break
-					}
-				}
-			}
-			data.Data.List = resultUsers
-			data.Data.Total = len(resultUsers)
-		}
+	if User != nil && User.Auth == 1 {
+		getResult(User, data, "claimcode")
 	} else {
-		data.Msg = "查询开发列表失败,权限不够!"
+		data.Msg = "认领失败!"
 	}
 	return data
 }
 
-// ServerActionClaimCodes 爬虫认领
-func (a *App) ServerActionClaimCodes() *Result {
+// ServerActionUpdateCode 爬虫更新
+func (a *App) ServerActionUpdateCode(param map[string]interface{}) *Result {
+	qu.Debug("param---", param)
 	data := &Result{}
-	if User.Auth == 1 {
-		param := map[string]interface{}{}
-		formatUser(param)
-		getResult(data, param, "claimcode")
+	if User != nil {
+		//formatUser(param)
+		qu.Debug("param---", param)
+		getResult(map[string]interface{}{"param": param, "user": User}, data, "updatecode")
 	} else {
-		data.Msg = "认领失败!"
+		data.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(data.Msg)
 	}
+	qu.Debug(*data)
 	return data
 }
 
-// ServerActionUpdateCode 爬虫更新
-func (a *App) ServerActionUpdateCode(param map[string]interface{}) *Result {
+// ServerActionUpdateCodeState updates the state of a spider.
+func (a *App) ServerActionUpdateCodeState(param map[string]interface{}) *Result {
 	qu.Debug("param---", param)
+	/*
+		1、提交审核
+		2、回退
+		3、打回
+		4、审核通过
+		5、上线
+	*/
 	data := &Result{}
-	getResult(param, data, "updatecode")
+	//前期校验
+	if User != nil {
+		//提交审核时,验证校验清单
+		lua, _ := param["lua"].(map[string]interface{})
+		if qu.IntAll(lua["state"]) == 1 {
+			code := qu.ObjToString(lua["code"])
+			if vr := be.VerifyResults[code]; vr == nil { //没有检验清单,不允许提交
+				data.Msg = "未验证,无法提交!"
+			} else if !vr.ListItems || !vr.Content || !vr.Title || !vr.PublishTime { //校验检验清单必通过项
+				//vr.ListTrunPage
+				data.Msg = "验证清单未通过!"
+			} else {
+				param["verify"] = vr
+			}
+		}
+	} else {
+		data.Msg = "用户登录异常,请重新登录!"
+		qu.Debug(data.Msg)
+	}
+	if data.Msg == "" {
+		param["user"] = User
+		//formatUser(param)
+		getResult(param, data, "updatecodestate")
+	}
 	qu.Debug(*data)
 	return data
 }

+ 25 - 0
user.go

@@ -63,3 +63,28 @@ func (a *App) ServerActionUserLogout() map[string]interface{} {
 		"data": nil,
 	}
 }
+
+// ServerActionGetModifyUsers fetches the list of spider developers; when User is nil or User.Auth <= 1 it only sets an error message on the returned Result.
+func (a *App) ServerActionGetModifyUsers() *Result {
+	data := &Result{}
+	if User != nil && User.Auth > 1 { // developers are not allowed to query
+		getResult(nil, data, "getmodifyusers")
+		if len(User.Ids) > 0 && User.Identity == 0 { // outsourced reviewers or admins may only see outsourced developers
+			resultUsers := []map[string]interface{}{}
+			for _, user := range data.Data.List {
+				userid := qu.ObjToString(user["_id"])
+				for _, id := range User.Ids {
+					if userid == id {
+						resultUsers = append(resultUsers, user)
+						break
+					}
+				}
+			}
+			data.Data.List = resultUsers
+			data.Data.Total = len(resultUsers)
+		}
+	} else {
+		data.Msg = "查询开发列表失败,权限不够!"
+	}
+	return data
+}