
Reorganize the backend method directory layout

mxs, 10 months ago
parent commit 30d6ee573f

+ 1 - 0
backend/README.md

@@ -0,0 +1 @@
+All backend code has been moved here.

+ 93 - 0
backend/ai/ai.go

@@ -0,0 +1,93 @@
+package ai
+
+import (
+	"errors"
+	"fmt"
+
+	be "spidercreator/backend"
+	"strings"
+
+	zhipu "github.com/itcwc/go-zhipu/model_api"
+)
+
+const (
+	MODEL_NAME = "glm-4-flash" // alternative: "glm-4-air"
+)
+
+var (
+	expireAtTime = int64(1719803252) // token expiration time (unix seconds)
+	apiKey       = "5343038d9d934536456f281f8487866a.YUmO7HK9xNb990j9"
+)
+
+// UpdateResultDateStr asks the LLM to normalize each item's ListPubTime to YYYY-MM-DD, updating the items in place.
+func UpdateResultDateStr(rs be.ResultItems) (err error) {
+	tmp := make([]string, len(rs))
+	for i, v := range rs {
+		tmp[i] = v.ListPubTime
+	}
+
+	prompt := fmt.Sprintf(`根据我提供的内容,识别每行文本中的日期,按照YYYY-MM-DD形式输出,如:2024-01-01;找不到日期数据,输出NULL。不要联网,不要解释,不要说明,直接输出结果。
+				---------------------------
+				%s				
+				`, strings.Join(tmp, "\n"))
+	params := zhipu.PostParams{
+		Model: MODEL_NAME,
+		Messages: []zhipu.Message{
+			{
+				Role:    "user", // message role, see the zhipu docs
+				Content: prompt, // message content
+			},
+		},
+	}
+	postResponse, err := zhipu.BeCommonModel(expireAtTime, params, apiKey)
+	if err != nil {
+		return err
+	}
+
+	// parse the response
+	choices, _ := postResponse["choices"].([]interface{})
+	if len(choices) == 0 {
+		return errors.New("调用大模型失败")
+	}
+	obj, _ := choices[0].(map[string]interface{})
+	message, _ := obj["message"].(map[string]interface{})
+	value, _ := message["content"].(string)
+	// log.Println("提示语", prompt)
+	// log.Println("AI调用结果", value)
+	results := strings.Split(value, "\n")
+	if len(results) < len(rs) {
+		err = errors.New("调用大模型失败")
+		return
+	}
+	// update the items in place (rs holds pointers)
+	for i, v := range rs {
+		date := strings.TrimSpace(results[i])
+		if date != "NULL" {
+			v.ListPubTime = date
+		} else {
+			v.ListPubTime = ""
+		}
+	}
+	return nil
+}
+
+// Question sends a single-turn prompt to the model and returns the reply content.
+func Question(prompt string) (ret interface{}, err error) {
+	params := zhipu.PostParams{
+		Model: MODEL_NAME,
+		Messages: []zhipu.Message{
+			{
+				Role:    "user", // message role, see the zhipu docs
+				Content: prompt, // message content
+			},
+		},
+	}
+	postResponse, err := zhipu.BeCommonModel(expireAtTime, params, apiKey)
+	if err != nil {
+		return nil, err
+	}
+
+	// parse the response
+	choices, _ := postResponse["choices"].([]interface{})
+	if len(choices) == 0 {
+		return nil, errors.New("调用大模型失败")
+	}
+	obj, _ := choices[0].(map[string]interface{})
+	message, _ := obj["message"].(map[string]interface{})
+	value := message["content"]
+
+	return value, nil
+}
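For orientation, here is a minimal caller sketch for the two helpers above. It is not part of this commit; the sample inputs are made up, and the normalized dates depend entirely on the model's reply.

```go
package main

import (
	"fmt"
	"log"

	be "spidercreator/backend"
	ai "spidercreator/backend/ai"
)

func main() {
	// Free-form question; Question returns the "content" of the first choice.
	reply, err := ai.Question("Summarize what a web spider does in one sentence.")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(reply)

	// Normalize ListPubTime on scraped items in place (hypothetical inputs).
	items := be.ResultItems{
		{ListPubTime: "发布于 2024年1月1日"},
		{ListPubTime: "no date here"},
	}
	if err := ai.UpdateResultDateStr(items); err != nil {
		log.Println("date normalization failed:", err)
	}
	for _, it := range items {
		fmt.Println(it.ListPubTime) // e.g. "2024-01-01", or "" when the model returns NULL
	}
}
```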

+ 1 - 0
backend/backend.go

@@ -0,0 +1 @@
+package backend

+ 116 - 0
backend/browser.go

@@ -0,0 +1,116 @@
+package backend
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+
+	"github.com/chromedp/cdproto/page"
+
+	"github.com/chromedp/chromedp"
+)
+
+var (
+	useragent = []string{
+		"Chrome: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
+		"Firefox: Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",
+		"Safari: Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_5 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0 Mobile/15D60 Safari/604.1",
+		"MacOSX: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14",
+		"Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
+		"Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
+		"Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
+		"Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1",
+		"Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
+		"Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727;SE2.XMetaSr1.0)",
+		"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.70 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
+		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
+		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
+		"Safari 11 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15 QQBrowserLite/1.3.0",
+		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
+		"Chrome 59 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
+		"Safari 11 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5",
+		"Firefox 9 Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:93.0) Gecko/20100101 Firefox/93.0",
+		"Safari Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
+		"Chrome 8 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
+		"Chrome Mozilla/5.0 (X11; U; U; Linux x86_64; zh-my) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Puffin/8.3.1.41624AP",
+		"Opera 28 Mozilla/5.0 (Linux; BRAVIA 4K 2015 Build/LMY48E.S265) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36 OPR/28.0.1754.0",
+		"Safari Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 HeyTapBrowser/40.7.29.1",
+		"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.58 Safari/537.36 Edg/93.0.961.33",
+		"Chrome 9 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/15.0 Chrome/90.0.4430.210 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
+		"Chrome Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
+		"Microsoft Edge Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
+		"Chrome 8 Mozilla/5.0 (Windows NT 10.0; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+		"Chrome 8 Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
+		"Chrome 8 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+		"Chrome Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+		"Firefox 7 Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
+		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
+		"Internet Explorer 11 Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; LCJB; rv:11.0) like Gecko",
+		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
+		"Firefox 36  Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
+		"Chrome Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400",
+		"Chrome 58 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
+		"Firefox 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
+		"Chrome 8 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
+		"Chrome 9 Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38",
+	}
+)
+
+func NewBrowser(headless bool, showImage bool, proxyServe string) (
+	context.Context, context.CancelFunc,
+	context.Context, context.CancelFunc,
+	context.Context, context.CancelFunc,
+) {
+	ctx, cancelFn := chromedp.NewContext(context.Background())
+
+	chromeOptions := append(chromedp.DefaultExecAllocatorOptions[:],
+		chromedp.NoDefaultBrowserCheck,                                  // skip the default-browser check
+		chromedp.Flag("enable-automation", false),                       // avoid webdriver detection
+		chromedp.Flag("disable-blink-features", "AutomationControlled"), // disable the Blink automation feature
+		chromedp.Flag("force-dev-mode-highlighting", true),
+		chromedp.Flag("disable-extensions", false), // whether to disable extensions
+		chromedp.Flag("headless", headless),
+		chromedp.Flag("user-agent", useragent[rand.Intn(len(useragent))]), // a random user-agent still has to be set here
+		chromedp.Flag("disable-keep-alive", true),
+		chromedp.Flag("disable-gpu", true),
+		chromedp.Flag("no-sandbox", true),
+		chromedp.Flag("disable-dev-shm-usage", false),
+		chromedp.Flag("default-browser-check", false),
+		chromedp.Flag("ignore-certificate-errors", true), // ignore TLS certificate errors
+		chromedp.Flag("disable-web-security", true),      // disable web-security checks
+		chromedp.Flag("mute-audio", false),
+		chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
+	)
+	if proxyServe != "" {
+		chromeOptions = append(chromeOptions,
+			chromedp.ProxyServer(fmt.Sprintf("socks5://%s", proxyServe)),
+		)
+	}
+	if showImage {
+		chromeOptions = append(chromeOptions,
+			chromedp.Flag("blink-settings", "imagesEnabled=true"),
+		)
+	} else {
+		chromeOptions = append(chromeOptions,
+			chromedp.Flag("blink-settings", "imagesEnabled=false"),
+		)
+	}
+
+	allocCtx, allocCancelFn := chromedp.NewExecAllocator(ctx, chromeOptions...)
+	// create a browser context on top of the allocator
+	incCtx, incCancelFn := chromedp.NewContext(allocCtx,
+		chromedp.WithLogf(nil))
+	// register a script that hides navigator.webdriver on every new document
+	chromedp.Run(ctx,
+		chromedp.ActionFunc(func(cxt context.Context) error {
+			_, err := page.AddScriptToEvaluateOnNewDocument("Object.defineProperty(navigator, 'webdriver', { get: () => false, });").Do(cxt)
+			return err
+		}),
+	)
+	return ctx, cancelFn, allocCtx, allocCancelFn, incCtx, incCancelFn
+}
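A minimal consumption sketch (mirroring how backend/vm uses NewBrowser later in this commit): the last context pair is the one passed to chromedp.Run, and every cancel func is deferred to tear the browser down. The URL is only a placeholder.

```go
package main

import (
	"log"

	be "spidercreator/backend"

	"github.com/chromedp/chromedp"
)

func main() {
	// headless, no images, no proxy
	_, baseCancel, _, allocCancel, ctx, cancel := be.NewBrowser(true, false, "")
	defer func() {
		cancel()
		allocCancel()
		baseCancel()
	}()

	var title string
	if err := chromedp.Run(ctx,
		chromedp.Navigate("https://example.com"),
		chromedp.Title(&title),
	); err != nil {
		log.Fatal(err)
	}
	log.Println("page title:", title)
}
```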

+ 1 - 0
backend/db/README.md

@@ -0,0 +1 @@
+This package uses Bolt, an embedded key/value database; buckets of JSON-encoded records make it behave roughly like a small local document store (loosely comparable to MongoDB).
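For readers new to Bolt, a small sketch of what the helpers in this package do under the hood (buckets as namespaces, values as JSON blobs); the file name and struct are illustrative only.

```go
package main

import (
	"encoding/json"
	"log"

	"github.com/boltdb/bolt"
)

type doc struct {
	Name string `json:"name"`
}

func main() {
	db, err := bolt.Open("demo.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// write: one bucket, one key, a JSON-encoded value
	err = db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("myBucket"))
		if err != nil {
			return err
		}
		v, _ := json.Marshal(doc{Name: "demo"})
		return b.Put([]byte("key-1"), v)
	})
	if err != nil {
		log.Fatal(err)
	}

	// read it back
	_ = db.View(func(tx *bolt.Tx) error {
		raw := tx.Bucket([]byte("myBucket")).Get([]byte("key-1"))
		var d doc
		_ = json.Unmarshal(raw, &d)
		log.Println(d.Name)
		return nil
	})
}
```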

+ 119 - 0
backend/db/db.go

@@ -0,0 +1,119 @@
+package db
+
+import (
+	"encoding/json"
+	"errors"
+	"log"
+	be "spidercreator/backend"
+
+	"github.com/boltdb/bolt"
+)
+
+// When adding a new table, add its entity type to this constraint.
+type Entity interface {
+	be.SpiderConfig | be.Job
+}
+
+var (
+	// When adding a new table, add its bucket name here.
+	DB_TABLES = []string{"myBucket", "jobs", "spiderconfig", "systemconfig"}
+	Db        *SpiderDb
+)
+
+type (
+	// SpiderDb is the spider store; Bolt stands in for a real database here.
+	SpiderDb struct {
+		db  *bolt.DB
+		enf be.EventNotifyFace
+	}
+)
+
+// NewSpiderDb opens (or creates) the Bolt file and makes sure every bucket exists.
+func NewSpiderDb(dbfile string, enf be.EventNotifyFace) *SpiderDb {
+	db, err := bolt.Open(dbfile, 0600, nil)
+	if err != nil {
+		log.Println("db error", err.Error())
+	}
+	for _, t := range DB_TABLES {
+		err = db.Update(func(tx *bolt.Tx) error {
+			_, err := tx.CreateBucketIfNotExists([]byte(t))
+			return err
+		})
+		if err != nil {
+			log.Println("db error", err.Error())
+		}
+	}
+	return &SpiderDb{
+		db, enf,
+	}
+}
+
+// Close closes the underlying Bolt database.
+func (s *SpiderDb) Close() {
+	s.db.Close()
+}
+
+// Generic helpers shared by all tables.
+// LoadEntity loads and unmarshals a single record from the given bucket.
+func LoadEntity[T any](table, key string) (*T, error) {
+	var ret *T = new(T)
+	err := Db.db.View(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte(table))
+		value := bucket.Get([]byte(key))
+		if value != nil && len(value) > 0 {
+			_ = json.Unmarshal(value, ret)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return ret, nil
+}
+
+// LoadEntities loads every record stored in the given bucket.
+func LoadEntities[T1 Entity](table string) ([]*T1, error) {
+	ret := make([]*T1, 0)
+	// start a read-only transaction
+	err := Db.db.View(func(tx *bolt.Tx) error {
+		// open the target bucket
+		bucket := tx.Bucket([]byte(table))
+		if bucket == nil {
+			return errors.New("桶不存在")
+		}
+		// iterate every key/value pair in the bucket
+		return bucket.ForEach(func(k, v []byte) error {
+			var sf = new(T1)
+			json.Unmarshal(v, sf)
+			if sf != nil {
+				ret = append(ret, sf)
+			}
+			return nil
+		})
+	})
+	if err != nil {
+		return ret, err
+	}
+
+	return ret, nil
+}
+
+// DeleteEntity removes the record stored under the given key.
+func DeleteEntity[T any](table, key string) error {
+	return Db.db.Update(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte(table))
+		return bucket.Delete([]byte(key))
+
+	})
+}
+
+// SaveEntity marshals the record and stores it under the given key.
+func SaveEntity[T any](table, key string, obj *T) error {
+	// marshal and store the record
+	value, _ := json.Marshal(obj)
+	return Db.db.Update(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte(table))
+		err := bucket.Put([]byte(key), value)
+		return err
+	})
+}
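A usage sketch for the generic helpers above. The no-op notifier and the file name are assumptions; the real EventNotifyFace implementation is supplied elsewhere in the application.

```go
package main

import (
	"log"

	be "spidercreator/backend"
	"spidercreator/backend/db"
)

// nopNotify is a stand-in EventNotifyFace that ignores every event.
type nopNotify struct{}

func (nopNotify) Dispatch(event string, data interface{}) error { return nil }

func main() {
	db.Db = db.NewSpiderDb("spider.db", nopNotify{})
	defer db.Db.Close()

	cfg := &be.SpiderConfig{Code: "demo", Site: "example", Url: "https://example.com"}
	if err := db.SaveEntity("myBucket", cfg.Code, cfg); err != nil {
		log.Fatal(err)
	}

	all, err := db.LoadEntities[be.SpiderConfig]("myBucket")
	if err != nil {
		log.Fatal(err)
	}
	for _, c := range all {
		log.Println(c.Code, c.Url)
	}
}
```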

+ 11 - 0
backend/db/job.go

@@ -0,0 +1,11 @@
+/*
+Most operations are implemented generically in db.go,
+so table-specific data-access code is rarely needed here.
+*/
+package db
+
+// ExportJobResult2ExcelFile is not implemented yet; job results are exported from backend/vm for now.
+func (db *SpiderDb) ExportJobResult2ExcelFile() error {
+
+	return nil
+}

+ 212 - 0
backend/db/spider.go

@@ -0,0 +1,212 @@
+package db
+
+import (
+	"container/list"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"sort"
+	be "spidercreator/backend"
+	"strconv"
+	"strings"
+
+	"github.com/bmaupin/go-epub"
+
+	"github.com/boltdb/bolt"
+	"github.com/xuri/excelize/v2"
+)
+
+// Load reads a single SpiderConfig by code.
+func (s *SpiderDb) Load(code string) *be.SpiderConfig {
+	var req *be.SpiderConfig = new(be.SpiderConfig)
+	err := s.db.View(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte("myBucket"))
+		value := bucket.Get([]byte(code))
+		if value != nil && len(value) > 0 {
+			_ = json.Unmarshal(value, req)
+		}
+		return nil
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+	return req
+}
+
+// SaveOrUpdate merges the incoming config with any stored one and writes the result back.
+func (s *SpiderDb) SaveOrUpdate(sc *be.SpiderConfig) {
+	// load the existing record, if any
+	var sc1 *be.SpiderConfig = new(be.SpiderConfig)
+	var sc2 *be.SpiderConfig
+	err := s.db.View(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte("myBucket"))
+		value := bucket.Get([]byte(sc.Code))
+		if value != nil && len(value) > 0 {
+			_ = json.Unmarshal(value, sc1)
+		}
+		return nil
+	})
+	if err != nil {
+		log.Println(err.Error())
+		return
+	}
+	// merge and write back
+	if sc1 != nil {
+		sc2 = be.MergeSpiderConfig(sc1, sc)
+		value, _ := json.Marshal(sc2)
+		err = s.db.Update(func(tx *bolt.Tx) error {
+			bucket := tx.Bucket([]byte("myBucket"))
+			err := bucket.Put([]byte(sc.Code), value)
+			return err
+		})
+		if err != nil {
+			log.Println(err.Error())
+			return
+		}
+	}
+}
+
+// LoadSpiderConfigAll loads every config, sorted by code (descending).
+func (s *SpiderDb) LoadSpiderConfigAll() be.SpiderConfiges {
+	ret := make(be.SpiderConfiges, 0)
+	// start a read-only transaction
+	err := s.db.View(func(tx *bolt.Tx) error {
+		// open the config bucket
+		bucket := tx.Bucket([]byte("myBucket"))
+		// iterate every key/value pair in the bucket
+		return bucket.ForEach(func(k, v []byte) error {
+			var sf *be.SpiderConfig = new(be.SpiderConfig)
+			json.Unmarshal(v, sf)
+			if sf != nil {
+				ret = append(ret, sf)
+			}
+			return nil
+		})
+	})
+	sort.Sort(ret)
+	if err != nil {
+		log.Println(err.Error())
+	}
+	return ret
+}
+
+// DeleteSpiderConfig removes a config by code.
+func (s *SpiderDb) DeleteSpiderConfig(code string) {
+	err := s.db.Update(func(tx *bolt.Tx) error {
+		bucket := tx.Bucket([]byte("myBucket"))
+		err := bucket.Delete([]byte(code))
+		return err
+	})
+	if err != nil {
+		log.Println(err.Error())
+		return
+	}
+}
+
+// BatchImport imports SpiderConfig rows from an Excel file (header row and incomplete rows are skipped).
+func (s *SpiderDb) BatchImport(filepath string) error {
+	f, err := excelize.OpenFile(filepath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	for _, sheetName := range f.GetSheetList() {
+		// read all rows of the worksheet
+		rows, err := f.GetRows(sheetName)
+		if err != nil {
+			continue
+		}
+		//
+		for index, row := range rows {
+			if index == 0 || len(row) < 5 || row[0] == "" || row[3] == "" {
+				continue
+			}
+			sc := &be.SpiderConfig{
+				Code:    row[0],
+				Site:    row[1],
+				Channel: row[2],
+				Url:     row[3],
+				Author:  row[4],
+			}
+			value, _ := json.Marshal(sc)
+			err = s.db.Update(func(tx *bolt.Tx) error {
+				bucket := tx.Bucket([]byte("myBucket"))
+				err := bucket.Put([]byte(sc.Code), value)
+				return err
+			})
+			if err != nil {
+				continue
+			}
+		}
+
+	}
+
+	return nil
+}
+
+// ExportEpubFile writes the collected results to an EPUB file.
+func (db *SpiderDb) ExportEpubFile(bookname, filepath string,
+	currentResult *list.List) error {
+	output := epub.NewEpub(bookname)
+	output.SetTitle(bookname)
+	output.SetDescription(bookname)
+	output.SetAuthor("unknown")
+	i := 1
+	for el := currentResult.Front(); el != nil; el = el.Next() {
+		art, _ := el.Value.(*be.ResultItem)
+		body := "<h2>" + art.Title + "</h2><p>" + strings.Join(strings.Split(art.Content, "\n"), "</p><p>") + "</p>"
+		output.AddSection(body, art.Title, fmt.Sprintf("%06d.xhtml", i+1), "")
+		i += 1
+	}
+	fo, err := os.Create(filepath)
+	if err != nil {
+		db.enf.Dispatch("debug_event", err.Error())
+	}
+	output.WriteTo(fo)
+	fo.Close()
+	return nil
+}
+
+// ExportExcelFile writes the collected results to an Excel file.
+func (db *SpiderDb) ExportExcelFile(filepath, site, channel string,
+	currentResult *list.List) error {
+	f := excelize.NewFile()
+	defer f.Close()
+	f.SetCellStr("Sheet1", "A1", "站点")
+	f.SetCellStr("Sheet1", "B1", "栏目")
+	// remaining header cells
+	f.SetCellStr("Sheet1", "C1", "标题")
+	f.SetCellStr("Sheet1", "D1", "链接")
+	f.SetCellStr("Sheet1", "E1", "发布单位")
+	f.SetCellStr("Sheet1", "F1", "发布时间")
+	f.SetCellStr("Sheet1", "G1", "正文")
+	f.SetCellStr("Sheet1", "H1", "附件")
+	i := 0
+	for el := currentResult.Front(); el != nil; el = el.Next() {
+		r, _ := el.Value.(*be.ResultItem)
+		// site and channel columns
+		iStr := strconv.Itoa(i + 2)
+		f.SetCellStr("Sheet1", "A"+iStr, site)
+		f.SetCellStr("Sheet1", "B"+iStr, channel)
+		// data columns
+		f.SetCellStr("Sheet1", "C"+iStr, r.Title)
+		f.SetCellStr("Sheet1", "D"+iStr, r.Href)
+		f.SetCellStr("Sheet1", "E"+iStr, r.PublishUnit)
+		f.SetCellStr("Sheet1", "F"+iStr, r.ListPubTime)
+		f.SetCellStr("Sheet1", "G"+iStr, r.Content)
+		f.SetCellStr("Sheet1", "H"+iStr, "")
+		if len(r.AttachLinks) > 0 {
+			bs, err := json.Marshal(r.AttachLinks)
+			if err == nil {
+				f.SetCellStr("Sheet1", "H"+iStr, string(bs))
+			}
+		}
+		i += 1
+	}
+	err := f.SaveAs(filepath)
+	if err != nil {
+		return err
+	}
+	return nil
+}

+ 7 - 0
backend/face.go

@@ -0,0 +1,7 @@
+package backend
+
+type (
+	// EventNotifyFace dispatches named backend events (progress, debug output) to the embedding application.
+	EventNotifyFace interface {
+		Dispatch(event string, data interface{}) error
+	}
+)
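A minimal implementation sketch (assumed, for illustration): any type with this Dispatch method satisfies the interface; the real implementation is expected to forward events to the application's UI layer.

```go
package main

import "log"

// logNotify satisfies backend.EventNotifyFace by simply logging each event.
type logNotify struct{}

func (logNotify) Dispatch(event string, data interface{}) error {
	log.Printf("event=%s data=%v", event, data)
	return nil
}

func main() {
	var n logNotify
	_ = n.Dispatch("debug_event", "hello from the backend")
}
```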

+ 193 - 0
backend/types.go

@@ -0,0 +1,193 @@
+package backend
+
+import (
+	"container/list"
+)
+
+const (
+	JOB_RUNNING_EVENT_PROGRESS = 1
+	JOB_RUNNING_EVENT_DEBUG    = 0
+)
+
+type (
+	// SpiderConfig holds one spider's scraping configuration (CSS selectors and optional JS overrides).
+	SpiderConfig struct {
+		Site               string `json:"site"`
+		Channel            string `json:"channel"`
+		Author             string `json:"author"`
+		Url                string `json:"url"`
+		Code               string `json:"code"`
+		ListBodyCss        string `json:"listBodyCss"`
+		ListItemCss        string `json:"listItemCss"`
+		ListLinkCss        string `json:"listLinkCss"`
+		ListPubtimeCss     string `json:"listPublishTimeCss"`
+		ListNextPageCss    string `json:"listNextPageCss"`
+		TitleCss           string `json:"titleCss"`
+		PublishUnitCss     string `json:"publishUnitCss"`
+		PublishTimeCss     string `json:"publishTimeCss"`
+		ContentCss         string `json:"contentCss"`
+		AttachCss          string `json:"attachCss"`
+		ListJSCode         string `json:"listJs"`
+		ContentJSCode      string `json:"contentJs"`
+		AttachJSCode       string `json:"attachJs"` // unused
+		ListTrunPageJSCode string `json:"listTrunPageJs"`
+	}
+	// AttachLink describes one downloadable attachment.
+	AttachLink struct {
+		Title    string `json:"title"`
+		Href     string `json:"href"`
+		FileName string `json:"fileName"`
+		FileType string `json:"fileType"`
+		FileSize string `json:"fileSize"`
+		FilePath string `json:"filePath"`
+	}
+	// ResultItem is a single scraped record.
+	ResultItem struct {
+		No      int    `json:"no"` // sequence number
+		Site    string `json:"site"`
+		Channel string `json:"channel"`
+
+		Href        string        `json:"href"`
+		ListTitle   string        `json:"listTitle"`
+		ListPubTime string        `json:"listPubishTime"`
+		Title       string        `json:"title"`
+		PublishUnit string        `json:"publishUnit"`
+		PublishTime string        `json:"publishTime"`
+		Content     string        `json:"content"`
+		ContentHtml string        `json:"contentHtml"`
+		AttachLinks []*AttachLink `json:"attachLinks"` // attachment titles and links
+		AttachJson  string        `json:"attachJson"`  // OSS metadata for the attachments
+	}
+	// ResultItems is a list of scraped records.
+	ResultItems []*ResultItem
+
+	SpiderConfiges []*SpiderConfig
+	// JobItem is one spider task inside a job.
+	JobItem struct {
+		// code/site match the spider configuration
+		SpiderCode string `json:"code"`
+		SpiderSite string `json:"site"`
+		// runtime parameters (fall back to the job-level defaults when unset)
+		Channel              string `json:"channel"`
+		Url                  string `json:"url"`
+		ProxyServe           string `json:"proxyServe"`
+		MaxPages             int    `json:"maxPages"`
+		Threads              int    `json:"threads"`
+		ListDelay            int64  `json:"listDelay"`
+		TrunPageDelay        int64  `json:"trunPageDelay"`
+		ContentDelay         int64  `json:"contentDelay"`
+		NeedDownloadAttaches bool   `json:"needDownloadAttaches"`
+	}
+	// Job is a batch-job definition.
+	Job struct {
+		Code  string     `json:"code"`
+		Name  string     `json:"name"`
+		Items []*JobItem `json:"items"`
+		// job-level defaults
+		ProxyServe           string `json:"proxyServe"`
+		MaxPages             int    `json:"maxPages"`
+		Threads              int    `json:"threads"`
+		ListDelay            int64  `json:"listDelay"`
+		TrunPageDelay        int64  `json:"trunPageDelay"`
+		ContentDelay         int64  `json:"contentDelay"`
+		State                int    `json:"state"`
+		StateType            string `json:"stateType"`
+		Progress             int    `json:"progress"`
+		NeedDownloadAttaches bool   `json:"needDownloadAttaches"`
+	}
+	Jobs []*Job
+	// JobRunningEvent is pushed to the UI while a job is running.
+	JobRunningEvent struct {
+		Progress int    `json:"progress"`
+		Msg      string `json:"msg"`
+		Act      int    `json:"act"`
+		Code     string `json:"code"`
+	}
+	// JobRunningState holds a running job's state and intermediate results.
+	JobRunningState struct {
+		Code        string
+		State       int
+		Progress    int
+		ResultCache *list.List // result cache
+		ExitCh      chan bool
+	}
+)
+
+func (sc SpiderConfiges) Len() int {
+	return len(sc)
+}
+func (sc SpiderConfiges) Swap(i, j int) {
+	sc[i], sc[j] = sc[j], sc[i]
+}
+func (sc SpiderConfiges) Less(i, j int) bool {
+	return sc[i].Code > sc[j].Code
+}
+
+func (js Jobs) Len() int {
+	return len(js)
+}
+func (js Jobs) Swap(i, j int) {
+	js[i], js[j] = js[j], js[i]
+}
+func (js Jobs) Less(i, j int) bool {
+	return js[i].Code > js[j].Code
+}
+
+// CopyAttribute sets *dst to value1 unless it is empty, falling back to value2;
+// the literal "EMPTY" sentinel forces *dst back to "".
+func CopyAttribute(dst *string, value1, value2 string) {
+	if value1 == "EMPTY" { // sentinel: force-clear the field
+		*dst = ""
+	} else if value1 != "" {
+		*dst = value1
+	} else if value2 != "" {
+		*dst = value2
+	}
+}
+
+// MergeSpiderConfig merges two configs, preferring fields set on src2.
+func MergeSpiderConfig(src1, src2 *SpiderConfig) *SpiderConfig {
+	nsc := new(SpiderConfig)
+	CopyAttribute(&nsc.Code, src2.Code, src1.Code)
+	CopyAttribute(&nsc.Site, src2.Site, src1.Site)
+	CopyAttribute(&nsc.Channel, src2.Channel, src1.Channel)
+	CopyAttribute(&nsc.Url, src2.Url, src1.Url)
+	CopyAttribute(&nsc.Author, src2.Author, src1.Author)
+	CopyAttribute(&nsc.ListBodyCss, src2.ListBodyCss, src1.ListBodyCss)
+	CopyAttribute(&nsc.ListItemCss, src2.ListItemCss, src1.ListItemCss)
+	CopyAttribute(&nsc.ListLinkCss, src2.ListLinkCss, src1.ListLinkCss)
+	CopyAttribute(&nsc.ListPubtimeCss, src2.ListPubtimeCss, src1.ListPubtimeCss)
+	CopyAttribute(&nsc.ListNextPageCss, src2.ListNextPageCss, src1.ListNextPageCss)
+	CopyAttribute(&nsc.TitleCss, src2.TitleCss, src1.TitleCss)
+	CopyAttribute(&nsc.PublishTimeCss, src2.PublishTimeCss, src1.PublishTimeCss)
+	CopyAttribute(&nsc.PublishUnitCss, src2.PublishUnitCss, src1.PublishUnitCss)
+	CopyAttribute(&nsc.ContentCss, src2.ContentCss, src1.ContentCss)
+	CopyAttribute(&nsc.AttachCss, src2.AttachCss, src1.AttachCss)
+	CopyAttribute(&nsc.ListJSCode, src2.ListJSCode, src1.ListJSCode)
+	CopyAttribute(&nsc.ContentJSCode, src2.ContentJSCode, src1.ContentJSCode)
+	CopyAttribute(&nsc.AttachJSCode, src2.AttachJSCode, src1.AttachJSCode)
+	CopyAttribute(&nsc.ListTrunPageJSCode, src2.ListTrunPageJSCode, src1.ListTrunPageJSCode)
+	return nsc
+}
+
+// CopySpiderConfig copies all fields from src2 into src1.
+func CopySpiderConfig(src1, src2 *SpiderConfig) {
+	src1.Code = src2.Code
+	src1.Site = src2.Site
+	src1.Author = src2.Author
+	src1.Channel = src2.Channel
+	src1.Url = src2.Url
+	src1.ListBodyCss = src2.ListBodyCss
+	src1.ListItemCss = src2.ListItemCss
+	src1.ListPubtimeCss = src2.ListPubtimeCss
+	src1.ListNextPageCss = src2.ListNextPageCss
+	src1.ListLinkCss = src2.ListLinkCss
+	src1.TitleCss = src2.TitleCss
+	src1.PublishTimeCss = src2.PublishTimeCss
+	src1.PublishUnitCss = src2.PublishUnitCss
+	src1.ContentCss = src2.ContentCss
+	src1.AttachCss = src2.AttachCss
+	src1.ListJSCode = src2.ListJSCode
+	src1.ListTrunPageJSCode = src2.ListTrunPageJSCode
+	src1.ContentJSCode = src2.ContentJSCode
+	src1.AttachJSCode = src2.AttachJSCode
+}
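A small sketch of the merge semantics above: fields set on the second argument win, and the literal "EMPTY" sentinel forces a field back to the empty string. The selectors used here are made up.

```go
package main

import (
	"fmt"

	be "spidercreator/backend"
)

func main() {
	base := &be.SpiderConfig{Code: "demo", TitleCss: "h1", ContentCss: ".content"}
	patch := &be.SpiderConfig{TitleCss: ".article-title", ContentCss: "EMPTY"}

	merged := be.MergeSpiderConfig(base, patch)
	fmt.Println(merged.Code)       // "demo"           (kept from base)
	fmt.Println(merged.TitleCss)   // ".article-title" (overridden by patch)
	fmt.Println(merged.ContentCss) // ""               (cleared by the EMPTY sentinel)
}
```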

+ 244 - 0
backend/vm/jobs.go

@@ -0,0 +1,244 @@
+/*
+Batch job scheduling.
+*/
+package vm
+
+import (
+	"container/list"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	be "spidercreator/backend"
+	bdb "spidercreator/backend/db"
+	"strconv"
+	"time"
+
+	"github.com/xuri/excelize/v2"
+
+	"github.com/chromedp/chromedp"
+)
+
+var (
+	// Local scheduling registry: entries are only added, never removed.
+	// Few jobs run at the same time, so no locking and no deletion here.
+	runningJobs = map[string]*be.JobRunningState{}
+)
+
+// RunJob executes the given job asynchronously; crawling itself is single-threaded.
+func (vm *VM) RunJob(code string) {
+	// 1. load the job definition
+	vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "加载作业"})
+	job, err := bdb.LoadEntity[be.Job]("jobs", code)
+	var state *be.JobRunningState
+	if err != nil {
+		log.Println(err.Error())
+		vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "执行作业失败:" + err.Error()})
+		return
+	}
+
+	if v, ok := runningJobs[code]; ok {
+		v.Progress = 1
+		v.State = 1
+		v.ExitCh = make(chan bool, 1)
+		v.ResultCache.Init()
+		state = v
+	} else {
+		state = &be.JobRunningState{
+			Code:        code,
+			State:       1,
+			Progress:    1,
+			ResultCache: new(list.List),
+			ExitCh:      make(chan bool, 1),
+		}
+		runningJobs[code] = state
+	}
+	vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "加载作业完成"})
+	no := 1
+	// open two headless browsers
+	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(true, false, "")    // for list pages
+	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(true, false, "") // for detail pages
+	defer func() {
+		job.State = 0
+		job.Progress = 0
+		incCancelFn2()
+		baseCancelFn2()
+		incCancelFn()
+		baseCancelFn()
+		vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code, Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: fmt.Sprintf("作业执行结束,结果:%d", state.ResultCache.Len())})
+		close(state.ExitCh)
+	}()
+	log.Println("共有子爬虫数量:", len(job.Items))
+	var totalPages, downloadedPages float32 = 0, 0
+	for _, item := range job.Items {
+		totalPages += float32(item.MaxPages)
+	}
+	// 2. main loop over the job's spider items
+L:
+	for _, item := range job.Items {
+		// crawl a single task
+		// load this spider's scraping configuration
+		sf, err := bdb.LoadEntity[be.SpiderConfig]("myBucket", item.SpiderCode)
+		if err != nil {
+			log.Println("加载爬虫配置参数失败:", err.Error())
+			continue
+		}
+		log.Println(*sf)
+		log.Println(*item)
+		listRunJs, contentRunJs := sf.ListJSCode, sf.ContentJSCode
+
+		// render the JS snippets for list and content extraction
+		if listRunJs == "" {
+			listRunJs = renderJavascriptCoder(loadListItemsJS, sf)
+		}
+		if contentRunJs == "" {
+			contentRunJs = renderJavascriptCoder(loadContentJS, sf)
+		}
+
+		listResult := make(be.ResultItems, 0)
+		// 3. open the list page
+		chromedp.Run(ctx, chromedp.Tasks{
+			chromedp.Navigate(item.Url),
+			chromedp.WaitReady("document.body", chromedp.ByJSPath),
+			chromedp.Sleep(time.Duration(item.ListDelay) * time.Millisecond),
+		})
+		vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+			Act: be.JOB_RUNNING_EVENT_DEBUG, Msg: "打开列表页完成"})
+		// 4. walk through up to MaxPages list pages
+		for j := 0; j < item.MaxPages; j++ {
+			downloadedPages += 1
+			vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+				Act:      be.JOB_RUNNING_EVENT_PROGRESS,
+				Progress: int(downloadedPages / totalPages * 100)})
+			err := chromedp.Run(ctx, chromedp.Tasks{
+				chromedp.Evaluate(listRunJs, &listResult),
+			})
+			if err != nil {
+				log.Println("执行JS代码失败", err.Error())
+				vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+					Act: be.JOB_RUNNING_EVENT_DEBUG,
+					Msg: "执行列表页JS代码失败"})
+				continue
+			}
+			log.Println("加载当前列表页,长度:", len(listResult))
+			vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+				Act: be.JOB_RUNNING_EVENT_DEBUG,
+				Msg: "获取列表完成"})
+			// 5. visit each detail page
+			for _, r := range listResult {
+				log.Println("详情页", r.Title, r.Href)
+				select {
+				case <-state.ExitCh:
+					break L
+				default:
+					vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+						Act: be.JOB_RUNNING_EVENT_DEBUG,
+						Msg: fmt.Sprintf("打开详情页%d %s", no, r.ListTitle)})
+					// open the detail page
+					err = chromedp.Run(ctx2, chromedp.Tasks{
+						chromedp.Navigate(r.Href),
+						chromedp.WaitReady("document.body", chromedp.ByJSPath),
+						chromedp.Sleep(time.Duration(item.ContentDelay) * time.Millisecond),
+					})
+					if err != nil {
+						continue
+					}
+					// extract the detail page content
+					err = chromedp.Run(ctx2, chromedp.Tasks{
+						chromedp.Evaluate(contentRunJs, r),
+					})
+					if err != nil {
+						continue
+					}
+
+					if item.NeedDownloadAttaches {
+						downloadAttaches(r, vm.attachesDir)
+					}
+					// fill in the remaining fields
+					r.No = no
+					no += 1
+					r.Site = item.SpiderSite
+					r.Channel = item.Channel
+					// push the result into the cache
+					state.ResultCache.PushBack(r)
+					vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+						Act: be.JOB_RUNNING_EVENT_DEBUG,
+						Msg: fmt.Sprintf("下载详情页%d %s", no, r.Href)})
+				}
+			}
+			// 6. turn to the next list page
+			vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+				Act: be.JOB_RUNNING_EVENT_DEBUG,
+				Msg: "准备翻页"})
+			if err = trunPage(sf, item.TrunPageDelay, ctx); err != nil {
+				vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+					Act: be.JOB_RUNNING_EVENT_DEBUG,
+					Msg: "执行翻页代码失败"})
+				break
+			}
+		}
+	}
+
+	vm.dnf.Dispatch("run_job_event", &be.JobRunningEvent{Code: job.Code,
+		Act:      be.JOB_RUNNING_EVENT_PROGRESS,
+		Progress: 100})
+}
+
+// StopJob signals a running job to stop.
+func (vm *VM) StopJob(code string) {
+	defer func() {
+		if err := recover(); err != nil {
+			log.Println(err)
+		}
+	}()
+	if v, ok := runningJobs[code]; ok {
+		v.ExitCh <- true
+		v.State = 0
+		v.Progress = 0
+	}
+}
+
+// ExportJobResult writes the cached results of a job to an Excel file.
+func (vm *VM) ExportJobResult(code, filePath string) error {
+	if job, ok := runningJobs[code]; ok {
+		f := excelize.NewFile()
+		defer f.Close()
+		f.SetCellStr("Sheet1", "A1", "站点")
+		f.SetCellStr("Sheet1", "B1", "栏目")
+		// remaining header cells
+		f.SetCellStr("Sheet1", "C1", "标题")
+		f.SetCellStr("Sheet1", "D1", "链接")
+		f.SetCellStr("Sheet1", "E1", "发布单位")
+		f.SetCellStr("Sheet1", "F1", "发布时间")
+		f.SetCellStr("Sheet1", "G1", "正文")
+		f.SetCellStr("Sheet1", "H1", "附件")
+		i := 0
+		for el := job.ResultCache.Front(); el != nil; el = el.Next() {
+			r, _ := el.Value.(*be.ResultItem)
+			// site and channel columns
+			iStr := strconv.Itoa(i + 2)
+			f.SetCellStr("Sheet1", "A"+iStr, r.Site)
+			f.SetCellStr("Sheet1", "B"+iStr, r.Channel)
+			// data columns
+			f.SetCellStr("Sheet1", "C"+iStr, r.Title)
+			f.SetCellStr("Sheet1", "D"+iStr, r.Href)
+			f.SetCellStr("Sheet1", "E"+iStr, r.PublishUnit)
+			f.SetCellStr("Sheet1", "F"+iStr, r.ListPubTime)
+			f.SetCellStr("Sheet1", "G"+iStr, r.Content)
+			f.SetCellStr("Sheet1", "H"+iStr, "")
+			if len(r.AttachLinks) > 0 {
+				bs, err := json.Marshal(r.AttachLinks)
+				if err == nil {
+					f.SetCellStr("Sheet1", "H"+iStr, string(bs))
+				}
+			}
+			i += 1
+		}
+		err := f.SaveAs(filePath)
+		if err != nil {
+			return err
+		}
+		return nil
+	}
+	return errors.New("找不到正在运行的作业")
+}
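A caller sketch for the job API above (assumed wiring: a no-op notifier, an already-populated "jobs" bucket, and a made-up job code). RunJob blocks, so it runs on its own goroutine, and StopJob signals it through the job's exit channel.

```go
package main

import (
	"time"

	bdb "spidercreator/backend/db"
	"spidercreator/backend/vm"
)

type nopNotify struct{}

func (nopNotify) Dispatch(event string, data interface{}) error { return nil }

func main() {
	// the vm package reads job and spider configs through the shared Bolt store
	bdb.Db = bdb.NewSpiderDb("spider.db", nopNotify{})
	defer bdb.Db.Close()

	v := vm.NewVM("./attaches", nopNotify{})

	go v.RunJob("job-001") // hypothetical job code stored in the "jobs" bucket

	time.Sleep(30 * time.Second)
	v.StopJob("job-001")

	// export whatever was collected before the stop
	_ = v.ExportJobResult("job-001", "./job-001.xlsx")
}
```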

+ 34 - 0
backend/vm/load_content.js

@@ -0,0 +1,34 @@
+// Content-page extraction script: a Go text/template whose CSS placeholders are filled from SpiderConfig.
+var ret = {}
+var tmp = null
+
+if ("{{.TitleCss}}" != "") {// title
+	tmp = document.querySelector("{{.TitleCss}}")
+	if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.PublishUnitCss}}" != "") {// publishing unit
+	tmp = document.querySelector("{{.PublishUnitCss}}")
+	if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.PublishTimeCss}}" != "") {// publish time
+	tmp = document.querySelector("{{.PublishTimeCss}}")
+	if (tmp) ret["publishTime"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.ContentCss}}" != "") {// body content
+	tmp = document.querySelector("{{.ContentCss}}")
+	if (tmp) {
+		ret["content"] = tmp.innerText
+		ret["contentHtml"] = tmp.innerHTML
+	}
+}
+if ("{{.AttachCss}}" != "") {// attachments
+	tmp = document.querySelectorAll("{{.AttachCss}} a")
+	let attach = []
+	if (tmp) {
+		tmp.forEach((v, i) => {
+			attach.push({ title: v.getAttribute("title") || v.innerText, href: v.href })
+		})
+	}
+	ret["attachLinks"] = attach
+}
+ret

+ 23 - 0
backend/vm/load_list_items.js

@@ -0,0 +1,23 @@
+var ret = []
+document.querySelectorAll("{{.ListItemCss}}").forEach((v, i) => {
+    let item = {}
+    if ("{{.ListLinkCss}}" != "") {
+        let link = v.querySelector("{{.ListLinkCss}}")
+        if (link) {
+            var href = link.href
+            if (!href.startsWith("http")) href = window.location.origin + "/" + href
+            let title = link.getAttribute("title") || link.innerText
+            item = { "listTitle": title, "href": href, "no": i }
+        } else {
+            item = { "no": i }
+        }
+    }
+    if ("{{.ListPubtimeCss}}" != "") {
+        let pubtime = v.querySelector("{{.ListPubtimeCss}}")
+        if (pubtime) {
+            item["listPubishTime"] = pubtime.innerText
+        }
+    }
+    ret.push(item)
+})
+ret

+ 194 - 0
backend/vm/single.go

@@ -0,0 +1,194 @@
+package vm
+
+import (
+	"container/list"
+	_ "embed"
+	"fmt"
+	"log"
+	be "spidercreator/backend"
+	ai "spidercreator/backend/ai"
+	"strconv"
+	"time"
+
+	"github.com/chromedp/chromedp"
+)
+
+// NewVM creates a VM bound to an attachment directory and an event notifier.
+func NewVM(attachesDir string, dnf be.EventNotifyFace) *VM {
+	return &VM{
+		attachesDir, dnf,
+	}
+}
+
+// RunSpider runs a single-threaded test crawl for one spider configuration.
+func (vm *VM) RunSpider(url string, maxPages int,
+	listDealy int64, contentDelay int64,
+	headless bool, showImage bool,
+	proxyServe string, exit chan bool,
+	currentSpiderConfig *be.SpiderConfig,
+	currentResult *list.List) {
+	sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Url: url})
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	log.Println("1浏览器打开", *sc)
+	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
+	defer func() {
+		cancel()
+		baseCancel()
+		log.Println("0浏览器已经销毁")
+		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
+		close(exit)
+	}()
+	chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Navigate(sc.Url),
+		chromedp.WaitReady("document.body", chromedp.ByJSPath),
+		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond),
+	})
+	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
+	log.Println("2页面打开")
+	var runJs string = sc.ListJSCode
+	listResult := make(be.ResultItems, 0)
+	// 2. run the JS that extracts the list page
+	if runJs == "" {
+		runJs = renderJavascriptCoder(loadListItemsJS, sc)
+	}
+	//log.Println("execute list jscode", runJs)
+	err := chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Evaluate(runJs, &listResult),
+	})
+	if err != nil {
+		log.Println("执行JS代码失败", err.Error())
+		vm.dnf.Dispatch("debug_event", "2 执行JS代码失败")
+		return
+	}
+	vm.dnf.Dispatch("debug_event", "3 获取列表完成")
+	log.Println("3获取列表完成")
+
+	// 3. open each detail page
+	runJs = sc.ContentJSCode
+	if runJs == "" {
+		runJs = renderJavascriptCoder(loadContentJS, sc)
+	}
+	//log.Println("execute content js", runJs)
+	for _, v := range listResult {
+		select {
+		case <-exit:
+			return
+		default:
+			vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. %d- 待 下载详情页 %s ", v.No, v.ListTitle))
+			var result string = ""
+			err = chromedp.Run(ctx, chromedp.Tasks{
+				chromedp.Navigate(v.Href),
+				chromedp.WaitReady(`document.body`, chromedp.ByJSPath),
+				chromedp.Sleep(time.Duration(contentDelay) * time.Millisecond),
+				chromedp.Evaluate(runJs, v),
+			})
+			if err != nil {
+				log.Println("执行JS代码失败", err.Error())
+			}
+			if len(v.AttachLinks) > 0 { // has attachments
+				vm.dnf.Dispatch("debug_event", "4. 下载附件")
+				// download the attachments
+				downloadAttaches(v, vm.attachesDir)
+			}
+			// close the current tab
+			chromedp.Run(ctx, chromedp.Tasks{
+				chromedp.Evaluate(`var ret="";window.close();ret`, &result),
+			})
+			vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. %d- 下载详情页 %s 完成", v.No, v.Title))
+			currentResult.PushBack(v)
+		}
+	}
+	vm.dnf.Dispatch("debug_event", "5 采集测试完成")
+	log.Println("5采集测试完成")
+}
+
+// CountYestodayArts counts how many articles were published yesterday.
+func (vm *VM) CountYestodayArts(url string, listDealy int64, trunPageDelay int64,
+	headless bool, showImage bool, exit chan bool, currentSpiderConfig *be.SpiderConfig) (count int) {
+	sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Url: url})
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, "")
+	log.Println("1浏览器打开")
+	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
+	defer func() {
+		cancel()
+		baseCancel()
+		log.Println("0浏览器已经销毁")
+		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
+		vm.dnf.Dispatch("debug_event", fmt.Sprintf("99 昨日信息发布量:%d ", count))
+		close(exit)
+	}()
+
+	// build yesterday's time window
+	now := time.Now()
+	yesterday := now.AddDate(0, 0, -1) // yesterday's date
+	startOfYesterday := time.Date(yesterday.Year(), yesterday.Month(), yesterday.Day(), 0, 0, 0, 0, now.Location())
+	endOfYesterday := startOfYesterday.AddDate(0, 0, 1).Add(-time.Nanosecond)
+
+	// 1. open the list page
+	chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Navigate(sc.Url),
+		chromedp.WaitReady("document.body", chromedp.ByJSPath),
+		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond),
+	})
+	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
+	log.Println("2页面打开")
+	// 2. render the JS that extracts the list page
+	runJs := renderJavascriptCoder(loadListItemsJS, sc)
+	tmp := map[string]bool{}
+	// turn at most MAX_TRUN_PAGE pages
+	for i := 0; i < MAX_TRUN_PAGE; i++ {
+		select {
+		case <-exit:
+			return
+		default:
+			vm.dnf.Dispatch("debug_event", "3 执行列表页JS")
+			listResult := make(be.ResultItems, 0)
+			err := chromedp.Run(ctx, chromedp.Tasks{
+				chromedp.Evaluate(runJs, &listResult),
+			})
+			if err != nil {
+				log.Println("执行JS代码失败", err.Error())
+				vm.dnf.Dispatch("debug_event", "3 执行JS代码失败")
+				return
+			}
+			// use the LLM to normalize the scraped dates (up to 5 attempts)
+			callAIState := false
+			for j := 0; j < 5; j++ {
+				vm.dnf.Dispatch("debug_event", "3 执行AI提取列表发布时间"+strconv.Itoa(j+1))
+				err := ai.UpdateResultDateStr(listResult)
+				if err == nil {
+					callAIState = true
+					break
+				}
+			}
+			if !callAIState {
+				vm.dnf.Dispatch("debug_event", "3 多轮次调用AI均未得到合理结果")
+				return
+			}
+			// count the items published yesterday
+			for _, r := range listResult {
+				day, err := time.Parse("2006-01-02", r.ListPubTime)
+				if err != nil {
+					continue
+				}
+				if _, ok := tmp[r.Href]; ok { // de-duplicate across pages
+					continue
+				}
+				tmp[r.Href] = true
+				if day.After(startOfYesterday) && day.Before(endOfYesterday) {
+					count += 1
+				} else if day.Before(startOfYesterday) {
+					return
+				}
+			}
+			vm.dnf.Dispatch("debug_event", fmt.Sprintf("4 当前观测昨日信息发布量:%d ", count))
+			// turn to the next page
+			//fmt.Println("下一页CSS选择器", currentSpiderConfig.ListNextPageCss)
+			chromedp.Run(ctx, chromedp.Tasks{
+				chromedp.Click(fmt.Sprintf(`document.querySelector("%s")`, currentSpiderConfig.ListNextPageCss),
+					chromedp.ByJSPath),
+				chromedp.Sleep(time.Duration(trunPageDelay) * time.Millisecond),
+			})
+		}
+	}
+	return
+}

+ 168 - 0
backend/vm/vm.go

@@ -0,0 +1,168 @@
+package vm
+
+import (
+	"bytes"
+	"context"
+	_ "embed"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"net/http"
+	"os"
+	be "spidercreator/backend"
+	"strings"
+	"text/template"
+	"time"
+
+	"github.com/chromedp/chromedp"
+
+	"github.com/gabriel-vasile/mimetype"
+)
+
+const (
+	MAX_TRUN_PAGE = 1000
+)
+
+type (
+	// VM drives a single crawl task.
+	VM struct {
+		attachesDir string
+		dnf         be.EventNotifyFace
+	}
+	// Worker is one detail-page execution unit.
+	Worker struct {
+		vm                    *VM
+		baseCancel, incCancel context.CancelFunc
+		ctx                   context.Context
+		js                    string
+		contentDelay          int64
+	}
+)
+
+var (
+	//go:embed load_list_items.js
+	loadListItemsJS string
+	//go:embed load_content.js
+	loadContentJS string
+)
+
+// renderJavascriptCoder renders a JS template, filling in the SpiderConfig's CSS selectors.
+func renderJavascriptCoder(tpl string, sc *be.SpiderConfig) string {
+	t, err := template.New("").Parse(tpl)
+	if err != nil {
+		log.Println("创建JS代码模板失败", err.Error())
+		return ""
+	}
+	buf := new(bytes.Buffer)
+	err = t.Execute(buf, sc)
+	if err != nil {
+		log.Println("执行JS代码模板失败", err.Error())
+		return ""
+	}
+	return buf.String()
+}
+
+// downloadAttaches downloads every attachment link and keeps only those that were saved to disk.
+func downloadAttaches(v *be.ResultItem, attachesDir string) {
+	client := &http.Client{
+		Timeout: 30 * time.Second,
+	}
+	for _, attach := range v.AttachLinks {
+		log.Println("准备下载附件,", attach.Href, attach.Title)
+		req, err := http.NewRequest("GET", attach.Href, nil)
+		if err != nil {
+			log.Println(" 下载附件 构建req 出错:", attach.Href, attach.FileName, err.Error())
+			continue
+		}
+		req.Header.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")
+		resp, err := client.Do(req)
+		if err != nil {
+			log.Println(" 下载附件 发送请求 出错:", attach.Href, attach.FileName, err.Error())
+			continue
+		}
+		bs, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			log.Println(" 下载附件 下载 出错:", attach.Href, attach.FileName, err.Error())
+			continue
+		}
+		resp.Body.Close()
+		// write the payload to a file
+		mtype := mimetype.Detect(bs)
+		// skip HTML responses (usually error or redirect pages)
+		if strings.Contains(strings.ToLower(mtype.String()), "html") {
+			continue
+		}
+		fileName := fmt.Sprintf("%s_%04d_%04d_%04d%s", time.Now().Format("20060102150405"), rand.Intn(9999),
+			rand.Intn(9999), rand.Intn(9999), mtype.Extension())
+		save2File := attachesDir + "/" + fileName
+		fo, err := os.Create(save2File)
+		if err != nil {
+			log.Println(" 下载附件 生成文件 出错:", attach.Href, attach.FileName, save2File, err.Error())
+			continue
+		}
+		fo.Write(bs)
+		fo.Close()
+		attach.FileName = fileName
+		attach.FilePath = save2File
+		attach.FileType = mtype.String()
+		attach.FileSize = fmt.Sprintf("%.02fMB", float32(len(bs))/1024/1024)
+	}
+	// keep only the attachments that were actually saved
+	newAttachesLinks := make([]*be.AttachLink, 0)
+	for _, a := range v.AttachLinks {
+		if a.FilePath != "" {
+			newAttachesLinks = append(newAttachesLinks, a)
+		}
+	}
+	v.AttachLinks = newAttachesLinks
+}
+
+// trunPage turns the list to the next page and verifies that the list body actually changed.
+func trunPage(sc *be.SpiderConfig, delay int64, ctx context.Context) error {
+	if sc.ListBodyCss == "" || (sc.ListNextPageCss == "" && sc.ListTrunPageJSCode == "") {
+		return errors.New("当前爬虫配置,不具备翻页条件")
+	}
+	var runJs, result string = sc.ListTrunPageJSCode, ""
+	if runJs == "" {
+		runJs = fmt.Sprintf(`var link=document.querySelector("%s");if(link)link.click();""`, sc.ListNextPageCss)
+	}
+	log.Println("将要执行翻页的JS代码,", runJs)
+	// 1. snapshot the current list body so it can be compared after the page turn
+	var result1, result2 string
+	var checkRunJs = fmt.Sprintf(`document.querySelector("%s").outerText`, sc.ListBodyCss)
+	log.Println("检查翻页是否成功,执行的JS", checkRunJs)
+	err := chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Evaluate(checkRunJs, &result1),
+	})
+	if err != nil {
+		log.Println("翻页检查1失败,", checkRunJs)
+		return err
+	}
+	if runJs != "" {
+		// the site may simply have no pagination
+		err = chromedp.Run(ctx, chromedp.Tasks{
+			chromedp.Evaluate(runJs, &result),
+			chromedp.Sleep(time.Duration(delay) * time.Millisecond),
+		})
+		if err != nil {
+			log.Println("翻页操作失败,", runJs)
+			return err
+		}
+	} else {
+		return errors.New("turn page failed")
+	}
+	err = chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Evaluate(checkRunJs, &result2),
+	})
+
+	if err != nil {
+		log.Println("翻页检查2失败,", checkRunJs)
+		return err
+	}
+	if result1 == "" || result2 == "" || result1 == result2 {
+		return errors.New("翻页失败,两次翻页获取到的列表区域块不符合要求")
+	}
+	return nil
+}
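The embedded *.js files above are Go text/template sources whose placeholders are filled from a SpiderConfig before the script is handed to chromedp. A self-contained sketch of that mechanism follows (with a toy template, since renderJavascriptCoder and the embedded files are unexported):

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"

	be "spidercreator/backend"
)

func main() {
	// toy stand-in for load_list_items.js
	const tpl = `document.querySelectorAll("{{.ListItemCss}}").length`

	sc := &be.SpiderConfig{ListItemCss: "ul.news-list li"}

	t := template.Must(template.New("list").Parse(tpl))
	buf := new(bytes.Buffer)
	if err := t.Execute(buf, sc); err != nil {
		panic(err)
	}
	fmt.Println(buf.String())
	// document.querySelectorAll("ul.news-list li").length
}
```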

+ 155 - 0
backend/vm/worker.go

@@ -0,0 +1,155 @@
+package vm
+
+import (
+	"container/list"
+	"fmt"
+	"log"
+	be "spidercreator/backend"
+	"sync"
+	"time"
+
+	"github.com/chromedp/chromedp"
+)
+
+// Destory releases the worker's browser contexts.
+func (w *Worker) Destory() {
+	if w.incCancel != nil {
+		w.incCancel()
+	}
+	if w.baseCancel != nil {
+		w.baseCancel()
+	}
+}
+
+// NewWorker opens a dedicated browser context for downloading detail pages.
+func NewWorker(headless bool, showImage bool, proxyServe string, contentDelay int64, js string, vm *VM) *Worker {
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	return &Worker{baseCancel: baseCancel,
+		incCancel:    cancel,
+		ctx:          ctx,
+		js:           js,
+		contentDelay: contentDelay,
+		vm:           vm,
+	}
+}
+
+// Run downloads one detail page and then returns the worker to the pool.
+func (w *Worker) Run(v *be.ResultItem, ch chan *Worker, wg *sync.WaitGroup) {
+	defer func() {
+		ch <- w
+		wg.Done()
+	}()
+	w.vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. %d- 待 下载详情页 %s ", v.No, v.ListTitle))
+	var result string = ""
+	err := chromedp.Run(w.ctx, chromedp.Tasks{
+		chromedp.Navigate(v.Href),
+		chromedp.WaitReady(`document.body`, chromedp.ByJSPath),
+		chromedp.Sleep(time.Duration(w.contentDelay) * time.Millisecond),
+		chromedp.Evaluate(w.js, v),
+	})
+	if err != nil {
+		log.Println("执行JS代码失败_详情", err.Error())
+	}
+	if len(v.AttachLinks) > 0 { // has attachments
+		w.vm.dnf.Dispatch("debug_event", "4. 下载附件")
+		// download the attachments
+		downloadAttaches(v, w.vm.attachesDir)
+	}
+	// close the current tab
+	chromedp.Run(w.ctx, chromedp.Tasks{
+		chromedp.Evaluate(`var ret="";window.close();ret`, &result),
+	})
+	w.vm.dnf.Dispatch("debug_event", fmt.Sprintf("4. %d- 下载详情页 %s 完成", v.No, v.Title))
+}
+
+// RunSpiderMulThreads runs a test crawl with a pool of detail-page workers.
+func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64,
+	trunPageDelay int64, contentDelay int64,
+	headless bool, showImage bool, proxyServe string, threads int,
+	exit chan bool,
+	currentSpiderConfig *be.SpiderConfig, currentResult *list.List) {
+	sc := be.MergeSpiderConfig(currentSpiderConfig, &be.SpiderConfig{Url: url})
+	_, baseCancel, _, _, ctx, cancel := be.NewBrowser(headless, showImage, proxyServe)
+	log.Println("1浏览器打开")
+	vm.dnf.Dispatch("debug_event", "1 浏览器打开")
+	defer func() {
+		cancel()
+		baseCancel()
+		log.Println("0浏览器已经销毁")
+		vm.dnf.Dispatch("debug_event", "0 浏览器已经销毁")
+		close(exit)
+	}()
+	var runListJs, runContentJs string = sc.ListJSCode, sc.ContentJSCode
+	if runListJs == "" {
+		runListJs = renderJavascriptCoder(loadListItemsJS, sc)
+	}
+	if runContentJs == "" {
+		runContentJs = renderJavascriptCoder(loadContentJS, sc)
+	}
+	log.Println("获取列表JS代码", runListJs)
+	ws := make([]*Worker, 0, threads)
+	ch := make(chan *Worker, threads)
+	wg := new(sync.WaitGroup)
+	for i := 0; i < threads; i++ {
+		w := NewWorker(headless, showImage, proxyServe, contentDelay, runContentJs, vm)
+		ws = append(ws, w)
+		ch <- w
+	}
+	// destroy every worker on exit
+	defer func() {
+		for _, w := range ws {
+			if w != nil {
+				w.Destory()
+			}
+		}
+	}()
+
+	no := 1
+	// 1. open the list page in the outer context (needed for page turning)
+	chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Navigate(sc.Url),
+		chromedp.WaitReady("document.body", chromedp.ByJSPath),
+		chromedp.Sleep(time.Duration(listDealy) * time.Millisecond),
+	})
+	vm.dnf.Dispatch("debug_event", "2 页面已经打开")
+	log.Println("2页面打开")
+	for i := 0; i < maxPages; i++ {
+		listResult := make(be.ResultItems, 0)
+		// 2. run the JS that extracts the list page
+		err := chromedp.Run(ctx, chromedp.Tasks{
+			chromedp.Evaluate(runListJs, &listResult),
+		})
+		if err != nil {
+			log.Println("执行JS代码失败_列表", err.Error())
+			vm.dnf.Dispatch("debug_event", "2 列表-执行JS代码失败")
+			return
+		}
+		vm.dnf.Dispatch("debug_event", "3 获取列表完成")
+		log.Println("3获取列表完成")
+
+		// 3. hand detail pages to the worker pool
+		for _, v := range listResult {
+			select {
+			case <-exit:
+				return
+			default:
+				w := <-ch
+				wg.Add(1)
+				no += 1
+				v.No = no
+				currentResult.PushBack(v)
+				go w.Run(v, ch, wg)
+			}
+		}
+		wg.Wait()
+		vm.dnf.Dispatch("debug_event", "4 当前页采集完成,准备执行翻页逻辑//"+currentSpiderConfig.ListNextPageCss)
+		if err = trunPage(currentSpiderConfig, trunPageDelay, ctx); err != nil {
+			log.Println("翻页失败", err.Error())
+			vm.dnf.Dispatch("debug_event", "6 翻页失败: "+err.Error())
+			time.Sleep(3 * time.Second)
+			break
+		}
+	}
+	vm.dnf.Dispatch("debug_event", "6 采集测试完成")
+	log.Println("6 采集测试完成")
+}

+ 18 - 0
backend/webservice/cert.pem

@@ -0,0 +1,18 @@
+-----BEGIN CERTIFICATE-----
+MIIC+TCCAeGgAwIBAgIQWG6URMK1Ue9VcGjsy8zypjANBgkqhkiG9w0BAQsFADAS
+MRAwDgYDVQQKEwdBY21lIENvMB4XDTI0MDkwNTIxNTM1MloXDTI1MDkwNTIxNTM1
+MlowEjEQMA4GA1UEChMHQWNtZSBDbzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC
+AQoCggEBAK6HcRaSQiUXu1poHgToSSyrJZoC9ry4R6m+4DRMMHjh9tV0ZD0Nikdo
+HVcgneeaIgs+3ZQJTbG7NP2IHVTX26nAIpM4TlkDEXtx+uJnNH5h0V/vtwVk0lE7
+Hv3cwOxzDbVYuIJO23EAII3Dh/BEhT1tL50xtaS6hOUDeYcVv7BRqdcpNMaNVpC3
+156N9lCzYVlEL8/W/km8M3QQa6bpOo6iyHj68VLVjsd8hQG19XctpXnz/RPfZONS
+YqM04fGddRdNtcy+TUBC/qdYxtSJwI9nPNx+K9DynyBVwQ1ppRaPOl9mPAwyxzFp
+CAkr+m3adFWNaDVsWIrQaZNWzh8DWh8CAwEAAaNLMEkwDgYDVR0PAQH/BAQDAgWg
+MBMGA1UdJQQMMAoGCCsGAQUFBwMBMAwGA1UdEwEB/wQCMAAwFAYDVR0RBA0wC4IJ
+bG9jYWxob3N0MA0GCSqGSIb3DQEBCwUAA4IBAQCLAfeh30waA5o5za1CUNJheAr0
+m/mwmVxTdWdxLdwOZTcGdNHnpdXKfwoPVt1ZaSbBKxj9+1haw2FFXjl01pczOuMK
+HSmDOqeJFCtovJVoHazXw+5o35fA5iJukENUmGhE3w3YI7gBaxwIRD/3dO17IkmN
+JJtqmYpFp6WsTYV50v083a8hEqX2FGF4b74l6MbVV/Q1XHBc60HL2UWIwop/V/3s
+KP0zqnSzC/eh7qfFFGFEZ0Xlumt2Pc7+96+0QflZOFrOQIIIskFo4jMqdblB3EV+
+4tx1shZdMnHT6fX6kmKaG6663qgRaMaMIW1DiWdsP+5mh7wqDyPiTmkbvaHh
+-----END CERTIFICATE-----

+ 28 - 0
backend/webservice/key.pem

@@ -0,0 +1,28 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCuh3EWkkIlF7ta
+aB4E6EksqyWaAva8uEepvuA0TDB44fbVdGQ9DYpHaB1XIJ3nmiILPt2UCU2xuzT9
+iB1U19upwCKTOE5ZAxF7cfriZzR+YdFf77cFZNJROx793MDscw21WLiCTttxACCN
+w4fwRIU9bS+dMbWkuoTlA3mHFb+wUanXKTTGjVaQt9eejfZQs2FZRC/P1v5JvDN0
+EGum6TqOosh4+vFS1Y7HfIUBtfV3LaV58/0T32TjUmKjNOHxnXUXTbXMvk1AQv6n
+WMbUicCPZzzcfivQ8p8gVcENaaUWjzpfZjwMMscxaQgJK/pt2nRVjWg1bFiK0GmT
+Vs4fA1ofAgMBAAECggEACguJmr74R6JCCkYL1ER6UbPYCjE5eksw9Lgjt17bO1nm
+FwsH6euplcqMRcN+0yGv6+3GWwreCei4eA8pgQSbg/2m/8ox2DWw/+Xjhrxh7RQ8
+NMVbR1gyMrKwafQWtoU4uMNOe1GGl85mEUK7xDxtXse2AdomlkCV/YhhqkC6M6+n
+OpfYe9SEIeOwJjeiXlLg8tsqWDO9jdgjS7ZwnHqHeYyMorNCErUfpkgxpDEP8Dgt
+IpPnssiENr0bIpR43YO7KNE0WqhwUWIAVLXt9lMTfDayB80mezgeCLKQPP64eWeh
+evcfpzFsma90VTkHojQBbWpN/75QRrDEYzu/zwg4IQKBgQDMnqAUDS/BCXpalj/R
+vcE+WeoUmKEK+TqAbW9d3HahVusH8/eSwrDWUk7zafV3bHNFVW6Fer47vs0BIS1D
+orUygIc6GsDg/77crU5dio4N2F/Pmg+tLJHq31qWQAEZZFVGoaIvRYD2qCabUZPx
+dVIPK3ghaAzSE05v8v6JVYfUbQKBgQDaWohgRWDYJcsPOkVqez4y5Y3gWUZkI968
+zyFwaMZ8tSMbg6m7AJaUZFtz+dePDsrX8n/iGuEpMXZSmSG3sSPwS41k5poiBBdt
+Qy9r/Irp31SlA4FXy9Yq1SpXFSXvUZMSPdxOC03tGfOPa30Z2rH0rX6l81u5KhEa
+vwMXQxnZOwKBgEa5XSMRG7xhBkVhQVXBfJWMhnfv+VnNowbYzHFozigd3sa08JFt
+canicR95NDq+5WjFipngPvhvjnQhf3+tMWvvOM5AiQI740BrNnbmeQsYCqW63khA
+635/DNR58udP4pmzLFeiclzO6ektXTFMF7zejXsed6/0tFvFZW0afwRRAoGBAIU3
+ltyld2BoLmsr8g31Aw2qX9TworGV8N7gwFYElpSfLrwqp/MfeL8wO1uWopz1OWxm
+1v7rx1OKidX690dLG9IPRkS5LHB0bpaK1vPbMCVfzBSg/tjB0/ht9VcL4AkSi9gl
+RbOX0gNGQgLOYZTUiJ3u+8Xjo6Jkt+rJfulCVxLhAoGAZ1M20dYPI0edvYvdUOyL
+xDwmdlkOO2DU4+od7hhV9vdvvshbRC88m3YgjcGx59oFie++ynwf8c+LHi4Qx2Tw
+J/NzmWoOlC4Q2FljTGrNmbxAtVzkXOfpKoL9+YETZzHJEBLYdmETLfXLQK4OaXQz
+AxverbaEA3UNOAQPte5r7pI=
+-----END PRIVATE KEY-----

+ 129 - 0
backend/webservice/webservice.go

@@ -0,0 +1,129 @@
+// Package webservice exposes local HTTPS endpoints for reading and updating the current spider config.
+package webservice
+
+import (
+	"crypto/tls"
+	_ "embed"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	be "spidercreator/backend"
+	bdb "spidercreator/backend/db"
+)
+
+const (
+	LISTEN_ADDR = ":8080"
+)
+
+type (
+	SpiderConfigItem struct {
+		Key string `json:"key"`
+		Css string `json:"css"`
+	}
+	WebService struct {
+		db                  *bdb.SpiderDb
+		enf                 be.EventNotifyFace
+		currentSpiderConfig *be.SpiderConfig
+	}
+)
+
+var (
+	//go:embed cert.pem
+	certBytes []byte
+	//go:embed key.pem
+	keyBytes []byte
+)
+
+// NewWebService
+func NewWebService(db *bdb.SpiderDb, enf be.EventNotifyFace, csf *be.SpiderConfig) *WebService {
+	return &WebService{db, enf, csf}
+}
+
+func (ws *WebService) RunHttpServe() {
+	// set up the HTTP server
+	mux := http.NewServeMux()
+	// parse the embedded certificate pair
+	cert, err := tls.X509KeyPair(certBytes, keyBytes)
+	if err != nil {
+		log.Println(err.Error())
+		return
+	}
+	// build the TLS configuration
+	tlsConfig := &tls.Config{
+		// additional TLS settings can be added here
+		Certificates:       []tls.Certificate{cert},
+		ServerName:         "localhost",
+		InsecureSkipVerify: true,
+	}
+	server := &http.Server{
+		Addr:      LISTEN_ADDR,
+		Handler:   mux,
+		TLSConfig: tlsConfig,
+	}
+	// register the HTTP handlers
+	mux.HandleFunc("/save", ws.SaveSpiderConfig)
+	mux.HandleFunc("/load", ws.LoadSpiderConfig)
+	//
+	log.Println("Starting HTTPS server on ", LISTEN_ADDR)
+	err = server.ListenAndServeTLS("", "")
+	if err != nil {
+		log.Println("Failed to start server:  ", err.Error())
+		return
+	}
+}
+
+// SaveSpiderConfig updates one CSS selector on the current spider config from a JSON request.
+func (ws *WebService) SaveSpiderConfig(w http.ResponseWriter, r *http.Request) {
+	log.Println("保存设置")
+	w.Header().Set("Access-Control-Allow-Origin", "*")
+	w.Header().Set("Content-Type", "application/json")
+	var req = new(SpiderConfigItem)
+	err := json.NewDecoder(r.Body).Decode(req)
+	if err != nil {
+		log.Println("序列化失败")
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+	log.Println("CSS", req.Key, req.Css)
+	// apply the update to the matching field
+	switch req.Key {
+	case "listItemCss":
+		ws.currentSpiderConfig.ListItemCss = req.Css
+	case "listLinkCss":
+		ws.currentSpiderConfig.ListLinkCss = req.Css
+	case "listPublishTimeCss":
+		ws.currentSpiderConfig.ListPubtimeCss = req.Css
+	case "listNextPageCss":
+		ws.currentSpiderConfig.ListNextPageCss = req.Css
+	case "listBodyCss":
+		ws.currentSpiderConfig.ListBodyCss = req.Css
+	case "titleCss":
+		ws.currentSpiderConfig.TitleCss = req.Css
+	case "publishUnitCss":
+		ws.currentSpiderConfig.PublishUnitCss = req.Css
+	case "publishTimeCss":
+		ws.currentSpiderConfig.PublishTimeCss = req.Css
+	case "contentCss":
+		ws.currentSpiderConfig.ContentCss = req.Css
+	case "attachCss":
+		ws.currentSpiderConfig.AttachCss = req.Css
+	}
+	fmt.Fprint(w, "{\"code\":200}")
+	ws.db.SaveOrUpdate(ws.currentSpiderConfig)
+	// notify the dev-tool side that a CSS selector changed
+	ws.enf.Dispatch("spiderConfigChange", ws.currentSpiderConfig)
+}
+
+// LoadSpiderConfig returns the current spider config as JSON.
+func (ws *WebService) LoadSpiderConfig(w http.ResponseWriter, r *http.Request) {
+	log.Println("加载当前配置项")
+	w.Header().Set("Access-Control-Allow-Origin", "*")
+	w.Header().Set("Content-Type", "application/json")
+	err := json.NewEncoder(w).Encode(ws.currentSpiderConfig)
+	if err != nil {
+		log.Println("反向序列化失败")
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+}
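A client sketch for the two endpoints above (assumptions: the service is running locally on :8080 with the embedded self-signed certificate, so the client skips verification; the JSON field names follow SpiderConfigItem, and the selector value is made up):

```go
package main

import (
	"bytes"
	"crypto/tls"
	"fmt"
	"io"
	"net/http"
)

func main() {
	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}

	// POST /save updates one CSS selector on the current spider config.
	body := bytes.NewBufferString(`{"key":"titleCss","css":"h1.article-title"}`)
	resp, err := client.Post("https://localhost:8080/save", "application/json", body)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()

	// GET /load returns the current SpiderConfig as JSON.
	resp, err = client.Get("https://localhost:8080/load")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	raw, _ := io.ReadAll(resp.Body)
	fmt.Println(string(raw))
}
```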