Quellcode durchsuchen

接口返回值结构调整

mxs vor 10 Monaten
Ursprung
Commit
f25fd23d2e
7 geänderte Dateien mit 271 neuen und 343 gelöschten Zeilen
  1. 44 0
      backend/types.go
  2. 118 0
      backend/vm/check.go
  3. 27 2
      bind4spider.go
  4. 1 1
      frontend/src/components/spider/EditSpider.vue
  5. 0 215
      frontend/wailsjs/go/models.ts
  6. 16 125
      server.go
  7. 65 0
      user.go

+ 44 - 0
backend/types.go

@@ -2,6 +2,7 @@ package backend
 
 import (
 	"container/list"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 )
 
 const (
@@ -111,6 +112,17 @@ type (
 		ResultCache *list.List //缓存
 		ExitCh      chan bool
 	}
+
+	// SpiderConfigVerifyResult holds one pass/fail flag per checked aspect of a spider configuration (filled in by vm.VerifySpiderConfig).
+	SpiderConfigVerifyResult struct {
+		Title        bool `json:"title"`        // title was non-empty on every collected item
+		PublishUnit  bool `json:"publishUnit"`  // publishing unit was non-empty on every collected item
+		PublishTime  bool `json:"publishTime"`  // publish time was non-empty on every collected item
+		Content      bool `json:"content"`      // content was non-empty on every collected item
+		Attaches     bool `json:"attaches"`     // every collected item had at least one attachment link
+		ListItems    bool `json:"listItems"`    // more than two items were collected in total
+		ListTrunPage bool `json:"listTrunPage"` // false when turning the list page failed ("Trun" spelling is part of the JSON contract)
+	}
 )
 
 func (sc SpiderConfiges) Len() int {
@@ -144,6 +156,38 @@ func CopyAttribute(dst *string, value1, value2 string) {
 	}
 }
 
+// NewSpiderConfig builds a SpiderConfig from the nested parameter map.
+//
+// The sections "param_common", "css_list", "css_content", "js_list",
+// "js_nextpage" and "js_content" are each expected to hold a
+// map[string]interface{}. A section that is missing or has another type is
+// treated as empty instead of causing a panic, so the fields it would
+// populate receive whatever qu.ObjToString yields for a nil value
+// (presumably the empty string; confirm against common_utils).
+func NewSpiderConfig(param map[string]interface{}) *SpiderConfig {
+	// section returns the named sub-map, or a nil map when the key is absent
+	// or mistyped. Indexing a nil map is safe and yields nil.
+	section := func(name string) map[string]interface{} {
+		m, _ := param[name].(map[string]interface{})
+		return m
+	}
+	common := section("param_common")
+	cssList := section("css_list")
+	cssContent := section("css_content")
+	jsList := section("js_list")
+	jsNextPage := section("js_nextpage")
+	jsContent := section("js_content")
+
+	return &SpiderConfig{
+		Site:               qu.ObjToString(common["site"]),
+		Channel:            qu.ObjToString(common["channel"]),
+		Author:             qu.ObjToString(common["modifyuser"]),
+		Url:                qu.ObjToString(common["href"]),
+		Code:               qu.ObjToString(common["code"]),
+		ListBodyCss:        qu.ObjToString(cssList["body"]),
+		ListItemCss:        qu.ObjToString(cssList["title"]),
+		ListLinkCss:        qu.ObjToString(cssList["href"]),
+		ListPubtimeCss:     qu.ObjToString(cssList["ptime"]),
+		ListNextPageCss:    qu.ObjToString(cssList["nextpage"]),
+		TitleCss:           qu.ObjToString(cssContent["title"]),
+		PublishUnitCss:     qu.ObjToString(cssContent["source"]),
+		PublishTimeCss:     qu.ObjToString(cssContent["ptime"]),
+		ContentCss:         qu.ObjToString(cssContent["content"]),
+		AttachCss:          qu.ObjToString(cssContent["file"]),
+		ListJSCode:         qu.ObjToString(jsList["js"]),
+		ContentJSCode:      qu.ObjToString(jsContent["js"]),
+		ListTrunPageJSCode: qu.ObjToString(jsNextPage["js"]),
+		// AttachJSCode is deliberately left unset; the original note marks it as not in effect.
+	}
+}
+
 // MergeSpiderConfig 合并
 func MergeSpiderConfig(src1, src2 *SpiderConfig) *SpiderConfig {
 	nsc := new(SpiderConfig)

+ 118 - 0
backend/vm/check.go

@@ -0,0 +1,118 @@
+package vm
+
+import (
+	"container/list"
+	"log"
+	be "spidercreator/backend"
+	"time"
+
+	"github.com/chromedp/chromedp"
+)
+
+// VerifySpiderConfig checks a spider configuration by crawling a small sample.
+//
+// It opens sf.Url in one browser and evaluates the list script on at most two
+// list pages. For the first two items of each page it opens the detail page in
+// a second browser and evaluates the content script. Every collected item is
+// appended to verifyResult, and the returned flags are derived from all
+// *be.ResultItem elements found in verifyResult afterwards:
+//   - Title, PublishUnit, PublishTime, Content: true only if the field is
+//     non-empty on every item (they stay true when there are no items).
+//   - Attaches: true only if every item has at least one attachment link.
+//   - ListItems: true if verifyResult holds more than two elements.
+//   - ListTrunPage: false if turning the list page failed.
+//
+// A non-nil error is returned only when the list page itself cannot be opened;
+// the accompanying result is non-nil with every flag set to false.
+func (vm *VM) VerifySpiderConfig(sf *be.SpiderConfig, verifyResult *list.List) (*be.SpiderConfigVerifyResult, error) {
+	// Keyed fields keep the literal correct even if the struct gains or
+	// reorders fields.
+	ret := &be.SpiderConfigVerifyResult{
+		Title:        true,
+		PublishUnit:  true,
+		PublishTime:  true,
+		Content:      true,
+		Attaches:     true,
+		ListItems:    true,
+		ListTrunPage: true,
+	}
+	_, baseCancelFn, _, _, ctx, incCancelFn := be.NewBrowser(false, false, "")    // used for list pages
+	_, baseCancelFn2, _, _, ctx2, incCancelFn2 := be.NewBrowser(false, false, "") // used for detail pages
+	defer func() {
+		incCancelFn2()
+		baseCancelFn2()
+		incCancelFn()
+		baseCancelFn()
+	}()
+
+	// Prefer user-supplied scripts; otherwise render them from the templates.
+	listRunJs, contentRunJs := sf.ListJSCode, sf.ContentJSCode
+	if listRunJs == "" {
+		listRunJs = renderJavascriptCoder(loadListItemsJS, sf)
+	}
+	if contentRunJs == "" {
+		contentRunJs = renderJavascriptCoder(loadContentJS, sf)
+	}
+
+	// Open the list page. Nothing can be verified if this fails, so report the
+	// error instead of evaluating scripts against an unknown page.
+	err := chromedp.Run(ctx, chromedp.Tasks{
+		chromedp.Navigate(sf.Url),
+		chromedp.WaitReady("document.body", chromedp.ByJSPath),
+		chromedp.Sleep(1000 * time.Millisecond),
+	})
+	if err != nil {
+		return &be.SpiderConfigVerifyResult{}, err
+	}
+
+	no := 1
+	for page := 0; page < 2; page++ { // inspect at most two list pages
+		listResult := make(be.ResultItems, 0)
+		err := chromedp.Run(ctx, chromedp.Tasks{
+			chromedp.Evaluate(listRunJs, &listResult),
+		})
+		if err != nil {
+			log.Println("执行JS代码失败", err.Error())
+			// NOTE(review): this skips the page turn, so the next iteration
+			// evaluates the same page again. Kept as in the original.
+			continue
+		}
+
+		// Visit the detail pages of the first two list items only.
+		for contentIndex, r := range listResult {
+			if contentIndex > 1 {
+				break
+			}
+			// Open the detail page.
+			if err := chromedp.Run(ctx2, chromedp.Tasks{
+				chromedp.Navigate(r.Href),
+				chromedp.WaitReady("document.body", chromedp.ByJSPath),
+				chromedp.Sleep(1000 * time.Millisecond),
+			}); err != nil {
+				continue
+			}
+			// Extract the detail fields into the same item.
+			if err := chromedp.Run(ctx2, chromedp.Tasks{
+				chromedp.Evaluate(contentRunJs, r),
+			}); err != nil {
+				continue
+			}
+			if sf.AttachCss != "" {
+				downloadAttaches(r, vm.attachesDir)
+			}
+			r.Site = sf.Site
+			r.Channel = sf.Channel
+			// Fall back to the values scraped from the list page.
+			if r.Title == "" {
+				r.Title = r.ListTitle
+			}
+			if r.PublishTime == "" {
+				r.PublishTime = r.ListPubTime
+			}
+			r.No = no
+			no++
+			// Store the item in the caller's result list.
+			verifyResult.PushBack(r)
+		}
+
+		// Move on to the next list page.
+		if err := trunPage(sf, 2000, ctx); err != nil {
+			ret.ListTrunPage = false
+			break
+		}
+	}
+
+	// Derive the per-field flags; a flag that has turned false stays false.
+	for el := verifyResult.Front(); el != nil; el = el.Next() {
+		r, ok := el.Value.(*be.ResultItem)
+		if !ok || r == nil {
+			// The list is caller-owned and may hold foreign values; skip them
+			// rather than dereferencing a nil pointer.
+			continue
+		}
+		if ret.Title {
+			ret.Title = r.Title != ""
+		}
+		if ret.PublishUnit {
+			ret.PublishUnit = r.PublishUnit != ""
+		}
+		if ret.PublishTime {
+			ret.PublishTime = r.PublishTime != ""
+		}
+		if ret.Content {
+			ret.Content = r.Content != ""
+		}
+		if ret.Attaches {
+			ret.Attaches = len(r.AttachLinks) > 0
+		}
+	}
+	ret.ListItems = verifyResult.Len() > 2
+
+	// TODO: decide whether each verification result should be persisted (database or memory).
+	return ret, nil
+}

+ 27 - 2
bind4spider.go

@@ -22,7 +22,7 @@ func (a *App) DebugSpider(url string, maxPages int, listDealy int64, trunPageDel
 	}
 }
 
-// 停止调试
+// StopDebugSpider 停止调试
 func (a *App) StopDebugSpider() string {
 	defer func() {
 		if err := recover(); err != nil {
@@ -37,7 +37,7 @@ func (a *App) StopDebugSpider() string {
 func (a *App) ViewResultItemAll() be.ResultItems {
 	ret := make(be.ResultItems, 0)
 	index := 0
-	for el := currentResults.Front(); el != nil; el = el.Next() {
+	for el := currentResults.Back(); el != nil; el = el.Prev() {
 		if index > 50 {
 			break
 		}
@@ -128,3 +128,28 @@ func (a *App) ExportJobResult(code string, filePath string) string {
 	vm.ExportJobResult(code, filePath)
 	return "ok"
 }
+
+// VerifySpiderConfig resets the shared result list, verifies the current
+// spider configuration against it, logs the outcome and returns the
+// verification flags produced by the VM.
+func (a *App) VerifySpiderConfig() *be.SpiderConfigVerifyResult {
+	currentResults.Init()
+	result, verifyErr := vm.VerifySpiderConfig(currentSpiderConfig, currentResults)
+	if verifyErr == nil {
+		log.Println("验证脚本配置成功")
+		return result
+	}
+	log.Println("验证脚本配置失败::", verifyErr.Error())
+	return result
+}
+
+// VerifySpiderConfig 验证
+//func (a *App) VerifySpiderConfig(param map[string]interface{}) *be.SpiderConfigVerifyResult {
+//	sc := be.NewSpiderConfig(param)
+//	currentResults.Init()
+//	ret, err := vm.VerifySpiderConfig(sc, currentResults)
+//	if err != nil {
+//		log.Println("验证脚本配置失败::", err.Error())
+//	} else {
+//		log.Println("验证脚本配置成功")
+//	}
+//	return ret
+//}

+ 1 - 1
frontend/src/components/spider/EditSpider.vue

@@ -330,7 +330,7 @@ const handleSave = () => {
     // dialogVisible.value = false;
     const payload = getPageData()
     emit("custom-event", payload)
-    formData.value = {}
+    // formData.value = {}
 }
 
 //这里是重点: 向外部组建暴露可供调用的方法

+ 0 - 215
frontend/wailsjs/go/models.ts

@@ -1,215 +0,0 @@
-export namespace backend {
-	
-	export class JobItem {
-	    code: string;
-	    site: string;
-	    channel: string;
-	    url: string;
-	    proxyServe: string;
-	    maxPages: number;
-	    threads: number;
-	    listDelay: number;
-	    trunPageDelay: number;
-	    contentDelay: number;
-	    needDownloadAttaches: boolean;
-	
-	    static createFrom(source: any = {}) {
-	        return new JobItem(source);
-	    }
-	
-	    constructor(source: any = {}) {
-	        if ('string' === typeof source) source = JSON.parse(source);
-	        this.code = source["code"];
-	        this.site = source["site"];
-	        this.channel = source["channel"];
-	        this.url = source["url"];
-	        this.proxyServe = source["proxyServe"];
-	        this.maxPages = source["maxPages"];
-	        this.threads = source["threads"];
-	        this.listDelay = source["listDelay"];
-	        this.trunPageDelay = source["trunPageDelay"];
-	        this.contentDelay = source["contentDelay"];
-	        this.needDownloadAttaches = source["needDownloadAttaches"];
-	    }
-	}
-	export class Job {
-	    code: string;
-	    name: string;
-	    items: JobItem[];
-	    proxyServe: string;
-	    maxPages: number;
-	    threads: number;
-	    listDelay: number;
-	    trunPageDelay: number;
-	    contentDelay: number;
-	    state: number;
-	    stateType: string;
-	    progress: number;
-	    needDownloadAttaches: boolean;
-	
-	    static createFrom(source: any = {}) {
-	        return new Job(source);
-	    }
-	
-	    constructor(source: any = {}) {
-	        if ('string' === typeof source) source = JSON.parse(source);
-	        this.code = source["code"];
-	        this.name = source["name"];
-	        this.items = this.convertValues(source["items"], JobItem);
-	        this.proxyServe = source["proxyServe"];
-	        this.maxPages = source["maxPages"];
-	        this.threads = source["threads"];
-	        this.listDelay = source["listDelay"];
-	        this.trunPageDelay = source["trunPageDelay"];
-	        this.contentDelay = source["contentDelay"];
-	        this.state = source["state"];
-	        this.stateType = source["stateType"];
-	        this.progress = source["progress"];
-	        this.needDownloadAttaches = source["needDownloadAttaches"];
-	    }
-	
-		convertValues(a: any, classs: any, asMap: boolean = false): any {
-		    if (!a) {
-		        return a;
-		    }
-		    if (a.slice && a.map) {
-		        return (a as any[]).map(elem => this.convertValues(elem, classs));
-		    } else if ("object" === typeof a) {
-		        if (asMap) {
-		            for (const key of Object.keys(a)) {
-		                a[key] = new classs(a[key]);
-		            }
-		            return a;
-		        }
-		        return new classs(a);
-		    }
-		    return a;
-		}
-	}
-	export class AttachLink {
-	    title: string;
-	    href: string;
-	    fileName: string;
-	    fileType: string;
-	    fileSize: string;
-	    filePath: string;
-	
-	    static createFrom(source: any = {}) {
-	        return new AttachLink(source);
-	    }
-	
-	    constructor(source: any = {}) {
-	        if ('string' === typeof source) source = JSON.parse(source);
-	        this.title = source["title"];
-	        this.href = source["href"];
-	        this.fileName = source["fileName"];
-	        this.fileType = source["fileType"];
-	        this.fileSize = source["fileSize"];
-	        this.filePath = source["filePath"];
-	    }
-	}
-	export class ResultItem {
-	    no: number;
-	    site: string;
-	    channel: string;
-	    href: string;
-	    listTitle: string;
-	    listPubishTime: string;
-	    title: string;
-	    publishUnit: string;
-	    publishTime: string;
-	    content: string;
-	    contentHtml: string;
-	    attachLinks: AttachLink[];
-	    attachJson: string;
-	
-	    static createFrom(source: any = {}) {
-	        return new ResultItem(source);
-	    }
-	
-	    constructor(source: any = {}) {
-	        if ('string' === typeof source) source = JSON.parse(source);
-	        this.no = source["no"];
-	        this.site = source["site"];
-	        this.channel = source["channel"];
-	        this.href = source["href"];
-	        this.listTitle = source["listTitle"];
-	        this.listPubishTime = source["listPubishTime"];
-	        this.title = source["title"];
-	        this.publishUnit = source["publishUnit"];
-	        this.publishTime = source["publishTime"];
-	        this.content = source["content"];
-	        this.contentHtml = source["contentHtml"];
-	        this.attachLinks = this.convertValues(source["attachLinks"], AttachLink);
-	        this.attachJson = source["attachJson"];
-	    }
-	
-		convertValues(a: any, classs: any, asMap: boolean = false): any {
-		    if (!a) {
-		        return a;
-		    }
-		    if (a.slice && a.map) {
-		        return (a as any[]).map(elem => this.convertValues(elem, classs));
-		    } else if ("object" === typeof a) {
-		        if (asMap) {
-		            for (const key of Object.keys(a)) {
-		                a[key] = new classs(a[key]);
-		            }
-		            return a;
-		        }
-		        return new classs(a);
-		    }
-		    return a;
-		}
-	}
-	export class SpiderConfig {
-	    site: string;
-	    channel: string;
-	    author: string;
-	    url: string;
-	    code: string;
-	    listBodyCss: string;
-	    listItemCss: string;
-	    listLinkCss: string;
-	    listPublishTimeCss: string;
-	    listNextPageCss: string;
-	    titleCss: string;
-	    publishUnitCss: string;
-	    publishTimeCss: string;
-	    contentCss: string;
-	    attachCss: string;
-	    listJs: string;
-	    contentJs: string;
-	    attachJs: string;
-	    listTrunPageJs: string;
-	
-	    static createFrom(source: any = {}) {
-	        return new SpiderConfig(source);
-	    }
-	
-	    constructor(source: any = {}) {
-	        if ('string' === typeof source) source = JSON.parse(source);
-	        this.site = source["site"];
-	        this.channel = source["channel"];
-	        this.author = source["author"];
-	        this.url = source["url"];
-	        this.code = source["code"];
-	        this.listBodyCss = source["listBodyCss"];
-	        this.listItemCss = source["listItemCss"];
-	        this.listLinkCss = source["listLinkCss"];
-	        this.listPublishTimeCss = source["listPublishTimeCss"];
-	        this.listNextPageCss = source["listNextPageCss"];
-	        this.titleCss = source["titleCss"];
-	        this.publishUnitCss = source["publishUnitCss"];
-	        this.publishTimeCss = source["publishTimeCss"];
-	        this.contentCss = source["contentCss"];
-	        this.attachCss = source["attachCss"];
-	        this.listJs = source["listJs"];
-	        this.contentJs = source["contentJs"];
-	        this.attachJs = source["attachJs"];
-	        this.listTrunPageJs = source["listTrunPageJs"];
-	    }
-	}
-
-}
-

+ 16 - 125
server.go

@@ -13,21 +13,10 @@ import (
 
 const HREF = "http://127.0.0.1:8091/%s"
 
-type UserInfo struct {
-	ID       string   `json:"_id"`
-	Username string   `json:"s_name"`
-	Fullname string   `json:"s_fullname"`
-	Email    string   `json:"s_email"`
-	Auth     int      `json:"i_auth"`
-	Scope    int      `json:"i_scope"`
-	Identity int      `json:"i_identity"`
-	Ids      []string `json:"ids"`
-}
-
 type Result struct {
 	Msg  string `json:"msg"`
 	Err  int    `json:"err"`
-	Data Data   `json:"data"`
+	Data *Data  `json:"data"`
 }
 
 type Data struct {
@@ -35,84 +24,26 @@ type Data struct {
 	Total int                      `json:"total"`
 }
 
-var (
-	//User map[string]interface{} //user对象
-	User *UserInfo //user对象
-)
-
-// ServerActionUserLogin 登录
-func (a *App) ServerActionUserLogin(param map[string]interface{}) map[string]interface{} {
-	qu.Debug("param---", param)
-	User = &UserInfo{}
-	//User = map[string]interface{}{}
-	getResult(User, param, "login")
-	qu.Debug("user:", *User)
-	if User.ID == "" {
-		return map[string]interface{}{
-			"msg":  "登录失败",
-			"err":  1,
-			"data": nil,
-		}
-	}
-	return map[string]interface{}{
-		"msg":  "",
-		"err":  0,
-		"data": User,
-	}
-}
-
-// ServerActionCheckLogin 检查是否登录
-func (a *App) ServerActionCheckLogin() map[string]interface{} {
-	if User != nil && User.ID != "" {
-		return map[string]interface{}{
-			"msg":  "",
-			"err":  0,
-			"data": User,
-		}
-	}
-	return map[string]interface{}{
-		"msg":  "",
-		"err":  1,
-		"data": nil,
-	}
-}
-
-// ServerActionUserLogout 退出登录
-func (a *App) ServerActionUserLogout() map[string]interface{} {
-	User = &UserInfo{}
-	return map[string]interface{}{
-		"msg":  "退出成功",
-		"err":  0,
-		"data": nil,
-	}
-}
-
 // ServerActionCodeList 获取爬虫列表
-func (a *App) ServerActionCodeList(param map[string]interface{}) Result {
+func (a *App) ServerActionCodeList(param map[string]interface{}) *Result {
 	qu.Debug("param---", param)
 	data := &Result{}
 	if User != nil {
 		formatUser(param)
 		qu.Debug("param---", param)
-		getResult(data, param, "getcodes")
+		getResult(param, data, "getcodes")
 	} else {
 		data.Msg = "用户登录异常,请重新登录!"
-		data.Err = 1
 		qu.Debug(data.Msg)
 	}
-	return *data
-	//return map[string]interface{}{
-	//	"msg":  msg,
-	//	"err":  err,
-	//	"data": data,
-	//}
+	return data
 }
 
 // ServerActionGetModifyUsers 获取爬虫开发人员列表
-func (a *App) ServerActionGetModifyUsers() Result {
+func (a *App) ServerActionGetModifyUsers() *Result {
 	data := &Result{}
 	if User != nil && User.Auth > 1 { //禁止开发查询
-		getResult(data, nil, "getmodifyusers")
+		getResult(nil, data, "getmodifyusers")
 		if len(User.Ids) > 0 && User.Identity == 0 { //外包审核员或管理员只能查看外包开发人员信息
 			resultUsers := []map[string]interface{}{}
 			for _, user := range data.Data.List {
@@ -128,14 +59,13 @@ func (a *App) ServerActionGetModifyUsers() Result {
 			data.Data.Total = len(resultUsers)
 		}
 	} else {
-		data.Msg = "查询开发列表失败"
-		data.Err = 1
+		data.Msg = "查询开发列表失败,权限不够!"
 	}
-	return *data
+	return data
 }
 
 // ServerActionClaimCodes 爬虫认领
-func (a *App) ServerActionClaimCodes() Result {
+func (a *App) ServerActionClaimCodes() *Result {
 	data := &Result{}
 	if User.Auth == 1 {
 		param := map[string]interface{}{}
@@ -143,56 +73,17 @@ func (a *App) ServerActionClaimCodes() Result {
 		getResult(data, param, "claimcode")
 	} else {
 		data.Msg = "认领失败!"
-		data.Err = 1
 	}
-	return *data
+	return data
 }
 
 // ServerActionUpdateCode 爬虫更新
-func (a *App) ServerActionUpdateCode(param []map[string]interface{}) map[string]interface{} {
+func (a *App) ServerActionUpdateCode(param map[string]interface{}) *Result {
 	qu.Debug("param---", param)
-
-	////[
-	////	[
-	////		{
-	////			"query":{},
-	////			"set":{}
-	////		},
-	////	],
-	////	[
-	////		{
-	////			"query":{},
-	////			"set":{}
-	////		},
-	////	],
-	////]
-	//data := &Result{}
-	//var msg string
-	//var err int
-	//arr := [][]map[string]interface{}{}
-	//for _, tmp := range param {
-	//	query, _ := tmp["query"].(map[string]interface{})
-	//	set, _ := tmp["set"].(map[string]interface{})
-	//	if query == nil || set == nil {
-	//		msg = "更新条件错误!"
-	//		err = 1
-	//		break
-	//	}
-	//	update := []map[string]interface{}{
-	//		query,
-	//		set,
-	//	}
-	//	arr = append(arr, update)
-	//}
-	//
-	//getResult(data, arr, "updatecode")
-	//
-	//return map[string]interface{}{
-	//	"msg":  "",
-	//	"err":  0,
-	//	"data": data,
-	//}
-	return map[string]interface{}{}
+	data := &Result{}
+	getResult(param, data, "updatecode")
+	qu.Debug(*data)
+	return data
 }
 
 // 格式化User对象
@@ -210,7 +101,7 @@ func formatUser(tmp map[string]interface{}) {
 }
 
 // 获取接口结果
-func getResult(result, param interface{}, route string) {
+func getResult(param, result interface{}, route string) {
 	jsonData, err := json.Marshal(param)
 	if err != nil {
 		qu.Debug("Error marshaling request:", err)

+ 65 - 0
user.go

@@ -0,0 +1,65 @@
+package main
+
+import qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+
+type UserInfo struct { // user record that ServerActionUserLogin decodes from the "login" response
+	ID       string   `json:"_id"` // a non-empty ID is what the login checks treat as "logged in"
+	Username string   `json:"s_name"`
+	Fullname string   `json:"s_fullname"`
+	Email    string   `json:"s_email"`
+	Auth     int      `json:"i_auth"` // permission level: 1 may claim spiders, >1 may list developers
+	Scope    int      `json:"i_scope"`
+	Identity int      `json:"i_identity"` // 0 together with non-empty Ids restricts the developer list (presumably outsourced staff; confirm)
+	Ids      []string `json:"ids"`        // presumably the user IDs this account may see; verify against the server
+}
+
+var (
+	User *UserInfo // current user; replaced on every login attempt and reset to an empty value on logout
+)
+
+// ServerActionUserLogin logs a user in.
+//
+// It replaces the package-level User with a fresh value, lets getResult fill
+// it from the "login" route, and reports the outcome as a msg/err/data map.
+// In this file err is 1 on success (data carries User) and 0 on failure.
+func (a *App) ServerActionUserLogin(param map[string]interface{}) map[string]interface{} {
+	qu.Debug("param---", param)
+	User = new(UserInfo)
+	getResult(param, User, "login")
+	qu.Debug("user:", *User)
+
+	// Start from the failure response and upgrade it once an ID is present.
+	resp := map[string]interface{}{
+		"msg":  "登录失败",
+		"err":  0,
+		"data": nil,
+	}
+	if User.ID != "" {
+		resp["msg"] = ""
+		resp["err"] = 1
+		resp["data"] = User
+	}
+	return resp
+}
+
+// ServerActionCheckLogin reports whether a user is currently logged in.
+//
+// The response has err 1 and the User as data when User is set and has a
+// non-empty ID; otherwise err is 0 and data is nil.
+func (a *App) ServerActionCheckLogin() map[string]interface{} {
+	// Guard clause: no user, or a user without an ID, counts as logged out.
+	if User == nil || User.ID == "" {
+		return map[string]interface{}{
+			"msg":  "",
+			"err":  0,
+			"data": nil,
+		}
+	}
+	return map[string]interface{}{
+		"msg":  "",
+		"err":  1,
+		"data": User,
+	}
+}
+
+// ServerActionUserLogout logs the current user out by replacing User with an
+// empty value (not nil) and returns a success response with err 1.
+func (a *App) ServerActionUserLogout() map[string]interface{} {
+	User = new(UserInfo)
+
+	resp := make(map[string]interface{}, 3)
+	resp["msg"] = "退出成功"
+	resp["err"] = 1
+	resp["data"] = nil
+	return resp
+}