瀏覽代碼

功能调整:浏览器忽略证书错误;JS 代码仅含空白字符时视为空并回退到默认模板;列表翻页延时改用配置项 ListTurnDelayTime;前端表单标签补充“CSS”后缀并引入用户角色判断

mxs 9 月之前
父節點
當前提交
f7d91139fe

+ 2 - 2
backend/browser.go

@@ -81,8 +81,8 @@ func NewBrowser(headless bool, showImage bool, proxyServe string) (
 		chromedp.Flag("no-sandbox", true),
 		chromedp.Flag("disable-dev-shm-usage", false),
 		chromedp.Flag("default-browser-check", false),
-		chromedp.Flag("ignore-certificate-errors", false), //忽略错误
-		chromedp.Flag("disable-web-security", true),       //禁用网络安全标志
+		chromedp.Flag("ignore-certificate-errors", true), //忽略错误 zj_zjsggzyjyzx_zbwjgs_gc
+		chromedp.Flag("disable-web-security", true),      //禁用网络安全标志
 		chromedp.Flag("mute-audio", false),
 		chromedp.Flag("accept-language", `zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6`),
 	)

+ 2 - 0
backend/types.go

@@ -3,6 +3,7 @@ package backend
 import (
 	"container/list"
 	"encoding/json"
+	"regexp"
 )
 
 const (
@@ -13,6 +14,7 @@ const (
 var (
 	DataResults   = map[string]*list.List{}                //调试运行结果
 	VerifyResults = map[string]*SpiderConfigVerifyResult{} //调试验证结果
+	RegSpace      = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
 )
 
 type (

+ 3 - 3
backend/vm/check.go

@@ -144,10 +144,10 @@ func (vm *VM) VerifySpiderConfig(sc *be.SpiderConfig) (*be.SpiderConfigVerifyRes
 
 	listRunJs, contentRunJs := sc.ListJSCode, sc.ContentJSCode
 	//TODO 2. 执行JS代码,获取列表页信息
-	if listRunJs == "" {
+	if be.RegSpace.ReplaceAllString(listRunJs, "") == "" {
 		listRunJs = renderJavascriptCoder(loadListItemsJS, sc)
 	}
-	if contentRunJs == "" {
+	if be.RegSpace.ReplaceAllString(contentRunJs, "") == "" {
 		contentRunJs = renderJavascriptCoder(loadContentJS, sc)
 	}
 	qu.Debug("列表页JS:", listRunJs)
@@ -196,7 +196,7 @@ T:
 		qu.Debug("列表采集条数结果:", verifyResult.Len())
 		//TODO 6.翻页
 		if len(listResult) > 0 && !ret.ListTrunPage {
-			if err = trunPage(sc, 2000, ctx); err != nil { //翻页失败
+			if err = trunPage(sc, sc.ListTurnDelayTime, ctx); err != nil { //翻页失败
 				break T
 			} else {
 				ret.ListTrunPage = true

+ 5 - 5
backend/vm/single.go

@@ -7,7 +7,7 @@ import (
 	"github.com/chromedp/chromedp"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	be "spider_creator/backend"
-	ai "spider_creator/backend/ai"
+	"spider_creator/backend/ai"
 	"strconv"
 	"time"
 )
@@ -49,7 +49,7 @@ func (vm *VM) RunSpiderTmp(url string, maxPages int, listDealy, trunPageDelay, c
 	var runJs string = sc.ListJSCode
 
 	//TODO 2. 执行JS代码,获取列表页信息
-	if runJs == "" {
+	if be.RegSpace.ReplaceAllString(runJs, "") == "" {
 		runJs = renderJavascriptCoder(loadListItemsJS, sc)
 	}
 	qu.Debug("列表页执行JS:", runJs)
@@ -129,7 +129,7 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 	var runJs string = sc.ListJSCode
 	listResult := make(be.ResultItems, 0)
 	//TODO 2. 执行JS代码,获取列表页信息
-	if runJs == "" {
+	if be.RegSpace.ReplaceAllString(runJs, "") == "" {
 		runJs = renderJavascriptCoder(loadListItemsJS, sc)
 	}
 	qu.Debug("execute list jscode", runJs)
@@ -142,11 +142,11 @@ func (vm *VM) RunSpider(url string, maxPages int, listDealy int64, contentDelay
 		return
 	}
 	vm.dnf.Dispatch("debug_event", "3 获取列表完成")
-	qu.Debug("3获取列表完成")
+	qu.Debug("3获取列表完成", len(listResult))
 
 	//TODO 3. 打开详情页 ,最多打开10条
 	runJs = sc.ContentJSCode
-	if runJs == "" {
+	if be.RegSpace.ReplaceAllString(runJs, "") == "" {
 		runJs = renderJavascriptCoder(loadContentJS, sc)
 	}
 	currentResult := list.New()

+ 1 - 1
backend/vm/vm.go

@@ -125,7 +125,7 @@ func trunPage(sc *be.SpiderConfig, delay int64, ctx context.Context) error {
 		return errors.New("当前爬虫配置,不具备翻页条件")
 	}
 	var runJs, result string = sc.ListTurnPageJSCode, ""
-	if runJs == "" {
+	if be.RegSpace.ReplaceAllString(runJs, "") == "" {
 		runJs = fmt.Sprintf(`var link=document.querySelector("%s");if(link)link.click();""`, sc.ListNextPageCss)
 	}
 	qu.Debug("将要执行翻页的JS代码,", runJs)

+ 2 - 2
backend/vm/worker.go

@@ -86,10 +86,10 @@ func (vm *VM) RunSpiderMulThreads(url string, maxPages int, listDealy int64, tru
 		close(exit)
 	}()
 	var runListJs, runContentJs string = sc.ListJSCode, sc.ContentJSCode
-	if runListJs == "" {
+	if be.RegSpace.ReplaceAllString(runListJs, "") == "" {
 		runListJs = renderJavascriptCoder(loadListItemsJS, sc)
 	}
-	if runContentJs == "" {
+	if be.RegSpace.ReplaceAllString(runContentJs, "") == "" {
 		runContentJs = renderJavascriptCoder(loadContentJS, sc)
 	}
 	qu.Debug("获取列表JS代码", runListJs)

+ 1 - 1
frontend/src/App.vue

@@ -30,7 +30,7 @@
           </el-col>
           <el-col :span="10" style="padding:15px;text-align: right;" v-if="showLogoutModule">
             <!-- <span>当前用户: {{ userName }} / {{ userRole }} &nbsp;</span> -->
-            <span>当前用户: {{ userName }} &nbsp;</span>
+            <span>当前用户{{ userName }} &nbsp;</span>
             <el-tooltip content="退出登录" placement="bottom">
               <el-button type="danger" @click="doLogout">
                 <el-icon><SwitchButton /></el-icon>

+ 17 - 9
frontend/src/components/spider/EditSpider.vue

@@ -204,12 +204,16 @@
 import { ref, defineEmits, computed } from 'vue';
 import { TemplateJsCode } from './jscodetpl.js'
 import { Link } from '@element-plus/icons-vue'
+import { useStore } from 'vuex'
+import { USER_ROLE_ADMIN, USER_ROLE_DEVELOPER, USER_ROLE_REVIEWER } from '../../data/user'
 import { ServerActionCurrentOpenTab } from "../../../wailsjs/go/main/App"
 const emit = defineEmits(['custom-event', 'data-tag', 'form-change']);
 let originData = {}
 
 const dialogTitle = ref('仅编辑 CSS选择器部分')
 
+const store = useStore()
+
 const defaultFormValue = {
     delayTime: 500,
     maxPages: 1,
@@ -273,20 +277,24 @@ const fastKeyDownMap = {
 }
 // 背景色map
 const cssInputBg = {
-    listItemCss: { color: "#fff9c4", label: "列表条目", formLabel: '条目区域块' },
-    listLinkCss: { color: "#bbdefb", label: "列表标题", formLabel: '条目链接' },
-    listPublishTimeCss: { color: "#c8e6c9", label: "列表发布时间", formLabel: '条目发布时间' },
-    listNextPageCss: { color: "#dcedc8", label: "列表下一页", formLabel: '翻页下一页' },
-    titleCss: { color: "#e7f3fe", label: "文章标题", formLabel: '详情页标题' },
-    publishTimeCss: { color: "#ffe0b2", label: "文章发布时间", formLabel: '详情页发布时间' },
-    publishUnitCss: { color: "#ffe1e1", label: "文章发布单位", formLabel: '详情页发布单位' },
-    contentCss: { color: "#e0f7fa", label: "文章正文", formLabel: '详情页正文' },
-    attachCss: { color: "#fff1e5", label: "文章附件", formLabel: '详情页附件' },
+  listItemCss: { color: "#fff9c4", label: "列表条目", formLabel: '条目区域块CSS' },
+  listLinkCss: { color: "#bbdefb", label: "列表标题", formLabel: '条目链接CSS' },
+  listPublishTimeCss: { color: "#c8e6c9", label: "列表发布时间", formLabel: '条目发布时间CSS' },
+  listNextPageCss: { color: "#dcedc8", label: "列表下一页", formLabel: '翻页下一页CSS' },
+  titleCss: { color: "#e7f3fe", label: "文章标题", formLabel: '详情页标题CSS' },
+  publishTimeCss: { color: "#ffe0b2", label: "文章发布时间", formLabel: '详情页发布时间CSS' },
+  publishUnitCss: { color: "#ffe1e1", label: "文章发布单位", formLabel: '详情页发布单位CSS' },
+  contentCss: { color: "#e0f7fa", label: "文章正文", formLabel: '详情页正文CSS' },
+  attachCss: { color: "#fff1e5", label: "文章附件", formLabel: '详情页附件CSS' },
 }
 
 const activeName = ref("first")
 const dialogVisible = ref(false)
 
+// 用户身份标识
+const userRole = computed(() => store.getters.userRole)
+const isDeveloper = computed(() => [USER_ROLE_DEVELOPER].includes(userRole.value))
+
 // 待完成和未通过的爬虫可以保存,其他都不可以提交(并禁用保存按钮)
 const canSubmitStatusArr = [0, 2]
 const canSubmit = computed(() => canSubmitStatusArr.includes(formData.value.state))