فهرست منبع

添加加载忽略文件

小阿七 7 ماه پیش
والد
کامیت
171c4e8fc5

+ 2 - 2
backend/browser.go

@@ -159,7 +159,7 @@ func NewBrowser(headless bool, showImage bool, proxyServe bool, baseUrl string)
 
 func GetProxyAddr() string {
 	proxyAddr := "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
-	roxyAuthor := "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
+	proxyAuthor := "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
 	//获取代理
 	req, err := http.NewRequest(http.MethodGet, proxyAddr, nil)
 	if err != nil {
@@ -167,7 +167,7 @@ func GetProxyAddr() string {
 		return ""
 	}
 	//添加请求头
-	req.Header.Add("Authorization", roxyAuthor)
+	req.Header.Add("Authorization", proxyAuthor)
 	client := http.Client{}
 	//发送请求
 	resp, err := client.Do(req)

+ 9 - 2
backend/config.go

@@ -8,8 +8,15 @@ import (
 
 // 配置
+// Config holds the application settings; the yaml tag on each field names the key it is read from.
 type Config struct {
-	IsOnly4MainSite     bool `yaml:"isOnly4MainSite"` //仅仅使用重点网站数据
-	DisableLoadResource string
+	// DisableLoadResource is stored as one plain string.
+	// NOTE(review): presumably a ';'-separated list of resource patterns the browser should not load - confirm against the consumer.
+	DisableLoadResource             string `yaml:"disableLoadResource"`
+	IsOnly4MainSite                 bool   `yaml:"isOnly4MainSite"`            // use key-site data only
+	BrowserLoadResourceTimeout      int64  `yaml:"browserLoadResourceTimeout"` // browser resource-loading timeout (unit not visible here - TODO confirm)
+	// ServerCodeTimeOut is the timeout, in seconds, of the HTTP client that downloads captcha images.
+	ServerCodeTimeOut               int64  `yaml:"timeout"`
+	// ServerCodeAddress is the paid captcha endpoint; the captcha type is appended to it.
+	ServerCodeAddress               string `yaml:"address"`
+	// ServerCodeFreeAddressOcr is the default free captcha endpoint.
+	ServerCodeFreeAddressOcr        string `yaml:"freeaddressocr"`
+	// ServerCodeFreeAddressArithmetic is the free endpoint used for captcha type "6001" (arithmetic captchas).
+	ServerCodeFreeAddressArithmetic string `yaml:"freeaddressarithmetic"`
+	// Username and Password are sent as form fields to the paid captcha endpoint.
+	Username                        string `yaml:"username"`
+	Password                        string `yaml:"password"`
 }
 
 var (

+ 18 - 0
backend/config.yaml

@@ -0,0 +1,18 @@
+## SOCKS5 proxy address
+#proxy: "127.0.0.1:30000"
+## Whether to ignore certificate errors; may cause problems when requesting over the http protocol
+#ignore-certificate-errors: false
+## Disable the site security checks; anti-crawling detection will recognise this
+#disable-web-security: false
+## Disable plugin extensions
+#disable-extensions: true
+## Default browser check
+#default-browser-check: false
+browserLoadResourceTimeout: 5
+# Captcha recognition
+timeout: 15
+address: "http://pycaptcha.spdata.jianyu360.com/v1/images/discern?pic_type="
+freeaddressocr: "http://pycaptcha.spdata.jianyu360.com/v1/images/verify"
+freeaddressarithmetic: "http://pycaptcha.spdata.jianyu360.com/v1/images/arithmetic"
+# NOTE(review): the credentials below are committed in plain text; consider
+# supplying them from the environment or a secrets store instead.
+username: "jianyu001"
+password: "123qwe!A"

+ 317 - 10
backend/script/script.go

@@ -1,25 +1,30 @@
 package script
 
 import (
+	"bytes"
 	"context"
+	"crypto/tls"
+	"encoding/json"
 	"errors"
 	"fmt"
+	"github.com/chromedp/cdproto/browser"
+	"github.com/chromedp/cdproto/network"
+	"github.com/chromedp/cdproto/page"
+	"github.com/chromedp/chromedp"
+	"github.com/imroc/req/v3"
+	"github.com/yuin/gopher-lua"
 	"github.com/yuin/gopher-lua/parse"
+	"io/ioutil"
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"net/http"
 	"net/url"
 	"os"
 	"path/filepath"
 	"spider_creator/backend"
+	be "spider_creator/backend"
 	"strconv"
 	"strings"
 	"time"
-
-	"github.com/chromedp/cdproto/browser"
-	"github.com/chromedp/cdproto/network"
-	"github.com/chromedp/cdproto/page"
-	"github.com/chromedp/chromedp"
-	"github.com/yuin/gopher-lua"
-	be "spider_creator/backend"
 )
 
 const (
@@ -423,6 +428,267 @@ func (b *GLBrowser) DownloadFile(tabTitle, tabUrl string, timeout int64, selecto
 		act)
 }
 
+// AnalyzeCodeByPath solves the captcha located at path.
+//
+// The free (self-hosted) recognition service is always tried first. The paid
+// service is consulted only when the free one did not produce a result and
+// stype converts to a positive integer.
+//
+// It returns the recognised code together with the header and cookies of the
+// response the captcha image was downloaded with.
+func (b *GLBrowser) AnalyzeCodeByPath(path, stype, head, cookie string, proxy bool) (code string, rh http.Header, rc []*http.Cookie) {
+	var solved bool
+	code, rh, rc, _, solved = getCodeByFree(path, stype, head, cookie, proxy) // self-hosted service
+	qu.Debug("Get Code By Free Result:", path, solved, code)
+	if qu.IntAll(stype) <= 0 || solved {
+		return code, rh, rc
+	}
+	// Free recognition failed: fall back to the paid service.
+	return getCodeByPay(path, stype, head, cookie, proxy)
+}
+
+// getCodeByFree tries to solve a captcha with the free recognition service.
+//
+// head is an optional JSON object of request headers and cookie an optional
+// JSON array of http.Cookie values; both are attached to the image download.
+// When stype is "-1" nothing is downloaded: the bytes of path itself are
+// submitted as the image payload.
+//
+// Up to six attempts are made. A proxy address is fetched for every attempt
+// when proxy is true, and from the third attempt onwards otherwise. With
+// stype "-1" the function gives up as soon as a proxy attempt would start.
+//
+// On success ok is true, code holds the recognised text, and respheader and
+// respcookie carry the header and cookies of the image response (if one was
+// made). getCodeResp is the most recent image response.
+// qu.Catch is deferred (presumably to recover from panics - confirm).
+func getCodeByFree(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response, ok bool) {
+	defer qu.Catch()
+	client := req.C().
+		SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
+		SetTLSClientConfig(&tls.Config{
+			Renegotiation:      tls.RenegotiateOnceAsClient,
+			InsecureSkipVerify: true, // certificate verification is deliberately skipped
+		})
+	headers := map[string]string{}
+	if head != "" {
+		// A malformed payload is reported but does not abort the lookup.
+		if err := json.Unmarshal([]byte(head), &headers); err != nil {
+			qu.Debug("Parse Code Header Error: ", path, err)
+		}
+	}
+	cookies := []*http.Cookie{}
+	if cookie != "" {
+		if err := json.Unmarshal([]byte(cookie), &cookies); err != nil {
+			qu.Debug("Parse Code Cookie Error: ", path, err)
+		}
+	}
+	for times := 1; times <= 6; times++ { // at most six attempts
+		if times > 2 || proxy { // proxy on request, or from the third attempt onwards
+			if stype == "-1" {
+				// The image was passed inline, so a proxy cannot help: give up.
+				return
+			}
+			proxyIp := be.GetProxyAddr() // fetch a proxy address
+			qu.Debug("proxy:", path, proxyIp)
+			client.SetProxyURL(proxyIp)
+		}
+		request := client.R()
+		if len(headers) > 0 {
+			request.SetHeaders(headers)
+		}
+		if len(cookies) > 0 {
+			request.SetCookies(cookies...)
+		}
+		var resultByte []byte
+		if stype == "-1" { // the image payload is passed directly in path
+			resultByte = []byte(path)
+		} else {
+			// Download the captcha image; only read the body once the
+			// request is known to have succeeded.
+			var err error
+			getCodeResp, err = request.Get(path)
+			if err != nil {
+				qu.Debug("Get Code By Path Error: ", path, err)
+				continue
+			}
+			resultByte = getCodeResp.Bytes()
+		}
+		// Recognition itself (endpoint choice, response parsing) lives in getCode.
+		var err error
+		code, err = getCode(resultByte, stype, true)
+		if err == nil && code != "" {
+			if getCodeResp != nil {
+				respheader = getCodeResp.Header
+				respcookie = getCodeResp.Cookies()
+			}
+			ok = true
+			return
+		}
+	}
+	return
+}
+
+// getCodeByPay solves a captcha with the paid recognition service.
+//
+// The captcha image is downloaded from path with the optional headers (head,
+// a JSON object) and cookies (cookie, a JSON array of http.Cookie). Two
+// attempts are made; a proxy address is fetched for an attempt when proxy is
+// true or when it is the second attempt.
+//
+// On success it returns the recognised code plus the header and cookies of
+// the image response; otherwise respheader and respcookie stay nil.
+// qu.Catch is deferred (presumably to recover from panics - confirm).
+func getCodeByPay(path, stype, head, cookie string, proxy bool) (code string, respheader http.Header, respcookie []*http.Cookie) {
+	defer qu.Catch()
+	tlsCfg := &tls.Config{
+		Renegotiation:      tls.RenegotiateOnceAsClient,
+		InsecureSkipVerify: true, // certificate verification is deliberately skipped
+	}
+	client := req.C().
+		SetTimeout(time.Duration(be.Cfg.ServerCodeTimeOut) * time.Second).
+		SetTLSClientConfig(tlsCfg)
+	// Decode the optional request decoration; decode errors are ignored and
+	// leave the collection as far as it was filled.
+	headers := map[string]string{}
+	if head != "" {
+		json.Unmarshal([]byte(head), &headers)
+	}
+	cookies := []*http.Cookie{}
+	if cookie != "" {
+		json.Unmarshal([]byte(cookie), &cookies)
+	}
+	for attempt := 1; attempt <= 2; attempt++ {
+		if proxy || attempt > 1 {
+			proxyIp := be.GetProxyAddr() // fetch a proxy address
+			qu.Debug("proxy:", path, proxyIp)
+			client.SetProxyURL(proxyIp)
+		}
+		request := client.R()
+		if len(headers) > 0 {
+			request.SetHeaders(headers)
+		}
+		if len(cookies) > 0 {
+			request.SetCookies(cookies...)
+		}
+		// Download the captcha image.
+		imgResp, err := request.Get(path)
+		if err != nil {
+			qu.Debug("Get Code By Path Error: ", path, err)
+			continue
+		}
+		// Recognition itself (credentials, response parsing) lives in getCode.
+		code, err = getCode(imgResp.Bytes(), stype, false)
+		if err != nil || code == "" {
+			continue
+		}
+		respheader = imgResp.Header
+		respcookie = imgResp.Cookies()
+		return
+	}
+	return
+}
+// getCode uploads the captcha image b to a recognition service and returns
+// the recognised text.
+//
+// With free set, the free OCR endpoint is used, or the arithmetic endpoint
+// when stype is "6001". Otherwise the paid endpoint is called with stype
+// appended to its address and the configured credentials sent as form data.
+//
+// A result is accepted only when it is non-empty and either has at least four
+// bytes or stype is "6001". If the service answers without an acceptable
+// result, getCode returns ("", nil), so callers must also check code. err is
+// non-nil only when the request fails or the response is not valid JSON.
+func getCode(b []byte, stype string, free bool) (code string, err error) {
+	qu.Debug("验证码类型:", stype, ",是否免费:", free)
+	request := req.C().R().
+		SetHeader("accept", "application/json").
+		SetFileReader("file", "1", bytes.NewReader(b))
+	address := be.Cfg.ServerCodeFreeAddressOcr
+	switch {
+	case !free:
+		request.SetFormData(map[string]string{
+			"grant_type": "",
+			"username":   be.Cfg.Username,
+			"password":   be.Cfg.Password,
+			"scope":      "",
+			"client_id":  "",
+			// NOTE(review): this key ends with a space; kept as is because the
+			// server contract is not visible here - confirm whether it is a typo.
+			"client_secret ": "",
+		})
+		address = be.Cfg.ServerCodeAddress + stype
+	case stype == "6001": // arithmetic captchas have a dedicated free endpoint
+		address = be.Cfg.ServerCodeFreeAddressArithmetic
+	}
+	qu.Debug("address:", address)
+	var resp *req.Response
+	resp, err = request.Post(address)
+	if err != nil {
+		qu.Debug("analysis code by path err: ", err)
+		return "", err
+	}
+	var result map[string]interface{}
+	err = json.Unmarshal(resp.Bytes(), &result)
+	qu.Debug("验证码解析结果:", free, result)
+	if err != nil {
+		return "", err
+	}
+	// Both services nest their payload under "r". The checked assertion
+	// leaves r nil (and therefore the code empty) when that object is missing,
+	// instead of panicking on an unexpected response.
+	r, _ := result["r"].(map[string]interface{})
+	var codeTmp string
+	if free {
+		codeTmp = qu.ObjToString(r["code"])
+	} else {
+		codeTmp, _ = r["pic_str"].(string)
+	}
+	// Results shorter than four bytes are treated as misrecognition, except
+	// for arithmetic captchas whose answer may be short.
+	if codeTmp != "" && (stype == "6001" || len(codeTmp) >= 4) {
+		return codeTmp, nil
+	}
+	return "", nil
+}
+
+// AnalyzeCodeScreenShot takes a screenshot of the element matched by selector
+// in the tab identified by tabTitle/tabUrl and has the captcha in it
+// recognised.
+//
+// selectorType chooses how selector is interpreted: by element id, by query,
+// or, for any other value, by query-all. The screenshot is also written to
+// "code.png" in the working directory; a failure to write that file is only
+// logged. The free recognition service is tried first and the paid one is
+// used when it yields an error or an empty code.
+//
+// An error is returned when the tab cannot be found, the screenshot cannot be
+// taken or is empty, or the last recognition request fails.
+func (b *GLBrowser) AnalyzeCodeScreenShot(tabTitle, tabUrl, selector string, selectorType int, timeout int64, stype string) (code string, err error) {
+	ctx, err := b.findTabContext(tabTitle, tabUrl, timeout)
+	if err != nil {
+		return "", err
+	}
+	var bt []byte
+	var act chromedp.QueryAction
+	switch selectorType {
+	case selector_type_id:
+		act = chromedp.Screenshot(selector, &bt, chromedp.ByID)
+	case selector_type_query:
+		act = chromedp.Screenshot(selector, &bt, chromedp.ByQuery)
+	default:
+		// The action must be assigned here as well; otherwise a nil action
+		// would be handed to chromedp.Run.
+		act = chromedp.Screenshot(selector, &bt, chromedp.ByQueryAll)
+	}
+	if err = chromedp.Run(ctx, act); err != nil {
+		return "", err
+	}
+	if len(bt) == 0 {
+		return "", errors.New("captcha screenshot is empty")
+	}
+	// Keep a copy of the screenshot; failing to save it must not fail the call.
+	if werr := ioutil.WriteFile("code.png", bt, 0644); werr != nil {
+		qu.Debug(werr)
+	}
+	code, err = getCode(bt, stype, true) // free service
+	if err != nil || code == "" {
+		code, err = getCode(bt, stype, false) // paid service
+	}
+	return code, err
+}
+
 // BindLuaState
 func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 	//执行暂停
@@ -591,6 +857,35 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 		}
 		return 1
 	}))
+	//s.SetGlobal("browser_analyzecode_bypath", s.NewFunction(func(S *lua.LState) int {
+	//	proxy := S.ToBool(-5)
+	//	url := S.ToString(-4)
+	//	stype := S.ToString(-3)
+	//	head := S.ToTable(-2)
+	//	cookie := S.ToString(-1)
+	//	headMap := TableToMap(head)
+	//	//qu.Debug("cookie----------", cookie)
+	//	//qu.Debug("headMap----------", headMap)
+	//	headJsonStr := ""
+	//	headByte, err := json.Marshal(headMap)
+	//	if err == nil {
+	//		headJsonStr = string(headByte)
+	//	}
+	//	code, respHead, respCookie := b.AnalyzeCodeByPath(url, stype, headJsonStr, cookie, proxy)
+	//	rhead, _ := json.Marshal(respHead)
+	//	respHeadMap := map[string]interface{}{}
+	//	json.Unmarshal(rhead, &respHeadMap)
+	//	hTable := MapToTable(respHeadMap)
+	//
+	//	rcookie, _ := json.Marshal(respCookie)
+	//	respCookieMap := []map[string]interface{}{}
+	//	json.Unmarshal(rcookie, &respCookieMap)
+	//	cTable := MapToTable(map[string]interface{}{"cookie": respCookieMap})
+	//	S.Push(lua.LString(code))
+	//	S.Push(hTable)
+	//	S.Push(cTable.RawGetString("cookie"))
+	//	return 3
+	//}))
 	//发布时间格式化
 	s.SetGlobal("browser_publishtime", s.NewFunction(func(l *lua.LState) int {
 		text := l.ToString(-1)
@@ -598,17 +893,29 @@ func (b *GLBrowser) BindLuaState(s *lua.LState, recordId string) {
 		l.Push(lua.LString(publishtime))
 		return 1
 	}))
+	//截屏功能
+	s.SetGlobal("browser_analyzecode_screenshot", s.NewFunction(func(l *lua.LState) int {
+		tabTitle := l.ToString(-6)
+		tabUrl := l.ToString(-5)
+		stype := l.ToString(-4)
+		timeout := l.ToInt64(-3)
+		selectorType := l.ToInt(-2)
+		selector := l.ToString(-1)
+		code, _ := b.AnalyzeCodeScreenShot(tabTitle, tabUrl, selector, selectorType, timeout, stype)
+		l.Push(lua.LString(code))
+		return 1
+	}))
 	//保存数据
 	s.SetGlobal("browser_savedata", s.NewFunction(func(l *lua.LState) int {
 		//fmt.Println("---browser_savedata---")
-		page := l.ToString(-2)
+		pageType := l.ToString(-2)
 		data := l.ToTable(-1)
 		result := TableToMap(data)
-		if page == "list" {
+		if pageType == "list" {
 			result["recordid"] = recordId
 		}
 		DataCache <- result
-		return 1
+		return 0
 	}))
 	//获取数据
 	s.SetGlobal("browser_getdata", s.NewFunction(func(l *lua.LState) int {

+ 41 - 18
frontend/src/components/spider/CodeEditor.vue

@@ -1,4 +1,5 @@
 <template>
+<<<<<<< HEAD
     <el-dialog title="编辑代码" :model-value="props.show" @update:model-value="updateModelValue" :close-on-click-modal="false" width="70%">
         <el-button-group>
             <el-tooltip v-for="item,index in TemplateJsCode.InitListPageJsCodes" :key="index" class="box-item" effect="dark" :content="item.tooltip"
@@ -15,6 +16,23 @@
             <el-button type="primary" @click="handleSave(false)">保 存</el-button>
         </div>
     </el-dialog>
+=======
+  <el-dialog title="编辑代码" :model-value="props.show" @update:model-value="updateModelValue" :close-on-click-modal="false" width="70%">
+    <el-button-group>
+      <el-tooltip v-for="item,index in TemplateJsCode.InitListPageJsCodes" :key="index" class="box-item" effect="dark" :content="item.tooltip" placement="top-start">
+        <el-button size="small" type="primary" @click='useInitPageJsCode(index)'>{{item.name}}</el-button>
+      </el-tooltip>
+    </el-button-group>
+    <div style="line-height: 6px;height: 6px;"></div>
+    <div class="textarea-container">
+      <el-input v-model="dialogInfo.text" class="textarea" autofocus :autosize="{ minRows: 12, maxRows: 13 }" type="textarea" placeholder="Please input"></el-input>
+    </div>
+    <div slot="footer" class="dialog-footer" style="text-align:right">
+      <el-button @click="handleSave(false)">取 消</el-button>
+      <el-button type="primary" @click="handleSave(false)">保 存</el-button>
+    </div>
+  </el-dialog>
+>>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 </template>
 <script setup>
 import { reactive } from 'vue';
@@ -22,47 +40,52 @@ import { TemplateJsCode } from './jscodetpl.js'
 
 const emit = defineEmits(['update:show', 'save'])
 const props = defineProps({
-    show: {
-        type: Boolean,
-        default: false
-    }
+  show: {
+    type: Boolean,
+    default: false
+  }
 })
 
 //使用页面初始化模板
 const useInitPageJsCode=(index)=>{
-    let code =  TemplateJsCode.InitListPageJsCodes[index].code
-    dialogInfo.text = code
+  let code =  TemplateJsCode.InitListPageJsCodes[index].code
+  dialogInfo.text = code
 }
 
 const dialogInfo = reactive({
-    key: '',
-    text: '',
+  key: '',
+  text: '',
 })
 
 const setPageData = ({ text, key }) => {
-    dialogInfo.text = text
-    dialogInfo.key = key
+  dialogInfo.text = text
+  dialogInfo.key = key
 }
 const handleSave = (f) => {
-    emit('update:show', f)
-    emit('save', {
-        text: dialogInfo.text,
-        key: dialogInfo.key,
-    })
+  emit('update:show', f)
+  emit('save', {
+    text: dialogInfo.text,
+    key: dialogInfo.key,
+  })
 }
 const updateModelValue = (e) => {
-    emit('update:show', e)
+  emit('update:show', e)
 }
 
 defineExpose({
-    setPageData
+  setPageData
 })
 </script>
 
 <style lang="scss" scoped>
 .textarea-container {
-    .textarea {
-        height: 300px;
-    }
+  .textarea {
+    height: 300px;
+  }
 }
 </style>

+ 17 - 2
frontend/src/components/spider/EditSpider.vue

@@ -247,7 +247,7 @@
 </template>
 
 <script setup>
-import { ref, reactive, defineEmits, computed, watchEffect } from 'vue';
+import { ref, reactive, defineEmits, defineProps, computed, watchEffect } from 'vue';
 import { ElMessage, ElMessageBox } from 'element-plus'
 import { TemplateJsCode } from './jscodetpl.js'
 import { Link } from '@element-plus/icons-vue'
@@ -262,6 +262,13 @@ const codeEditor =ref(null)
 const codeEditorShow = ref(false)
 
 const emit = defineEmits(['custom-event', 'data-tag', 'form-change']);
+const props = defineProps({
+  from: {
+    type: String,
+    default: ''
+  }
+})
+
 let originData = {}
 
 const dialogTitle = ref('仅编辑 CSS选择器部分')
@@ -371,10 +378,18 @@ const dialogVisible = ref(false)
 // 用户身份标识
 const userRole = computed(() => store.getters.userRole)
 const isDeveloper = computed(() => [USER_ROLE_DEVELOPER].includes(userRole.value))
+const isReviewer = computed(() => [USER_ROLE_REVIEWER].includes(userRole.value))
 
 // 待完成和未通过的爬虫可以保存,其他都不可以提交(并禁用保存按钮)
 const canSubmitStatusArr = [0, 2]
-const canSubmit = computed(() => canSubmitStatusArr.includes(formData.value.state) && isDeveloper.value)
+const userSubmitRolePass = computed(() => isDeveloper.value || isReviewer.value)
+const canSubmit = computed(() => {
+  if (props.from === 'reviewList') {
+    return userSubmitRolePass.value
+  } else {
+    return canSubmitStatusArr.includes(formData.value.state) && userSubmitRolePass.value
+  }
+})
 const savaButtonDisabled = computed(() => !canSubmit.value)
 
 //编辑器事件管理

+ 15 - 1
frontend/src/components/spider/RunSpiderDialog.vue

@@ -22,6 +22,12 @@ import { useStore } from 'vuex';
 import { USER_ROLE_ADMIN, USER_ROLE_DEVELOPER, USER_ROLE_REVIEWER } from '../../data/user'
 
 const emit = defineEmits(['save'])
+const props = defineProps({
+  from: {
+    type: String,
+    default: ''
+  }
+})
 
 const store = useStore();
 
@@ -34,10 +40,18 @@ const dialogTitle = ref('调试/运行')
 // 用户身份标识
 const userRole = computed(() => store.getters.userRole)
 const isDeveloper = computed(() => [USER_ROLE_DEVELOPER].includes(userRole.value))
+const isReviewer = computed(() => [USER_ROLE_REVIEWER].includes(userRole.value))
 
 // 待完成和未通过的爬虫可以保存,其他都不可以提交(并禁用保存按钮)
 const canSubmitStatusArr = [0, 2]
-const canSubmit = computed(() => canSubmitStatusArr.includes(formData.value.state) && isDeveloper.value)
+const userSubmitRolePass = computed(() => isDeveloper.value || isReviewer.value)
+const canSubmit = computed(() => {
+  if (props.from === 'reviewList') {
+    return userSubmitRolePass.value
+  } else {
+    return canSubmitStatusArr.includes(formData.value.state) && userSubmitRolePass.value
+  }
+})
 const savaButtonDisabled = computed(() => !canSubmit.value)
 
 const setPageData = (e) => {

+ 151 - 84
frontend/src/components/spider/jscodetpl.js

@@ -221,23 +221,23 @@ if ("{{.PublishTimeCss}}" != "") {//发布时间
 	if (tmp) ret["publishTime"] = tmp.getAttribute("title") || tmp.innerText
 }
 if ("{{.ContentCss}}" != "") {//正文内容
-  tmp = document.querySelector("{{.ContentCss}}") || document.querySelector("第二套CSS选择器,请修改")
-  if (tmp) {
-    ret["content"] = tmp.innerText
-    ret["contentHtml"] = tmp.innerHTML
-    var patchContent = false
-    //处理详情页中的大图,大图作为附件使用
-    const images = tmp.querySelectorAll("img");
-    images.forEach((img, i) => {
-      if (img.width > 300) {
-        patchContent = true
-        const a = document.createElement("a");
-        a.href = img.src;
-        a.innerText = img.src;
-        tmp.appendChild(a);
-      }
-    })
-  }
+    tmp = document.querySelector("{{.ContentCss}}") || document.querySelector("第二套CSS选择器,请修改")
+        if (tmp) {
+            ret["content"] = tmp.innerText
+            ret["contentHtml"] = tmp.innerHTML
+            var patchContent = false
+            //处理详情页中的大图,大图作为附件使用
+            const images = tmp.querySelectorAll("img");
+            images.forEach((img, i) => {
+                if (img.width > 300) {
+                    patchContent = true
+                    const a = document.createElement("a");
+                    a.href = img.src;
+                    a.innerText = img.src;
+                    tmp.appendChild(a);
+                }
+            })
+    }
 }
 if("{{.AttachCss}}"!=""){//附件
 	tmp = document.querySelectorAll("{{.AttachCss}} a")  
@@ -270,51 +270,51 @@ ret
     var tmp = null
     
     if ("{{.TitleCss}}" != "") {//标题
-    tmp = document.querySelector("{{.TitleCss}}") || document.querySelector("#activity-name")
-    if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
+        tmp = document.querySelector("{{.TitleCss}}") || document.querySelector("#activity-name")
+        if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
     }
     if ("{{.PublishUnitCss}}" != "") {//采购单位
-    tmp = document.querySelector("{{.PublishUnitCss}}") || document.querySelector("#js_name")
-    if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
+        tmp = document.querySelector("{{.PublishUnitCss}}") || document.querySelector("#js_name")
+        if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
     }
     if ("{{.PublishTimeCss}}" != "") {//发布时间
-    tmp = document.querySelector("{{.PublishTimeCss}}") || document.querySelector("#publish_time")
-    if (tmp) ret["publishTime"] = tmp.getAttribute("title") || tmp.innerText
+        tmp = document.querySelector("{{.PublishTimeCss}}") || document.querySelector("#publish_time")
+        if (tmp) ret["publishTime"] = tmp.getAttribute("title") || tmp.innerText
     }
     if ("{{.ContentCss}}" != "") {//正文内容
-    tmp = document.querySelector("{{.ContentCss}}") || document.querySelector("#js_content")
-    if (tmp) {
-    ret["content"] = tmp.innerText
-    ret["contentHtml"] = tmp.innerHTML
-    var patchContent = false
-    //处理详情页中的大图,大图作为附件使用
-    const images = tmp.querySelectorAll("img");
-    images.forEach((img, i) => {
-      if (img.width > 300) {
-        patchContent = true
-        const a = document.createElement("a");
-        a.href = img.src;
-        a.innerText = img.src;
-        tmp.appendChild(a);
-      }
-    })
-    }
+        tmp = document.querySelector("{{.ContentCss}}") || document.querySelector("#js_content")
+        if (tmp) {
+            ret["content"] = tmp.innerText
+            ret["contentHtml"] = tmp.innerHTML
+            var patchContent = false
+            //处理详情页中的大图,大图作为附件使用
+            const images = tmp.querySelectorAll("img");
+            images.forEach((img, i) => {
+              if (img.width > 300) {
+                patchContent = true
+                const a = document.createElement("a");
+                a.href = img.src;
+                a.innerText = img.src;
+                tmp.appendChild(a);
+              }
+            })
+        }
     }
     if("{{.AttachCss}}"!=""){//附件
-    tmp = document.querySelectorAll("{{.AttachCss}} a")
-    let attach=[]
-    if(tmp){
-        tmp.forEach((v,i)=>{
-            attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
-        })
-    }
-    tmp = document.querySelectorAll("#js_content a")
-    if(tmp){
-        tmp.forEach((v,i)=>{
-            attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
-        })
-    }
-    ret["attachLinks"]=attach
+        tmp = document.querySelectorAll("{{.AttachCss}} a")
+        let attach=[]
+        if(tmp){
+            tmp.forEach((v,i)=>{
+                attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
+            })
+        }
+        tmp = document.querySelectorAll("#js_content a")
+        if(tmp){
+            tmp.forEach((v,i)=>{
+                attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
+            })
+        }
+        ret["attachLinks"]=attach
     }
     //检查中文字符个数,少于20,修正正文内容
     let regex = /[\\u4e00-\\u9fa5]/g;
@@ -393,51 +393,115 @@ var ret = {}
 var tmp = null
 
 if ("{{.TitleCss}}" != "") {//标题
-tmp = document.querySelector("{{.TitleCss}}")
-if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
+    tmp = document.querySelector("{{.TitleCss}}")
+    if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
 }
 if ("{{.PublishUnitCss}}" != "") {//采购单位
-tmp = document.querySelector("{{.PublishUnitCss}}")
-if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
+    tmp = document.querySelector("{{.PublishUnitCss}}")
+    if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
 }
 if ("{{.PublishTimeCss}}" != "") {//发布时间
-tmp = document.querySelector("{{.PublishTimeCss}}")
-if (tmp) {
-    //        格式:2024/01/05                  2024-01-05                   2024年01月05日                   15:01:01 (仅时间,可以自己修改)
-    var regTpl = ["(\\\\d{4}/\\\\d{1,2}/\\\\d{1,2})","(\\\\d{4}-\\\\d{1,2}-\\\\d{1,2})","(\\\\d{4}年\\\\d{1,2}月\\\\d{1,2}日)","\\\\d{1,2}:\\\\d{1,2}:\\\\d{1,2})"]
-    //TODO 重点要修改这里的regTpl 索引号,也可以自己修改设置正则表达式
-    var reg = new RegExp(regTpl[0])
-    tmp = tmp.innerText.match(reg)
-    if(tmp && tmp.length>1)ret["publishTime"] = tmp[1]
+    tmp = document.querySelector("{{.PublishTimeCss}}")
+    if (tmp) {
+        //        格式:2024/01/05                  2024-01-05                   2024年01月05日                   15:01:01 (仅时间,可以自己修改)
+        var regTpl = ["(\\\\d{4}/\\\\d{1,2}/\\\\d{1,2})","(\\\\d{4}-\\\\d{1,2}-\\\\d{1,2})","(\\\\d{4}年\\\\d{1,2}月\\\\d{1,2}日)","\\\\d{1,2}:\\\\d{1,2}:\\\\d{1,2})"]
+        //TODO 重点要修改这里的regTpl 索引号,也可以自己修改设置正则表达式
+        var reg = new RegExp(regTpl[0])
+        tmp = tmp.innerText.match(reg)
+        if(tmp && tmp.length>1)ret["publishTime"] = tmp[1]
     }
 }
 if ("{{.ContentCss}}" != "") {//正文内容
-tmp = document.querySelector("{{.ContentCss}}")
-if (tmp) {
-ret["content"] = tmp.innerText
-ret["contentHtml"] = tmp.innerHTML
-var patchContent = false
-//处理详情页中的大图,大图作为附件使用
-const images = tmp.querySelectorAll("img");
-images.forEach((img, i) => {
-  if (img.width > 300) {
-    patchContent = true
-    const a = document.createElement("a");
-    a.href = img.src;
-    a.innerText = img.src;
-    tmp.appendChild(a);
-  }
-})
+    tmp = document.querySelector("{{.ContentCss}}")
+    if (tmp) {
+        ret["content"] = tmp.innerText
+        ret["contentHtml"] = tmp.innerHTML
+        var patchContent = false
+        //处理详情页中的大图,大图作为附件使用
+        const images = tmp.querySelectorAll("img");
+        images.forEach((img, i) => {
+          if (img.width > 300) {
+            patchContent = true
+            const a = document.createElement("a");
+            a.href = img.src;
+            a.innerText = img.src;
+            tmp.appendChild(a);
+          }
+        })
+    }
 }
+if("{{.AttachCss}}"!=""){//附件
+    tmp = document.querySelectorAll("{{.AttachCss}} a")
+    let attach=[]
+    if(tmp){
+        tmp.forEach((v,i)=>{
+            attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
+        })
+    }
+    ret["attachLinks"]=attach
+}
+//检查中文字符个数,少于20,修正正文内容
+let regex = /[\\u4e00-\\u9fa5]/g;
+let chineseCharacters = ret["content"]?ret["content"].match(regex):[];
+let chineseCharactersLen=chineseCharacters ? chineseCharacters.length : 0;
+if (chineseCharactersLen < 20 && ret["attachLinks"] && ret["attachLinks"].length>0) ret["content"] = '详情请访问原网页!'
+ret 
+`}, {
+            "name": "模版6",
+            "tooltip": "基础模板扩展,正文、附件区域,都需要找附件",
+            "code": `
+var ret = {}
+var tmp = null
+
+if ("{{.TitleCss}}" != "") {//标题
+    tmp = document.querySelector("{{.TitleCss}}")
+    if (tmp) ret["title"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.PublishUnitCss}}" != "") {//采购单位
+    tmp = document.querySelector("{{.PublishUnitCss}}")
+    if (tmp) ret["publishUnit"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.PublishTimeCss}}" != "") {//发布时间
+    tmp = document.querySelector("{{.PublishTimeCss}}")
+    if (tmp) ret["publishTime"] = tmp.getAttribute("title") || tmp.innerText
+}
+if ("{{.ContentCss}}" != "") {//正文内容
+    tmp = document.querySelector("{{.ContentCss}}")
+    if (tmp) {
+        ret["content"] = tmp.innerText
+        ret["contentHtml"] = tmp.innerHTML
+        var patchContent = false
+        //处理详情页中的大图,大图作为附件使用
+        const images = tmp.querySelectorAll("img");
+        images.forEach((img, i) => {
+            if (img.width > 300) {
+                patchContent = true
+                const a = document.createElement("a");
+                a.href = img.src;
+                a.innerText = img.src;
+                tmp.appendChild(a);
+            }
+        })
+    }
 }
 if("{{.AttachCss}}"!=""){//附件
-tmp = document.querySelectorAll("{{.AttachCss}} a")
-let attach=[]
+    //附件区域检查
+    tmp = document.querySelectorAll("{{.AttachCss}} a")
+    let attach=[]
+    if(tmp){
+    tmp.forEach((v,i)=>{
+        attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
+    })
+}
+//正文区域附件检查
+tmp = document.querySelectorAll("{{.ContentCss}} a")
 if(tmp){
     tmp.forEach((v,i)=>{
         attach.push({title:v.getAttribute("title")||v.innerText,href:v.href})
     })
 }
+//附件过滤
+attach = attach.filter((item)=>item.href && item.href.startsWith('http'))
 ret["attachLinks"]=attach
 }
 //检查中文字符个数,少于20,修正正文内容
@@ -446,6 +510,7 @@ let chineseCharacters = ret["content"]?ret["content"].match(regex):[];
 let chineseCharactersLen=chineseCharacters ? chineseCharacters.length : 0;
 if (chineseCharactersLen < 20 && ret["attachLinks"] && ret["attachLinks"].length>0) ret["content"] = '详情请访问原网页!'
 ret 
+<<<<<<< HEAD
 `}, {
             "name": "模版6",
             "tooltip": "基础模板扩展,正文、附件区域,都需要找附件",
@@ -510,6 +575,8 @@ let chineseCharacters = ret["content"]?ret["content"].match(regex):[];
 let chineseCharactersLen=chineseCharacters ? chineseCharacters.length : 0;
 if (chineseCharactersLen < 20 && ret["attachLinks"] && ret["attachLinks"].length>0) ret["content"] = '详情请访问原网页!'
 ret 
+=======
+>>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 `,
         },
     ],

+ 19 - 3
frontend/src/views/ReviewList.vue

@@ -90,7 +90,7 @@
                         </el-tooltip>
                     </template>
                 </el-table-column>
-                <el-table-column label="功能" :width="isAdmin ? 160 : 120" align="center">
+                <el-table-column label="功能" width="160" align="center">
                     <template #default="scope">
                         <!-- 管理员:上线和退回 -->
                         <template v-if="tableActionShow.adminGroup(scope.row)">
@@ -126,6 +126,11 @@
                                 <el-icon><CircleClose /></el-icon>
                             </el-button>
                         </el-tooltip>
+                        <el-tooltip content="退回" placement="top" v-if="tableActionShow.reviewerRollback2(scope.row)">
+                          <el-button size="small" :class="{ active: scope.row._action_clicked_reject_code }" @click="tableEvents.adminRollback(scope.$index, scope.row)">
+                            <el-icon><DArrowLeft /></el-icon>
+                          </el-button>
+                        </el-tooltip>
                     </template>
                 </el-table-column>
             </el-table>
@@ -138,8 +143,13 @@
             </div>
         </el-main>
     </el-card>
-    <EditSpider ref="editSpiderDialog" @custom-event="dialogEvents.editSpiderConfigSaveEvent" @data-tag="editDialogMarkClick($event)" />
-    <RunSpiderDialog ref="runSpiderDialog" />
+    <EditSpider
+        ref="editSpiderDialog"
+        from="reviewList"
+        @custom-event="dialogEvents.editSpiderConfigSaveEvent"
+        @data-tag="editDialogMarkClick($event)"
+    />
+    <RunSpiderDialog from="reviewList" ref="runSpiderDialog" />
     <VerifySpider ref="verifySpiderDialog" />
     <el-dialog v-model="dialog.rollbackReason" title="选择退回原因" width="500">
       <el-form>
@@ -521,6 +531,9 @@ const tableActionShow = {
     adminRollback(row) {
         // 只有已上线,才展示退回
         return row.state === 11
+    },
+    reviewerRollback2(row) {
+        return isReviewer.value
     }
 }
 const actionButtonDisabled = {
@@ -538,6 +551,9 @@ const actionButtonDisabled = {
     reviewerRollback(row) {
         return this.reviewerSubmit(row)
     },
+    reviewerRollback2(row) {
+      return this.reviewerSubmit(row)
+    },
     adminSubmit(row) {
         // 只有审核通过才能上线,否则不展示
         const canOnline = row.state === 3 || row.state === 6

+ 22 - 1
main.go

@@ -3,17 +3,27 @@ package main
 import (
 	"container/list"
 	"embed"
+<<<<<<< HEAD
+=======
+	"github.com/wailsapp/wails/v2"
+	"github.com/wailsapp/wails/v2/pkg/options"
+	"github.com/wailsapp/wails/v2/pkg/options/assetserver"
+	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+>>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 	be "spider_creator/backend"
 	bdb "spider_creator/backend/db"
 	"spider_creator/backend/script"
 	bvm "spider_creator/backend/vm"
 	bws "spider_creator/backend/webservice"
+<<<<<<< HEAD
 
 	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 
 	"github.com/wailsapp/wails/v2"
 	"github.com/wailsapp/wails/v2/pkg/options"
 	"github.com/wailsapp/wails/v2/pkg/options/assetserver"
+=======
+>>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 )
 
 var (
@@ -23,16 +33,20 @@ var (
 	db                   *bdb.SpiderDb
 	exitCh               chan bool
 	baseDir, attachesDir string           = ".", ""
-	qlmDir               string           = ""
 	currentSpiderConfig  *be.SpiderConfig = new(be.SpiderConfig)
 	currentResults                        = list.New() //b.ResultItems = make(b.ResultItems, 0)
 	vm                   *bvm.VM
 	glvm                 *script.GLVm
 	ws                   *bws.WebService
+<<<<<<< HEAD
 	//重点网站和正式环境
 	isOnly4MainSite             string = "false"
 	browserDisableLoadResources        = "ws://;wss://;.tof;.woff;.ico;.mp4;.zip;.rar;.exe;"
 	serverAddress                      = "http://visualizeld.spdata.jianyu360.com/%s" //正式环境
+=======
+	isOnly4MainSite      = "false"
+	serverAddress        = "http://visualizeld.spdata.jianyu360.com/%s" //正式环境
+>>>>>>> 3146e627030e181299474dd54a941ed33c8183bb
 	//serverAddress = "http://127.0.0.1:8091/%s" //正式环境
 )
 
@@ -40,13 +54,20 @@ var (
 // wails build -ldflags="-X 'main.isOnly4MainSite=false'" -o="剑鱼可视化爬虫开发工具_正式.exe"
 
 func init() {
-	//be.LoadConfig("./config.yaml")
-	be.Cfg.DisableLoadResource = browserDisableLoadResources
+	// Merge of both conflict sides: read the settings file first (it carries
+	// the captcha service settings), then fall back to the built-in resource
+	// block list when the file does not provide one.
+	be.LoadConfig("backend/config.yaml")
+	if be.Cfg.DisableLoadResource == "" {
+		be.Cfg.DisableLoadResource = browserDisableLoadResources
+	}
+	// isOnly4MainSite is a string so that it can be set at build time via -ldflags -X.
 	be.Cfg.IsOnly4MainSite = isOnly4MainSite == "true"
 	if be.Cfg.IsOnly4MainSite {
-		serverAddress = "http://visualize.spdata.jianyu360.com/%s" //重点网站
+		serverAddress = "http://visualize.spdata.jianyu360.com/%s" // key-site environment
 	}
 
 	qu.Debug("重点网站:", be.Cfg.IsOnly4MainSite, serverAddress)
 }
 

+ 1 - 0
qianlima.go

@@ -59,6 +59,7 @@ func (a *App) QlmDetailDataDownload(param map[string]interface{}, record map[str
 			if detailScript != "" {
 				script.Datas = []map[string]interface{}{}
 				getData(nil, qu.ObjToString(record["recordid"]), "json", "download", &script.Datas)
+				qu.Debug("获取待采数据量:", len(script.Datas))
 				if len(script.Datas) > 0 {
 					r.Err = 1
 					go DownloadData(record, detailScript, page) //下载