ソースを参照

验证码调整

maxiaoshan 2 年 前
コミット
0c5c755387

+ 2 - 1
src/analysiscode/config.json

@@ -2,7 +2,8 @@
 	"serverport": ":8030",
 	"serveraddress": "127.0.0.1:8030",
 	"servercodeaddress": "http://123.57.163.80:2119/v1/images/discern?pic_type=",
-	"servercodefreeaddress": "http://123.57.163.80:2119/v1/images/verify",
+	"servercodefreeaddressocr": "http://123.57.163.80:2119/v1/images/verify",
+	"servercodefreeaddressarithmetic": "http://123.57.163.80:2119/v1/images/arithmetic",
 	"username":"jianyu001",
 	"password": "123qwe!A",
 	"proxyaddr": "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",

+ 27 - 16
src/analysiscode/server.go

@@ -57,16 +57,19 @@ func (cs *CodeService) GetCodeByImgPath(ctx context.Context, req *proto.GetCodeb
 func getCode(path, stype, head, cookie string) (code string, respheader http.Header, respcookie []*http.Cookie) {
 	//先用免费,未识别再用收费
 	coderesp := &req.Response{}
-	code, respheader, respcookie, coderesp = getCodeByFree(path, stype, head, cookie) //自己的服务
-	util.Debug("Get Code By Free Result:", code)
-	if util.IntAll(stype) > 0 && len(code) < 4 { //免费的未能识别使用超级鹰
+	ok := false
+	code, respheader, respcookie, coderesp, ok = getCodeByFree(path, stype, head, cookie) //自己的服务
+	util.Debug("Get Code By Free Result:", ok, code)
+	//if util.IntAll(stype) > 0 && len(code) < 4 { //免费的未能识别使用超级鹰
+	if util.IntAll(stype) > 0 && !ok {
 		savefile(path, coderesp)                                               //保存图片
 		code, respheader, respcookie = getCodeByPay(path, stype, head, cookie) //超级鹰收费
 	}
+	//}
 	return
 }
 
-func getCodeByFree(path, stype, head, cookie string) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response) {
+func getCodeByFree(path, stype, head, cookie string) (code string, respheader http.Header, respcookie []*http.Cookie, getCodeResp *req.Response, ok bool) {
 	defer util.Catch()
 	client := req.C().
 		SetTimeout(spiderutil.Config.TimeOut * time.Second).
@@ -101,10 +104,14 @@ func getCodeByFree(path, stype, head, cookie string) (code string, respheader ht
 		//下载验证码图片
 		var err error
 		var resultByte []byte
+		address := spiderutil.Config.ServerCodefreeAddressOcr
 		if stype == "-1" { //传base64的图片
 			resultByte = []byte(path)
-		} else { //通过请求图片地址返回的byte
-			getCodeResp, err = request.Get(path)
+		} else {
+			if stype == "6001" { //计算类验证码解析接口地址
+				address = spiderutil.Config.ServerCodefreeAddressArithmetic
+			}
+			getCodeResp, err = request.Get(path) //通过请求图片地址返回的byte
 			resultByte = getCodeResp.Bytes()
 		}
 		if err != nil {
@@ -115,7 +122,7 @@ func getCodeByFree(path, stype, head, cookie string) (code string, respheader ht
 		codeResp, err := client.R().
 			SetHeader("accept", "application/json").
 			SetFileReader("file", "1", bytes.NewReader(resultByte)).
-			Post(spiderutil.Config.ServerCodefreeAddress)
+			Post(address)
 		if err != nil {
 			util.Debug("analysis code by path err: ", err)
 			continue
@@ -123,14 +130,18 @@ func getCodeByFree(path, stype, head, cookie string) (code string, respheader ht
 		yzmResult := map[string]interface{}{}
 		json.Unmarshal(codeResp.Bytes(), &yzmResult)
 		util.Debug(yzmResult)
-		yzm := util.ObjToString(yzmResult["r"].(map[string]interface{})["code"])
-		if yzm != "" && len(yzm) >= 4 {
-			code = yzm //长度小于4的视为识别错误
-			if getCodeResp != nil {
-				respheader = getCodeResp.Header
-				respcookie = getCodeResp.Cookies()
+		result := yzmResult["r"].(map[string]interface{})
+		yzm := fmt.Sprint(result["code"])
+		if yzm != "" {
+			if stype == "6001" || len(yzm) >= 4 {
+				code = yzm //长度小于4的视为识别错误
+				if getCodeResp != nil {
+					respheader = getCodeResp.Header
+					respcookie = getCodeResp.Cookies()
+				}
+				ok = true
+				return
 			}
-			return
 		}
 	}
 	return
@@ -152,8 +163,8 @@ func getCodeByPay(path, stype, head, cookie string) (code string, respheader htt
 	if cookie != "" {
 		json.Unmarshal([]byte(cookie), &cookies)
 	}
-	for times := 1; times <= 6; times++ { //重试三次
-		if times > 4 { //重试第4次开始,使用代理ip
+	for times := 1; times <= 2; times++ { //最多尝试两次
+		if times > 1 { //重试第2次开始,使用代理ip
 			proxyIp := spiderutil.GetProxyAddr(spiderutil.Config.ProxyAddr, spiderutil.Config.ProxyAuthor) //获取代理地址
 			util.Debug("proxy:", proxyIp)
 			client.SetProxyURL(proxyIp) //设置代理IP

+ 7 - 6
src/spiderutil/sysconfig.go

@@ -36,7 +36,7 @@ type config struct {
 	DayNum            int                        `json:"daynum"`
 	Modal             int                        `json:"Modal"`             //1列表页三级页分开采集,0原始采完列表采三级页(7000,7700)
 	IsHistoryEvent    bool                       `json:"ishistoryevent"`    //只有7000为true
-	SiteType          []string                   `json:"sitetype"`          //网站类型
+	SiteType          map[string][]string        `json:"sitetype"`          //网站类型
 	SiteColl          string                     `json:"sitecoll"`          //网站表名
 	ThreadBaseNum     int                        `json:"threadbasenum"`     //开启线程的数据基数
 	ThreadUpperLimit  int                        `json:"threadupperlimit"`  //总线程上限
@@ -92,11 +92,12 @@ type config struct {
 	//es
 	Es map[string]interface{} `json:"es"`
 	//获取验证码相关
-	ServerAddress         string        `json:"serveraddress"`
-	ServerPort            string        `json:"serverport"`
-	ServerCodeAddress     string        `json:"servercodeaddress"`
-	ServerCodefreeAddress string        `json:"servercodefreeaddress"`
-	TimeOut               time.Duration `json:"timeout"`
+	ServerAddress                   string        `json:"serveraddress"`
+	ServerPort                      string        `json:"serverport"`
+	ServerCodeAddress               string        `json:"servercodeaddress"`
+	ServerCodefreeAddressOcr        string        `json:"servercodefreeaddressocr"`
+	ServerCodefreeAddressArithmetic string        `json:"servercodefreeaddressarithmetic"`
+	TimeOut                         time.Duration `json:"timeout"`
 	//js相关
 	JsServerAddress string `json:"jsserveraddress"`
 	//系统库

+ 1 - 1
src/spiderutil/template.go

@@ -74,7 +74,7 @@ function downloadAndParseListPage(pageno)
 				item=findMap(item,"<table><tr>"..v.."</tr></table>")
 				if item["title"]~=nil and item["title"]~="" then --title校验
 					item["title"]=com.trim(item["title"])
-					sendListNum(k,list) --推送下载量
+					sendListNum(pageno,list) --推送下载量
 					local timeType="%s"
 					item["publishtime"]=com.parseDate(item["publishtime"],timeType)--时间格式
 					item["href"]=com.gethref(spiderTargetChannelUrl,item["href"])