fengweiqiang пре 5 година
родитељ
комит
3efa765d02
2 измењених фајлова са 51 додато и 40 уклоњено
  1. 50 38
      src/jy/extract/extract.go
  2. 1 2
      src/jy/extract/extractInit.go

+ 50 - 38
src/jy/extract/extract.go

@@ -27,12 +27,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut           = ju.NewCut()                          //获取正文并清理
-	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask                //任务列表
-	ClearTaskList map[string]*ClearTask                  //清理任务列表
-	saveLimit     = 100                                  //抽取日志批量保存
-	PageSize      = 5000                                 //查询分页
+	cut     = ju.NewCut()                          //获取正文并清理
+	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask          //任务列表
+	ClearTaskList map[string]*ClearTask            //清理任务列表
+	saveLimit     = 100                            //抽取日志批量保存
+	PageSize      = 5000                           //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -319,8 +319,8 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
 		RuleBlock: e.RuleBlock,
 	}
-	if (j.Jsondata != nil||(*j.Jsondata) != nil)  && (*j.Jsondata)["jsoncontent"]!= nil{
-		delete((*j.Jsondata),"jsoncontent")
+	if (j.Jsondata != nil || (*j.Jsondata) != nil) && (*j.Jsondata)["jsoncontent"] != nil {
+		delete((*j.Jsondata), "jsoncontent")
 	}
 	if isextFile {
 		jf = &ju.Job{
@@ -339,8 +339,8 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 			RuleBlock:  e.RuleBlock,
 			IsFile:     isextFile,
 		}
-		if (jf.Jsondata != nil||(*jf.Jsondata) != nil)  && (*jf.Jsondata)["jsoncontent"]!= nil{
-			delete((*jf.Jsondata),"jsoncontent")
+		if (jf.Jsondata != nil || (*jf.Jsondata) != nil) && (*jf.Jsondata)["jsoncontent"] != nil {
+			delete((*jf.Jsondata), "jsoncontent")
 		}
 	}
 	codeSite := j.SpiderCode
@@ -974,7 +974,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
 								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
-								if data[len(data)-1].(bool){
+								if data[len(data)-1].(bool) {
 									j.BlockPackage[k].Budget = qu.Float64All(data[0])
 									j.BlockPackage[k].IsTrueBudget = true
 								}
@@ -984,7 +984,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
 								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
-								if data[len(data)-1].(bool){
+								if data[len(data)-1].(bool) {
 									j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
 									j.BlockPackage[k].IsTrueBidamount = true
 								}
@@ -1026,7 +1026,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
 						data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
-						if data[len(data)-1].(bool){
+						if data[len(data)-1].(bool) {
 							j.BlockPackage[k].Budget = qu.Float64All(data[0])
 							j.BlockPackage[k].IsTrueBudget = true
 						}
@@ -1037,7 +1037,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
 						data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
-						if data[len(data)-1].(bool){
+						if data[len(data)-1].(bool) {
 							j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
 							j.BlockPackage[k].IsTrueBidamount = true
 						}
@@ -1071,7 +1071,7 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]ma
 				kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
 					"code":        "winnerorder",
 					"field":       vc.Field,
-					"ruletext":    "中标候选人_"+ v["sortstr"].(string),
+					"ruletext":    "中标候选人_" + v["sortstr"].(string),
 					"extfrom":     v["sortstr"],
 					"sourcevalue": v["price"],
 					"value":       v["price"],
@@ -1596,7 +1596,8 @@ type FieldValue struct {
 	Value interface{}
 	Count int
 }
-var clearWinnerReg =regexp.MustCompile("名称|施工|拟定供应商名称|:|:")
+
+var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:|:")
 
 //分析抽取结果并保存
 func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
@@ -1630,20 +1631,27 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				}
 			}
 		}
+		if len(j.PackageInfo) > 10 {
+			for k, v := range j.PackageInfo {
+				j.PackageInfo = map[string]map[string]interface{}{}
+				j.PackageInfo[k] = v
+				break
+			}
+		}
 		if len(j.PackageInfo) > 0 { //分包信息
 			tmp["package"] = j.PackageInfo
 			//包预算,中标金额合并大于抽取就覆盖
 			var tmpBidamount, tmpBudget float64
 			//s_winner逗号分隔拼接,分包中标人
-			var tmpstr,savewinner []string
+			var tmpstr, savewinner []string
 			//按包排序
 			for b, v := range j.PackageInfo {
-				if v["winner"]!= nil && v["winner"]!=""{
-					tmpstr = append(tmpstr,b)
+				if v["winner"] != nil && v["winner"] != "" {
+					tmpstr = append(tmpstr, b)
 				}
 			}
 			//包预算,中标金额合并大于抽取就覆盖
-			if len(j.PackageInfo) >1{
+			if len(j.PackageInfo) > 1 {
 				//包数大于1累加
 				for _, v := range j.PackageInfo {
 					if v["budget"] != nil {
@@ -1659,10 +1667,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
 					tmp["bidamount"] = tmpBidamount
 				}
-			}else {
+			} else {
 				//包数等于1,tmp没有值取包里的值
 				if tmp["budget"] == nil || tmp["budget"] == 0 {
-					for _,v := range j.PackageInfo {
+					for _, v := range j.PackageInfo {
 						if v["budget"] != nil {
 							tmp["budget"] = v["budget"]
 						}
@@ -1670,7 +1678,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 
 				}
 				if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
-					for _,v := range j.PackageInfo {
+					for _, v := range j.PackageInfo {
 						if v["bidamount"] != nil {
 							tmp["bidamount"] = v["bidamount"]
 						}
@@ -1679,21 +1687,21 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 			//s_winner逗号分隔拼接,分包中标人
 			sort.Strings(tmpstr)
-			for _,v := range tmpstr{
+			for _, v := range tmpstr {
 				svvvv := qu.ObjToString(j.PackageInfo[v]["winner"])
 				savevvv := clearWinnerReg.ReplaceAllString(svvvv, "")
-				if savevvv == ""{
+				if savevvv == "" {
 					continue
 				}
-				savewinner = append(savewinner,savevvv)
+				savewinner = append(savewinner, savevvv)
 			}
-			if (savewinner  == nil || len(savewinner)==0) && tmp["winner"]!=nil{
+			if (savewinner == nil || len(savewinner) == 0) && tmp["winner"] != nil {
 				tmp["s_winner"] = tmp["winner"]
-			}else if savewinner != nil{
-				tmp["s_winner"] = strings.Join(savewinner,",")
+			} else if savewinner != nil {
+				tmp["s_winner"] = strings.Join(savewinner, ",")
 			}
 
-		}else if tmp["winner"]!= nil && tmp["winner"]!=""{
+		} else if tmp["winner"] != nil && tmp["winner"] != "" {
 			//没有分包取winner
 			tmp["s_winner"] = tmp["winner"]
 		}
@@ -1853,8 +1861,12 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		} else { //测试结果
 			delete(tmp, "_id")
 			if len(j.BlockPackage) > 0 { //分包详情
-				bs, _ := json.Marshal(j.BlockPackage)
-				tmp["epackage"] = string(bs)
+				if len(j.BlockPackage) >10{
+					tmp["epackage"] = "分包异常"
+				}else {
+					bs, _ := json.Marshal(j.BlockPackage)
+					tmp["epackage"] = string(bs)
+				}
 			}
 			tmp["result"] = result
 			tmp["resultf"] = resultf
@@ -2046,7 +2058,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 {                            //reids未找到,执行规则匹配
+	if i == 0 { //redis未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库
@@ -2138,20 +2150,20 @@ func resetWinnerorder(j *ju.Job) {
 	if maxlen > 0 {
 		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
 		if j.Winnerorder[0]["price"] != nil {
-			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"],""})
-			if tmpPrice[len(tmpPrice)-1].(bool){
-				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder",SourceValue:j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 0.5})
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""})
+			if tmpPrice[len(tmpPrice)-1].(bool) {
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 0.5})
 			}
 		}
 	}
 	if j.Result["winner"] == nil && len(winners) > 0 {
 		j.Result["winner"] = winners
-	} else {
+	} else if len(winners) > 0 {
 		j.Result["winner"] = append(j.Result["winner"], winners...)
 	}
 	if j.Result["bidamount"] == nil && len(bidamounts) > 0 {
 		j.Result["bidamount"] = bidamounts
-	} else {
+	} else if len(bidamounts) > 0 {
 		j.Result["bidamount"] = append(j.Result["bidamount"], bidamounts...)
 	}
 	//j.Result["winner"] = winners

+ 1 - 2
src/jy/extract/extractInit.go

@@ -1409,8 +1409,7 @@ func (e *ExtractTask) ResultSave(init bool) {
 						log.Debug(err)
 					})
 				}
-
-				time.Sleep(3 * time.Second)
+				time.Sleep(2 * time.Second)
 			}
 		}()
 	} else {