Browse Source

Merge branch 'dev3.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.2

fengweiqiang 6 years ago
parent
commit
3e45969c93

+ 3 - 3
src/config.json

@@ -3,14 +3,14 @@
     "mgodb": "192.168.3.207:27082",
     "dbsize": 2,
     "dbname": "extract_kf",
-    "redis": "buyer=192.168.3.207:3379,winner=192.168.3.207:3379,agency=192.168.3.207:3379",
+    "redis": "buyer=192.168.3.207:1377,winner=192.168.3.207:1378,agency=192.168.3.207:1379",
     "elasticsearch": "http://192.168.3.11:9800",
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "qualityaudit": false,
-    "saveblock": true,
+    "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,

+ 60 - 42
src/jy/extract/extract.go

@@ -419,27 +419,34 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 		}
 		//候选人加入
 		if len(j.Winnerorder) > 0 {
-			winner := &ju.ExtField{
-				Field:     "winner",
-				Code:      "",
-				RuleText:  "",
-				Type:      "winnerorder",
-				MatchType: "winnerorder",
-				ExtFrom:   "",
-				Value:     j.Winnerorder[0]["entname"],
-				Score:     0,
-			}
-			if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
-				winner.Score = -5
+			//候选人中标金额
+			if price := j.Winnerorder[0]["price"]; price != nil {
+				bidamount := &ju.ExtField{
+					Field:     "bidamount",
+					Code:      "",
+					RuleText:  "",
+					Type:      "winnerorder",
+					MatchType: "winnerorder",
+					ExtFrom:   "",
+					Value:     price,
+					Score:     0,
+				}
+				j.Result["bidamount"] = []*ju.ExtField{bidamount}
 			}
-			winners := j.Result["winner"]
-			if winners != nil {
-				winners = append(winners, winner)
-			} else {
-				winners = []*ju.ExtField{}
-				winners = append(winners, winner)
+			//候选人中标单位
+			if entname := j.Winnerorder[0]["entname"]; entname != nil {
+				winner := &ju.ExtField{
+					Field:     "winner",
+					Code:      "",
+					RuleText:  "",
+					Type:      "winnerorder",
+					MatchType: "winnerorder",
+					ExtFrom:   "",
+					Value:     entname,
+					Score:     0,
+				}
+				j.Result["winner"] = []*ju.ExtField{winner}
 			}
-			j.Result["winner"] = winners
 		}
 		//函数清理
 		for key, val := range j.Result {
@@ -535,27 +542,34 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
 		}
 		//候选人加入
 		if len(j.Winnerorder) > 0 {
-			winner := &ju.ExtField{
-				Field:     "winner",
-				Code:      "",
-				RuleText:  "",
-				Type:      "winnerorder",
-				MatchType: "winnerorder",
-				ExtFrom:   "",
-				Value:     j.Winnerorder[0]["entname"],
-				Score:     0,
-			}
-			if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
-				winner.Score = -5
+			//候选人中标金额
+			if price := j.Winnerorder[0]["price"]; price != nil {
+				bidamount := &ju.ExtField{
+					Field:     "bidamount",
+					Code:      "",
+					RuleText:  "",
+					Type:      "winnerorder",
+					MatchType: "winnerorder",
+					ExtFrom:   "",
+					Value:     price,
+					Score:     0,
+				}
+				j.Result["bidamount"] = []*ju.ExtField{bidamount}
 			}
-			winners := j.Result["winner"]
-			if winners != nil {
-				winners = append(winners, winner)
-			} else {
-				winners = []*ju.ExtField{}
-				winners = append(winners, winner)
+			//候选人中标单位
+			if entname := j.Winnerorder[0]["entname"]; entname != nil {
+				winner := &ju.ExtField{
+					Field:     "winner",
+					Code:      "",
+					RuleText:  "",
+					Type:      "winnerorder",
+					MatchType: "winnerorder",
+					ExtFrom:   "",
+					Value:     entname,
+					Score:     0,
+				}
+				j.Result["winner"] = []*ju.ExtField{winner}
 			}
-			j.Result["winner"] = winners
 		}
 		//函数清理
 		for key, val := range j.Result {
@@ -740,6 +754,7 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) map[string][]map
 					text := ju.TrimLRSpace(vv.Value, "")
 					if text != "" {
 						kvmap[field] = append(kvmap[field], map[string]interface{}{
+							"code":        "CL_" + vv.Key,
 							"field":       field,
 							"ruletext":    vv.Key,
 							"extfrom":     vc.ExtFrom,
@@ -920,10 +935,10 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
 					}
-					j.Result[in.Field][k].Value = text
 					if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
 						continue
 					}
+					j.Result[in.Field][k].Value = text
 					exts = append(exts, map[string]interface{}{
 						"field":     v.Field,
 						"code":      v.Code,
@@ -950,10 +965,10 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
 					}
-					j.Result[key][k].Value = text
 					if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
 						continue
 					}
+					j.Result[key][k].Value = text
 					exts = append(exts, map[string]interface{}{
 						"field":     v.Field,
 						"code":      v.Code,
@@ -1004,7 +1019,7 @@ func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *Re
 		return
 	}
 	logdata := map[string]interface{}{
-		"code":       v.Code,
+		"code":       qu.If(v.Code == "", "kv", v.Code),
 		"name":       v.Name,
 		"type":       ftype,
 		"ruletext":   v.RuleText,
@@ -1104,10 +1119,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		tmp := map[string]interface{}{} //抽取值
 		tmp["fieldall"] = auxinfo
 		for _, val := range result {
-			for _, v := range val { //取第一个非负数
+			for _, v := range val { //取第一个非负数,项目名称除外
 				if v.Score > -1 {
 					tmp[v.Field] = v.Value
 					break
+				} else if v.Field == "projectname" {
+					tmp[v.Field] = v.Value
+					break
 				}
 			}
 		}

+ 1 - 38
src/jy/extract/score.go

@@ -98,25 +98,6 @@ func init() {
 					}
 				}
 			}
-			if winnerorders, ok := tmp["winnerorder"].([]interface{}); ok {
-				for _, winnerorder := range winnerorders {
-					if p, ok := winnerorder.(map[string]interface{}); ok {
-						qu.Try(func() {
-							strReq, _ := p["regstr"].(string)
-							if strings.Contains(strReq, "\\u") {
-								strReq = strings.Replace(strReq, "\\", "\\\\", -1)
-								strReq = strings.Replace(strReq, "\\\\u", "\\u", -1)
-								strReq, _ = strconv.Unquote(`"` + strReq + `"`)
-								p["regexp"] = regexp.MustCompile(strReq)
-							} else {
-								p["regexp"] = regexp.MustCompile(strReq)
-							}
-						}, func(err interface{}) {
-							log.Println(err)
-						})
-					}
-				}
-			}
 		}
 	}
 }
@@ -271,26 +252,8 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 						}
 					}
 				}
-				//4.中标候选人打分
-				if winnerorders, ok := scoreRule["winnerorder"].([]interface{}); ok {
-					for _, winnerorder := range winnerorders {
-						if p, ok := winnerorder.(map[string]interface{}); ok {
-							qu.Try(func() {
-								if p["regexp"] != nil {
-									reg := p["regexp"].(*regexp.Regexp)
-									if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
-										tmps[tmpsindex].Score += qu.Float64All(p["score"])
-										tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "位置打分winnerorder" + fmt.Sprint(p["describe"]), Code: "winnerorder", RuleText: reg.String(), ScoreFrom: "fieldscore.json.winnerorder", Value: tmpsvalue.Value, Score: qu.Float64All(p["score"])})
-									}
-								}
-							}, func(err interface{}) {
-								log.Println(err)
-							})
-						}
-					}
-				}
 			}
-			//5.数据范围打分
+			//4.数据范围打分
 			if scoreRule["type"] == "float" {
 				min := qu.IntAll(scoreRule["min"])
 				max := qu.IntAll(scoreRule["max"])

+ 5 - 3
src/jy/pretreated/analystep.go

@@ -30,7 +30,7 @@ func AnalyStart(job *util.Job) {
 		}
 	}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock) //分块
-	if len(blockArrays) > 0 { //有分块
+	if len(blockArrays) > 0 {                                                //有分块
 		//从块里面找分包
 		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title) //从块里面找分包
 		for _, bl := range blockArrays {
@@ -46,6 +46,7 @@ func AnalyStart(job *util.Job) {
 			//新加 未分块table中未能解析到中标候选人,从正文中解析
 			if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
 				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+				job.Winnerorder = bl.Winnerorder
 			}
 			job.Block = append(job.Block, bl)
 		}
@@ -77,6 +78,7 @@ func AnalyStart(job *util.Job) {
 		//新加 未分块table中未能解析到中标候选人,从正文中解析
 		if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
 			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+			job.Winnerorder = bl.Winnerorder
 		}
 		//log.Println(bl.Text)
 		job.Block = append(job.Block, bl)
@@ -112,8 +114,8 @@ func FindProjectCode(newCon string, job *util.Job) {
 		splitStr := strings.Split(newCon, " ")
 		if len(splitStr) >= 2 {
 			newCon = "项目编号:" + splitStr[len(splitStr)-1]
-		}else if len(splitStr) == 1{
-			if tmpstr := projectcodeRegAll2.FindString(splitStr[0]);tmpstr!=""{
+		} else if len(splitStr) == 1 {
+			if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
 				newCon = "项目编号:" + tmpstr
 			}else if strings.Contains(newCon,"、"){
 				tmpstrs :=strings.Split(newCon,"、")

+ 12 - 7
src/jy/pretreated/winnerorder.go

@@ -1,6 +1,7 @@
 package pretreated
 
 import (
+	"log"
 	//"jy/clear"
 	"jy/util"
 	qutil "qfw/util"
@@ -94,6 +95,17 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int) []map[string
 			}
 		}
 	}
+	//候选人有一半以上是错误的话,那么就认为全部抽错了
+	invalidCount := 0
+	for _, v := range winners {
+		if !findCandidate.MatchString(qutil.ObjToString(v["entname"])) {
+			invalidCount++
+		}
+	}
+	log.Println(invalidCount)
+	if invalidCount > len(winners)/2 {
+		return []map[string]interface{}{}
+	}
 	return winners
 }
 
@@ -208,13 +220,6 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				winners = append(winners, object)
 				object = map[string]interface{}{}
 			}
-			//新加 从正文抽取时对v校验
-			if from == 1 || from == 3 {
-				v = findCandidate.FindString(v)
-				if v == "" {
-					continue
-				}
-			}
 			val := wo.clear("中标单位", v)
 			if val != nil {
 				count++

+ 5 - 7
src/res/fieldscore.json

@@ -179,6 +179,11 @@
                 "describe": "包含负分",
                 "regstr": "(附件|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
                 "score": -10
+            },
+			{
+                "describe": "非结尾",
+                "regstr": ".*[^集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行]$",
+                "score": -5
             }
         ],
         "length": [
@@ -206,13 +211,6 @@
                     -1
                 ]
             }
-        ],
-        "winnerorder": [
-            {
-                "describe": "非结尾",
-                "regstr": ".*[^集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行]$",
-                "score": -5
-            }
         ]
     },
     "agency": {

BIN
src/src