Explorar el Código

候选人取第一个

wcj hace 6 años
padre
commit
bd3d2065d5

+ 3 - 3
src/config.json

@@ -3,14 +3,14 @@
     "mgodb": "192.168.3.207:27082",
     "dbsize": 2,
     "dbname": "extract_kf",
-    "redis": "buyer=192.168.3.207:3379,winner=192.168.3.207:3379,agency=192.168.3.207:3379",
+    "redis": "buyer=192.168.3.207:1377,winner=192.168.3.207:1378,agency=192.168.3.207:1379",
     "elasticsearch": "http://192.168.3.11:9800",
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "qualityaudit": false,
-    "saveblock": true,
+    "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,

+ 8 - 4
src/jy/extract/extract.go

@@ -754,6 +754,7 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) map[string][]map
 					text := ju.TrimLRSpace(vv.Value, "")
 					if text != "" {
 						kvmap[field] = append(kvmap[field], map[string]interface{}{
+							"code":        "CL_" + vv.Key,
 							"field":       field,
 							"ruletext":    vv.Key,
 							"extfrom":     vc.ExtFrom,
@@ -934,10 +935,10 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
 					}
-					j.Result[in.Field][k].Value = text
 					if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
 						continue
 					}
+					j.Result[in.Field][k].Value = text
 					exts = append(exts, map[string]interface{}{
 						"field":     v.Field,
 						"code":      v.Code,
@@ -964,10 +965,10 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
 					}
-					j.Result[key][k].Value = text
 					if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
 						continue
 					}
+					j.Result[key][k].Value = text
 					exts = append(exts, map[string]interface{}{
 						"field":     v.Field,
 						"code":      v.Code,
@@ -1018,7 +1019,7 @@ func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *Re
 		return
 	}
 	logdata := map[string]interface{}{
-		"code":       v.Code,
+		"code":       qu.If(v.Code == "", "kv", v.Code),
 		"name":       v.Name,
 		"type":       ftype,
 		"ruletext":   v.RuleText,
@@ -1118,10 +1119,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		tmp := map[string]interface{}{} //抽取值
 		tmp["fieldall"] = auxinfo
 		for _, val := range result {
-			for _, v := range val { //取第一个非负数
+			for _, v := range val { //取第一个非负数,项目名称除外
 				if v.Score > -1 {
 					tmp[v.Field] = v.Value
 					break
+				} else if v.Field == "projectname" {
+					tmp[v.Field] = v.Value
+					break
 				}
 			}
 		}

+ 7 - 1
src/jy/pretreated/analystep.go

@@ -107,10 +107,16 @@ func FindProjectCode(newCon string, job *util.Job) {
 	}
 	var proCode string
 	blCode := &util.Block{}
-	if projectcodeRegAll.MatchString(newCon) { //项目名称项目编号一起的5d424bdfa5cb26b9b7ac7a85
+	if newCon = projectcodeRegAll.FindString(newCon); newCon != "" { //项目名称项目编号一起的
+		//5d424bdfa5cb26b9b7ac7a85
+		//5d425a48a5cb26b9b7df5fec
 		splitStr := strings.Split(newCon, " ")
 		if len(splitStr) >= 2 {
 			newCon = "项目编号:" + splitStr[len(splitStr)-1]
+		} else if len(splitStr) == 1 {
+			if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
+				newCon = "项目编号:" + tmpstr
+			}
 		}
 	}
 	proCode = projectcodeReg.FindString(newCon)

+ 5 - 4
src/jy/pretreated/analytable.go

@@ -107,9 +107,10 @@ var (
 	underline                   = regexp.MustCompile("_+$")
 	iswinnertabletag            = regexp.MustCompile("(中标|候选人|成交|结果)")
 	nswinnertabletag            = regexp.MustCompile("[评得分估]+")
-	projectcodeRegAll           = regexp.MustCompile(`采购项目名称及项目编号[:|:]?`)
-	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号|招标编号){1}(:|:)(.){4,30}()|\)|\])`)
-	projectcodeReg2             = regexp.MustCompile(`((?:^|\n)编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9a-zA-Z)号]`)
+	projectcodeRegAll           = regexp.MustCompile(`采购项目名称及[项目]?编号[:|:]?.*[\n]?`)
+	projectcodeRegAll2          = regexp.MustCompile("[((].{4,30}[))]")
+	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(^([\s]?编号)|项目编号|标段编号|招标编号){1}(:|:)(.){4,30}()|\)|\])`)
+	projectcodeReg2             = regexp.MustCompile(`(^([\s]?编号)|项目编号){1}(:|:)(.{4,39})[0-9a-zA-Z)号]`)
 	projectcodeReg3             = regexp.MustCompile("(^询价单编号[A-Za-z0-9/-]*|公告编号[A-Za-z0-9/-]*)")
 	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\} `) //  \{".*\":\".+\"}
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")
@@ -3150,7 +3151,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 9 - 4
src/res/fieldscore.json

@@ -319,9 +319,14 @@
         "positivewords": [
             {
                 "describe": "有关键字加分",
-                "regstr": "(财采|招字|财购|赣购){1}",
+                "regstr": "(财采|招字|财购|赣购|豫财|管字|豫政){1}",
                 "score": 2
             },
+            {
+                "describe": "有关键字加分",
+                "regstr":"(【[0-9]{4}】.{2,5}号){1}",
+                "score": 0.5
+            },
             {
                 "describe": "号结尾加分",
                 "regstr": ".{4,35}(号)$",
@@ -340,9 +345,9 @@
                 "score": -0.2
             },
             {
-                "describe": "中文汉字大于5个",
-                "regstr": "[\\u4e00-\\u9fa5]{5,}",
-                "score": -2
+                "describe": "中文汉字大于6个",
+                "regstr": "[\\u4e00-\\u9fa5]{6,}",
+                "score": -1.5
             },
             {
                 "describe": "全为中文汉字或符号",