fengweiqiang 4 年之前
父節點
當前提交
a76dca09d4
共有 2 個文件被更改,包括 127 次插入和 8 次删除
  1. 53 5
      src/jy/extract/extract.go
  2. 74 3
      src/res/fieldscore.json

+ 53 - 5
src/jy/extract/extract.go

@@ -979,6 +979,11 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
 			if len(extinfo) > 0 {
 				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 			}
+		} else if in.Field == "qualifies" {
+			extinfo := extRegCoreToResult(extfrom,pretreated.HtmlToText(qu.ObjToString(doc[extfrom]) ), &map[string]string{}, j, in, isSite)
+			if len(extinfo) > 0 {
+				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
+			}
 		} else {
 			for _, v := range j.Block {
 				btag := make(map[string]string)
@@ -1392,7 +1397,6 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job,
 					}
 				}
 			}
-			//fmt.Println(text)
 			tmps := []map[string]interface{}{}
 			for i := 0; i < len(apos); i++ {
 				if strings.TrimSpace(rep[vre.Field+"_"+fmt.Sprint(i)]) != "" {
@@ -1407,8 +1411,6 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job,
 						"blocktag":  *tag,
 						"score":     score,
 					}
-					tmps = append(tmps, tmp)
-
 					exfield := ju.ExtField{
 						BlockTag:    *tag,
 						Field:       vre.Field,
@@ -1419,7 +1421,15 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job,
 						ExtFrom:     extfrom,
 						SourceValue: rep[vre.Field+"_"+fmt.Sprint(i)],
 						Value:       rep[vre.Field+"_"+fmt.Sprint(i)],
-						Score:       score}
+						Score:       score,
+					}
+					if vre.Field == "qualifies" {
+						if len(rep) >= 2 {
+							tmp["ruletext"] = rep[vre.Field+"_key_"+fmt.Sprint(i)]
+							exfield.RuleText = rep[vre.Field+"_key_"+fmt.Sprint(i)]
+						}
+					}
+					tmps = append(tmps, tmp)
 					if tmp["blocktag"] != nil {
 						exfield.BlockTag = tmp["blocktag"].(map[string]string)
 					}
@@ -1803,7 +1813,37 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(*j.Jsondata) > 0 {
 			tmp["jsondata"] = j.Jsondata
 		}
-		for _, val := range result {
+		for k, val := range result {
+			if k == "qualifies" {
+				squalifies := make([]interface{}, 0)
+				squalifiesMap := make(map[string]*scoreIndex, 0)
+				for _, kv := range val {
+					skey := kv.RuleText
+					if kv.Score > 0 {
+						if squalifiesMap[skey] == nil {
+							squalifiesMap = map[string]*scoreIndex{
+								skey: &scoreIndex{
+									Score: kv.Score,
+									Index: len(squalifies),
+								},
+							}
+							squalifies = append(squalifies, map[string]interface{}{
+								"key":   skey,
+								"value": kv.Value,
+							})
+						} else {
+							if squalifiesMap[skey].Score < kv.Score {
+								squalifies[squalifiesMap[skey].Index] = map[string]interface{}{
+									"key":   skey,
+									"value": kv.Value,
+								}
+							}
+						}
+					}
+				}
+				tmp[k] = squalifies
+				continue
+			}
 			for _, v := range val { //取第一个非负数,项目名称除外
 				//存0是否有效
 				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Score > -1 {
@@ -2133,6 +2173,9 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 		delete(tmp, "supervisorrate")
 	}
 	for k, v := range tmp {
+		if k == "qualifies" {
+			continue
+		}
 		if k == "contract_guarantee" || k == "bid_guarantee" {
 			if len(fmt.Sprint(v)) > 0 {
 				tmp[k] = true
@@ -2504,3 +2547,8 @@ func RemoveReplicaSliceString(slc []string) []string {
 	}
 	return result
 }
+
+type scoreIndex struct { // per-RuleText bookkeeping entry used while collecting "qualifies" results in AnalysisSaveResult
+	Score float64 // score of the extracted value recorded for this key (taken from kv.Score)
+	Index int     // position of this key's entry in the collected results slice (len of the slice at insertion time)
+}

+ 74 - 3
src/res/fieldscore.json

@@ -879,14 +879,24 @@
         "positivewords": [
             {
                 "describe": "包含关键字",
-                "regstr": "(交纳|投标|金额)",
+                "regstr": "(交纳|投标)",
                 "score": 3
+            },
+            {
+                "describe": "金额",
+                "regstr": "(金额|元|¥)",
+                "score": 1
+            },
+            {
+                "describe": "数字金额",
+                "regstr": "([\\d,.,万]*元|人民币)",
+                "score": 1
             }
         ],
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(详见|test|null|公告|原因|测试|未知|收费|标注|流标|不满?足|终止原因|([::]|帐号|申请表|修改|清单|变更|公告|凭证|缴纳|澄清|答疑)$)",
+                "regstr": "(注[::]|详见|test|null|公告|原因|测试|未知|收费|标注|流标|不满?足|终止原因|([::??]|帐号|申请表|修改|清单|变更|公告|凭证|缴纳|澄清|答疑)$)",
                 "score": -8
             }
         ],
@@ -932,12 +942,22 @@
                 "describe": "包含关键字",
                 "regstr": "(履约保证金额?|合同履约金)",
                 "score": 3
+            },
+            {
+                "describe": "金额",
+                "regstr": "(金额|元|¥)",
+                "score": 1
+            },
+            {
+                "describe": "数字金额",
+                "regstr": "([\\d,.,万]*元|人民币)",
+                "score": 1
             }
         ],
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(投标(保证|担保)金|详见|test|null|公告|原因|测试|未知|收费|标注|流标|不满?足|终止原因|([::]|帐号|申请表|修改|清单|变更|公告|凭证|缴纳|澄清|答疑)$)",
+                "regstr": "(注[::]|投标(保证|担保)金|详见|test|null|公告|原因|测试|未知|收费|标注|流标|不满?足|终止原因|([::??]|帐号|申请表|修改|清单|变更|公告|凭证|缴纳|澄清|答疑)$)",
                 "score": -8
             }
         ],
@@ -975,5 +995,56 @@
                 ]
             }
         ]
+    },
+    "qualifies": {
+        "type": "string",
+        "positivewords": [
+        ],
+        "negativewords": [
+            {
+                "describe": "包含负分",
+                "regstr": "(注[::]|详见|test|null|公告|原因|测试|未知|流标|终止原因|([::??])$)",
+                "score": -8
+            },
+            {
+                "describe": "包含空格",
+                "regstr": "\\s",
+                "score": -2
+            }
+        ],
+        "length": [
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    0,
+                    36,
+                    -10
+                ]
+            },
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    36,
+                    150,
+                    3
+                ]
+            },
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    150,
+                    500,
+                    2
+                ]
+            },
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    500,
+                    -1,
+                    -10
+                ]
+            }
+        ]
     }
 }