浏览代码

联系人、电话优化

maxiaoshan 5 年之前
父节点
当前提交
6b1dcd0758
共有 7 个文件被更改,包括 47 次插入和 31 次删除
  1. 1 1
      src/config.json
  2. 1 1
      src/jy/clear/clear.go
  3. 1 1
      src/jy/clear/cutspace.go
  4. 8 8
      src/jy/pretreated/analytable.go
  5. 3 3
      src/main_blocktest.go
  6. 32 16
      src/res/fieldscore.json
  7. 1 1
      src/res/formattext.json

+ 1 - 1
src/config.json

@@ -1,7 +1,7 @@
 {
     "port": "9090",
     "mgodb": "127.0.0.1:27092",
-    "dbsize": 2,
+    "dbsize": 10,
     "dbname": "extract_kf",
     "redis": "buyer=127.0.0.1:6379,winner=127.0.0.1:6379,agency=127.0.0.1:6379,qyk_redis=127.0.0.1:6379",
     "elasticsearch": "http://127.0.0.1:9800",

+ 1 - 1
src/jy/clear/clear.go

@@ -52,7 +52,7 @@ func DoClearFn(clear []string, data []interface{}) []interface{} {
 }
 
 //取手机号
-var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
+var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2,}[×―—-\\-])+\\d{2,}[×―—-\\-]+(\\d{3,})+|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
 
 func GetPhone(data []interface{}) []interface{} {
 	data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))

+ 1 - 1
src/jy/clear/cutspace.go

@@ -25,7 +25,7 @@ func init() {
 	separateSymbol, _ = regexp.Compile("[\\s\u3000\u2003\u00a0\\n,,、/。|]")
 	placeReg, _ = regexp.Compile("^.*(公司|学(校)?|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合(会|体)|工作室)$")
 	clearNum, _ = regexp.Compile("[\\d-]+")
-	endSymblo = regexp.MustCompile(`[/\\?]$`)
+	endSymblo = regexp.MustCompile(`[/\\?+]$`)
 }
 
 var LableStr = "&?(amp|nbsp|#8266);?|(<).*?(>?)"

+ 8 - 8
src/jy/pretreated/analytable.go

@@ -101,7 +101,7 @@ var (
 	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
 	MhSpilt        = regexp.MustCompile("[::]")
 	//识别采购单位联系人、联系电话、代理机构联系人、联系电话
-	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
+	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
 		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
@@ -2145,7 +2145,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 							}
 						}
 					}
-				} else if vcgdw.Value == "中标金额" && len(val) == len(index){
+				} else if vcgdw.Value == "中标金额" && len(val) == len(index) {
 					for bi, bv := range index {
 						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount == 0 {
 							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
@@ -2807,11 +2807,11 @@ L:
 	}
 	(*contactFormat).IndexMap = indexMap
 	(*contactFormat).MatchMap = matchMap
-	//	for _, tr := range tn.TRs {
-	//		for _, td := range tr.TDs {
-	//			qutil.Debug("td.sort.map---", td.SortKV.Map)
-	//		}
-	//	}
+	// for _, tr := range tn.TRs {
+	// 	for _, td := range tr.TDs {
+	// 		qutil.Debug("td.sort.map---", td.SortKV.Map)
+	// 	}
+	// }
 }
 
 //modle
@@ -3320,7 +3320,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 3 - 3
src/main_blocktest.go

@@ -51,7 +51,7 @@ func all() {
 }
 func one() {
 	m := mongodbutil.MgoFactory(3, 3, 120, "127.0.0.1:27092", "extract_kf")
-	d, _ := m.FindById("bidding_data", "5c442fa4a5cb26b9b7f52f70", extract.Fields)
+	d, _ := m.FindById("bidding_data2", "5e59321150b5ea296ed84985", extract.Fields)
 	com(*d)
 }
 func com(doc map[string]interface{}) {
@@ -169,7 +169,7 @@ func com(doc map[string]interface{}) {
 			for kkk, vvv := range vv.ScoreItem {
 				log.Println("--", kkk, k, fmt.Sprintf("%+v", vvv))
 			}
-			log.Println("\n")
+			//log.Println("\n")
 		}
 	}
 	log.Println("=============中标候选人================")
@@ -219,7 +219,7 @@ func com(doc map[string]interface{}) {
 		f.WriteString(j.SourceMid + "-----" + v.Title + "---" + fmt.Sprint(v.Titles) + "\n")
 		continue
 		for _, kv := range v.ColonKV.Kvs {
-			log.Println("\n")
+			//log.Println("\n")
 			log.Println(kv.Key, "---", kv.Value)
 			log.Println(kv.Line)
 			log.Println("=======================")

+ 32 - 16
src/res/fieldscore.json

@@ -329,11 +329,27 @@
                 "score": -10
             },  {
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详(|情)|公告|名称)",
                 "score": -10
             }
         ],
         "length": [
+         	{
+                "describe": "[gt,lte,score]",
+                "range": [
+                    1,
+                    3,
+                    3
+                ]
+            },
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    17,
+                    -1,
+                    -10
+                ]
+            },
             {
                 "describe": "[gt,lte,score]",
                 "range": [
@@ -346,15 +362,22 @@
     },
    	"agencytel": {
         "type": "string",
-        "positivewords": [],
+        "positivewords": [
+        	{
+                "describe": "某些电话中有中文转字",
+                "regstr": "[转]",
+                "score": 15
+            }  
+        ],
         "negativewords": [
             {
                 "describe": "出现中文汉字",
                 "regstr": "[\\u4e00-\\u9fa5]",
                 "score": -10
-            },  {
+            },  
+            {
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详(|情)|公告)",
                 "score": -10
             }
         ],
@@ -374,14 +397,6 @@
                     14,
                     3
                 ]
-            },
-            {
-                "describe": "[gt,∞,score]",
-                "range": [
-                    14,
-                    -1,
-                    -1
-                ]
             }
         ]
     },
@@ -406,7 +421,7 @@
                 "score": -10
             },  {
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详(|情)|公告)",
                 "score": -10
             }
         ],
@@ -429,9 +444,10 @@
                 "describe": "出现中文汉字",
                 "regstr": "[\\u4e00-\\u9fa5]",
                 "score": -10
-            },  {
+            },
+            {
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详(|情)|公告)",
                 "score": -10
             }
         ],
@@ -527,7 +543,7 @@
                 "score": -10
             },  {
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详(|情)|公告)",
                 "score": -10
             }
         ],

+ 1 - 1
src/res/formattext.json

@@ -82,7 +82,7 @@
             "desc": ""
 		},
 		{
-			"reg": "([\n))])(联系人)及(手机|电话)[::](.+?)[\\s\u3000\u2003\u00a0/]+(.+)",
+			"reg": "([\n))])(联系人)及(手机|电话)[::](.+?)[\\s\u3000\u2003\u00a0/,,]+(.+)",
             "separator": "$1$2:$4\n$3:$5",
             "desc": ""
 		},