Эх сурвалжийг харах

buyerperson、buyertel优化

maxiaoshan 6 жил өмнө
parent
commit
64d4d4a5c5

+ 2 - 1
src/jy/clear/clear.go

@@ -26,6 +26,7 @@ func init() {
 	BindFn("clearProjectName", ClearProjectName) //清理项目名称
 	BindFn("getPhone", GetPhone)                 //取手机号
 	BindFn("chiToEng", ChiToEng)                 //中文符号转英文
+	BindFn("clearBuyerPerson", ClearBuyerPerson) //处理较长采购联系人
 }
 
 //绑定清理方法
@@ -49,7 +50,7 @@ func DoClearFn(clear []string, data []interface{}) []interface{} {
 }
 
 //取手机号
-var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,4}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{4})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
+var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,4}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{4})?|\\d{3,4}\\*{3,4}\\d{3,4}|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
 
 func GetPhone(data []interface{}) []interface{} {
 	data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))

+ 19 - 0
src/jy/clear/cutspace.go

@@ -9,12 +9,14 @@ import (
 var cutSpace *regexp.Regexp
 var cutAllSpace *regexp.Regexp
 var catSymbol *regexp.Regexp
+var separateSymbol *regexp.Regexp
 var spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
 
 func init() {
 	cutSpace, _ = regexp.Compile(`^\s*|\s*$`)
 	cutAllSpace, _ = regexp.Compile(`\s*`)
 	catSymbol, _ = regexp.Compile(`[]+`)
+	separateSymbol, _ = regexp.Compile("[\\s\u3000\u2003\u00a0\\n、.,,.。、|]")
 }
 
 var LableStr = "&?(amp|nbsp|#8266);?|(<).*?(>?)"
@@ -164,3 +166,20 @@ func ChiToEng(data []interface{}) []interface{} {
 	data[0] = value
 	return data
 }
+
+func ClearBuyerPerson(data []interface{}) []interface{} {
+	value := fmt.Sprint(data[0])
+	tmp := []string{}
+	if len([]rune(value)) > 4 { //名字默认最长4
+		valuearr := separateSymbol.Split(value, -1)
+		for _, v := range valuearr {
+			if len([]rune(v)) <= 4 { //长度大于4认为不是名字
+				tmp = append(tmp, v)
+			}
+		}
+		data[0] = strings.Join(tmp, ",")
+	} else {
+		data[0] = value
+	}
+	return data
+}

+ 2 - 2
src/jy/pretreated/analytable.go

@@ -93,10 +93,10 @@ var (
 	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
 	MhSpilt        = regexp.MustCompile("[::]")
 	//识别采购单位联系人、联系电话、代理机构联系人、联系电话
-	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|(征求意见|报名审核购买)?((联系人?(及|和)?|办公)?((电话([//]传真)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|(项目)?经办)人)|采购方代表")
+	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公)?((电话([//]传真)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|(项目)?经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
-		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|发布人?|甲|招标(服务)?|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人"),
+		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|发布人?|甲|招标(服务)?|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人|采购部"),
 		"代理机构": regexp.MustCompile("(代理|受托).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
 	}
 	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")

+ 23 - 9
src/jy/pretreated/colonkv.go

@@ -243,10 +243,13 @@ func formatText(content, key string) string {
 }
 func IsContactKvHandle(value string, m map[string]bool) bool {
 	for k, _ := range m {
-		if k != value && (strings.HasPrefix(k, value) || strings.HasPrefix(value, k)) {
-			continue
-		}
-		if strings.Contains(value, k) || strings.Contains(k, value) {
+		//		if k != value && (strings.HasPrefix(k, value) || strings.HasPrefix(value, k)) {
+		//			continue
+		//		}
+		//		if strings.Contains(value, k) || strings.Contains(k, value) {
+		//			return true
+		//		}
+		if k == value {
 			return true
 		}
 	}
@@ -728,6 +731,16 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
 	}
 	for _, findkv := range findkvs {
+		//		if ContactInfoMustReg.MatchString(findkv.Value) { //名称、地址、联系人、邮编、电话
+		//			preval := findkv.PrevLine
+		//			ctkarr := HasOrderContactType(preval)
+		//			if len(ctkarr) > 0 {
+		//				for i, ct_k := range ctkarr {
+		//					indexMap[i+1] = ct_k
+		//				}
+		//			}
+		//			qutil.Debug("----", indexMap)
+		//		}
 		k, val, nextval := findkv.Key, strings.TrimSpace(findkv.Value), strings.TrimSpace(findkv.NextLine)
 		//val是空的话,不打标签
 		if filterValue.MatchString(val) {
@@ -740,7 +753,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 		}
 		key = colonkvEntity.blockTitleKV(title, key)
 		//先用新的key
-		tags := GetAppointTags(key, tagdbs)
+		tags := GetAppointTags(key, tagdbs) //找标签库
 		if len(tags) == 0 && len(key) < 10 && len(title) > 0 && len(title) < 15 {
 			key = title + key
 			tags = GetAppointTags(key, tagdbs)
@@ -766,12 +779,13 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 				} else if nextval != "" && utf8.RuneCountInString(nextval) < 30 {
 					if strings.Contains(nextval, ":") || strings.Contains(nextval, ":") {
 						if len(strings.Split(nextval, ":")) > 1 || len(strings.Split(nextval, ":")) > 1 {
-							nextval = strings.Split(nextval, ":")[0]
-							nextval = strings.Split(nextval, ":")[0]
-							if strings.TrimSpace(nextval) == "" {
+							tmpnextval := ""
+							tmpnextval = strings.Split(nextval, ":")[0]
+							tmpnextval = strings.Split(nextval, ":")[0]
+							if strings.TrimSpace(tmpnextval) == "" {
 								continue
 							}
-							if GetAppointTags(nextval, tagdbs).Len() > 0 || GetAppointTags(k, tagdbs).Len() > 0 {
+							if GetAppointTags(tmpnextval, tagdbs).Len() > 0 || GetAppointTags(k, tagdbs).Len() > 0 {
 								continue
 							}
 						}

+ 1 - 1
src/main_blocktest.go

@@ -49,7 +49,7 @@ func all() {
 }
 func one() {
 	m := mongodbutil.MgoFactory(3, 3, 120, "192.168.3.207:27081", "qfw")
-	d, _ := m.FindById("bidding", "5d423d13a5cb26b9b76e4479", nil)
+	d, _ := m.FindById("bidding", "5d423cb9a5cb26b9b76d16e9", nil)
 	com(*d)
 }
 func com(doc map[string]interface{}) {

+ 12 - 5
src/main_test.go

@@ -40,11 +40,18 @@ func Test_extractcity(t *testing.T) {
 	extract.FindBuyer()
 }
 func Test_reg(t *testing.T) {
-	context := `sfsa发斯蒂芬.`
-	//reg := regexp.MustCompile(`(勘察|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|[,,\.。、::“”‘’"])`)
-	reg := regexp.MustCompile(`[\\p{Han}]`)
-	tmp := reg.MatchString(context)
-	log.Println("--", tmp)
+	value := `名字1名字斯蒂fasd`
+	separateSymbol, _ := regexp.Compile("[\\s\u3000\u2003\u00a0\\n、.,,.。、]")
+	tmp := []string{}
+	if len([]rune(value)) > 4 { //名字默认最长4
+		valuearr := separateSymbol.Split(value, -1)
+		for _, v := range valuearr {
+			if len([]rune(v)) < 4 { //长度大于4认为不是名字
+				tmp = append(tmp, v)
+			}
+		}
+	}
+	log.Println(strings.Join(tmp, ","))
 }
 
 func Test_reg1(t *testing.T) {

+ 7 - 17
src/res/fieldscore.json

@@ -261,7 +261,13 @@
                 "score": 3
             }
         ],
-        "negativewords": [],
+        "negativewords": [
+            {
+                "describe": "出现符号",
+                "regstr": "[*]",
+                "score": -10
+            }
+        ],
         "length": [
             {
                 "describe": "[gt,lte,score]",
@@ -270,22 +276,6 @@
                     1,
                     -5
                 ]
-            },
-            {
-                "describe": "[gt,lte,score]",
-                "range": [
-                    1,
-                    7,
-                    3
-                ]
-            },
-            {
-                "describe": "[gt,∞,score]",
-                "range": [
-                    7,
-                    -1,
-                    -1
-                ]
             }
         ]
     },

+ 2 - 1
src/res/specialsymbols.json

@@ -92,7 +92,8 @@
             ",",
             "。",
             ";",
-            ";"
+            ";",
+			"、"
         ]
     },
     "messycode": {

+ 1 - 1
src/web/templates/admin/clear.html

@@ -102,7 +102,7 @@ menuActive("version")
 var field = {{.field}};
 var _id = "";
 //var clearArr = ["cutspace","cutallspace","cutSymbol","cutNotPrs","clearAllWord","clearMaxAmount","clearProjectName","toint","tofloat","totimestamp","tomoney","getcurrency","getrate","getPhone","rateToFloat"]; 
-var clearMap = {"中文符号转英文":"chiToEng","去除首尾空格":"cutspace","去除所有空格":"cutallspace","清理符号":"cutSymbol","清理不成对符号后面的内容":"cutNotPrs","清理全部是汉字或者特殊符号的情况":"clearAllWord","过滤大于1万亿":"clearMaxAmount","清理项目名称":"clearProjectName","转int":"toint","转float":"tofloat","转时间戳":"totimestamp","转换金额":"tomoney","获取币种":"getcurrency","获取汇率":"getrate","取手机号":"getPhone","费率转小数":"rateToFloat"}
+var clearMap = {"中文符号转英文":"chiToEng","去除首尾空格":"cutspace","去除所有空格":"cutallspace","清理符号":"cutSymbol","清理不成对符号后面的内容":"cutNotPrs","清理全部是汉字或者特殊符号的情况":"clearAllWord","过滤大于1万亿":"clearMaxAmount","清理项目名称":"clearProjectName","转int":"toint","转float":"tofloat","转时间戳":"totimestamp","转换金额":"tomoney","获取币种":"getcurrency","获取汇率":"getrate","取手机号":"getPhone","费率转小数":"rateToFloat","处理较长采购联系人":"clearBuyerPerson"}
 $(function () {
 	ttableclear=$('#clearTable').DataTable({
 		"lengthChange": false,