瀏覽代碼

新版本~备份

zhengkun 2 年之前
父節點
當前提交
333a61992d

+ 1 - 19
src/config.json

@@ -22,25 +22,7 @@
     "udpport": "6601",
     "udptaskid": "60b493c2e138234cb4adb640",
     "nextNode": [],
-    "esconfig": {
-        "available": true,
-        "AccessID": "LTAI4G5x9aoZx8dDamQ7vfZi",
-        "AccessSecret": "Bk98FsbPYXcJe72n1bG3Ssf73acuNh",
-        "ZoneIds": [
-            {
-                "zoneid": "cn-beijing-g",
-                "LaunchTemplateId4": "lt-2ze5ktfgopayi48ok0hu",
-                "LaunchTemplateId8": "lt-2ze0qfrxdnkuwldj9s0u",
-                "vswitchid": "vsw-2ze586sxfwsaov4s5w88d"
-            },
-            {
-                "zoneid": "cn-beijing-h",
-                "LaunchTemplateId4": "lt-2ze1h0akjvi4sdemm7cj",
-                "LaunchTemplateId8": "lt-2ze5fzxwgt8jcqczvmjy",
-                "vswitchid": "vsw-2ze1n1k3mo3fv2irsfdps"
-            }
-        ]
-    },
+    "esconfig": {},
     "istest": true,
     "isSaveTag": false,
     "tomail": "zhengkun@topnet.net.cn",

+ 1 - 1
src/jy/extract/extract.go

@@ -2368,7 +2368,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 
 		//城市抽取
 		if e.IsExtractCity {
-			//e.NewExtractCity(j, &tmp)
+			//e.NewExtractCity(j, &tmp) //旧版
 			e.ExtractRegionInfo(j, &tmp, true)
 		}
 		//品牌抽取

+ 28 - 27
src/jy/extract/extractInit.go

@@ -113,13 +113,14 @@ type ExtractTask struct {
 	DistrictSimGet *ju.DFA //区或县简称
 	StreetGet      *ju.DFA //街道
 
+	XjbtCityArr           []map[string]interface{} //新疆兵团相关数据
+	SensitiveFullCity     *sensitive.Filter
+	SensitiveSimCity      *sensitive.Filter
+	SensitiveFullDistrict *sensitive.Filter
+
 	PostCodeMap map[string]*PostCode //邮编
 	AreaCodeMap map[string]*AreaCode //区号
-
-	XjbtCityArr       []map[string]interface{} //新疆兵团相关数据
-	SensitiveFullCity *sensitive.Filter
-	SensitiveSimCity  *sensitive.Filter
-	InfoType          []map[string]interface{}
+	InfoType    []map[string]interface{}
 
 	Trie_Full_Province  *ju.Trie       //省全称 省、直辖市、自治区
 	Trie_Full_City      *ju.Trie       //市全称 地级市
@@ -141,6 +142,8 @@ type SiteCity struct {
 	P string //省简称
 	C string //市全称
 	D string //区全称
+	T string //站点类型
+	Q string //企业地域
 }
 
 type ClearTaskInfo struct {
@@ -1003,16 +1006,14 @@ func InitProvincesx() []map[string]interface{} {
 //加载站点库site城市信息
 func InitSite() []map[string]interface{} {
 	defer qu.Catch()
-	query := map[string]interface{}{
-		"site_type": map[string]interface{}{
-			"$ne": "代理机构",
-		},
-	}
+	query := map[string]interface{}{}
 	list, _ := ju.Site_Mgo.Find("site", query, nil, map[string]interface{}{
-		"site":     1,
-		"area":     1,
-		"city":     1,
-		"district": 1,
+		"site":      1,
+		"area":      1,
+		"city":      1,
+		"district":  1,
+		"site_type": 1,
+		"qy_area":   1,
 	})
 	return list
 	//list, _ := db.Mgo.Find("site", query, nil, `{"site":1,"area":1,"city":1,"district":1}`, false, -1, -1)
@@ -1039,17 +1040,14 @@ func (e *ExtractTask) InitUpdateSite() {
 	e.SiteCityMap = make(map[string]*SiteCity)
 	for _, v := range InitSite() {
 		site := qu.ObjToString(v["site"])
-		area := qu.ObjToString(v["area"])
-		city := qu.ObjToString(v["city"])
-		district := qu.ObjToString(v["district"])
-		if area != "" && area != "全国" && site != "" {
-			s := &SiteCity{
-				P: area,
-				C: city,
-				D: district,
-			}
-			e.SiteCityMap[site] = s
+		s := &SiteCity{
+			P: qu.ObjToString(v["area"]),
+			C: qu.ObjToString(v["city"]),
+			D: qu.ObjToString(v["district"]),
+			T: qu.ObjToString(v["site_type"]),
+			Q: qu.ObjToString(v["qy_area"]),
 		}
+		e.SiteCityMap[site] = s
 	}
 	log.Debug("有效站点数量:", len(e.SiteCityMap))
 }
@@ -1140,14 +1138,14 @@ func (e *ExtractTask) InitCityInfo() {
 			e.Trie_Full_City.AddWords(qc_city) //加入市全称Trie(k:杭州市)
 			e.SensitiveFullCity.AddWord(qc_city)
 			c := &City{}
-			c.Name = qc_city //市全称:杭州市
+			c.Name = qc_city           //市全称:杭州市
+			e.CityFullMap[qc_city] = c //杭州市:市信息{}
 			if jc_city != "" {
 				c.Brief = jc_city                 //市简称:杭州
 				e.Trie_Sim_City.AddWords(c.Brief) //加入市简称Trie(k:杭州)
 				e.SensitiveSimCity.AddWord(c.Brief)
 				e.CityMap[qc_city] = c.Brief //杭州市:杭州
 				e.CityBriefMap[c.Brief] = c  //杭州:市信息{}
-				e.CityFullMap[qc_city] = c   //杭州市:市信息{}
 			}
 			c.P = p
 			if city_alias, ok := vcity["city_alias"].([]interface{}); ok {
@@ -1185,6 +1183,7 @@ func (e *ExtractTask) initDistricts(jc_province string, qc_city string, c *City,
 		d.Name = qc_district
 		d.C = c
 		e.Trie_Full_District.AddWords(qc_district) //加入区或县全称Trie
+		e.SensitiveFullDistrict.AddWord(qc_district)
 		if jc_district != "" {
 			e.Trie_Sim_District.AddWords(jc_district) //加入区或县简称Trie
 			//初始化城市简称
@@ -1218,7 +1217,7 @@ func (e *ExtractTask) initDistricts(jc_province string, qc_city string, c *City,
 				}
 			}
 		}
-		//街道
+		//街道乡镇
 		towns := towns_maps[jc_province][qc_city][qc_district]
 		for _, vtown := range towns {
 			strvtown := qu.ObjToString(vtown["town"])
@@ -1286,6 +1285,8 @@ func (e *ExtractTask) InitVar() {
 	//敏感词-筛选
 	e.SensitiveFullCity = sensitive.New()
 	e.SensitiveSimCity = sensitive.New()
+	e.SensitiveFullDistrict = sensitive.New()
+
 }
 
 //初始化邮编库

+ 271 - 180
src/jy/extract/extractcity_new.go

@@ -10,28 +10,16 @@ import (
 //抽取地域信息
 func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{}, isLog bool) {
 	defer qu.Catch()
-
-	/*
-		调整逻辑~ 2022.12.6
-			1、标题组,若之前未抽取到值,区县不采用简称
-			2、站点匹配的地域~标准化校验
-			3、编写脚本,每天同步更新站点表信息
-	*/
-
 	//日志记录
 	logRecordInfo := []map[string]interface{}{}
 	f_area, f_city, f_district := "", "", ""
 	all_regions := map[string]map[string]map[string]string{}
-	//jsondata ~ 初步确认
+	//jsondata ~ 前置条件
 	e.GetRegionByTentativeJsonData(j, &all_regions)
-	//site ~ 初步确认
-	e.GetRegionByTentativeSite(j, &all_regions)
-	//记录
-	if isLog {
+	if isLog && len(all_regions) > 0 {
 		valueArr := []string{}
 		valueArr = append(valueArr, qu.ObjToString((*j.Jsondata)["area_city_district"]))
-		valueArr = append(valueArr, qu.ObjToString((*j.Data)["site"]))
-		LogProcessRecordingForTentative("jsondata_site", valueArr, all_regions, &logRecordInfo)
+		LogProcessRecordingForTentative("jsondata", valueArr, all_regions, &logRecordInfo)
 	}
 	b := ConfirmUniqueRegionInfo(all_regions, &f_area, &f_city, &f_district)
 	if b {
@@ -43,67 +31,74 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 		(*tmp)["regions_log"] = logRecordInfo
 		return
 	}
-
 	//字段可控
-	CityFieldsArr := []string{
-		"projectaddr,addressing",
-		"buyer,approvedepartment",
-		"buyerzipcode,buyertel",
-		"bidopenaddress,buyeraddr",
-		"title,projectname",
+	RegionFieldsArr := ju.DefaultRegions
+	//采购单位比较特殊~需要根据站点类型进行重新组合
+	if e.IsConsecutionRegion(qu.ObjToString((*tmp)["site"])) {
+		RegionFieldsArr = ju.AdjustmentRegions
 	}
-
-	for _, v := range CityFieldsArr {
+	for _, v := range RegionFieldsArr {
 		keyArr := strings.Split(v, ",")
-		//临时调试看到具体的值
-		isContinue, textValues := TextGroupInfo(keyArr, *tmp)
-		if !isContinue {
-			continue
-		}
-		isUseful := false //当前组提取前~是否有值
-		if len(all_regions) > 0 {
-			isUseful = true
-		}
-		field_regions, old_regions, new_regions := e.GetRegionByGroupInfo(keyArr, isUseful, *tmp)
-		AnalysisIsUniqueInfo(new_regions, &all_regions)
-		if isLog { //日志记录
-			LogProcessRecordingForGroupInfo(strings.Join(keyArr, "_"), textValues, field_regions, old_regions, all_regions, &logRecordInfo)
-		}
-		b = ConfirmUniqueRegionInfo(all_regions, &f_area, &f_city, &f_district)
-		if b {
-			CompleteRegionInfo(&f_area, &f_city, &f_district)
-			//最终赋值
-			(*tmp)["area"] = f_area
-			(*tmp)["city"] = f_city
-			(*tmp)["district"] = f_district
-			(*tmp)["regions_log"] = logRecordInfo
-			return
+		isExists, textValues, field_regions, old_regions, new_regions := e.GetRegionByGroupInfo(keyArr, *tmp)
+		if isExists { //是否存在抽取有效值
+			AnalysisIsUniqueInfo(new_regions, &all_regions)
+			if isLog { //日志记录
+				LogProcessRecordingForGroupInfo(strings.Join(keyArr, "_"), textValues, field_regions, old_regions, all_regions, &logRecordInfo)
+			}
+			b = ConfirmUniqueRegionInfo(all_regions, &f_area, &f_city, &f_district)
+			if b {
+				CompleteRegionInfo(&f_area, &f_city, &f_district)
+				//最终赋值
+				(*tmp)["area"] = f_area
+				(*tmp)["city"] = f_city
+				(*tmp)["district"] = f_district
+				(*tmp)["regions_log"] = logRecordInfo
+				return
+			}
 		}
 	}
-
 	//未提前结束~筛选出~最终的
 	ConfirmUniqueRegionInfo(all_regions, &f_area, &f_city, &f_district)
 	//给地域做简易的清洗完善
 	CompleteRegionInfo(&f_area, &f_city, &f_district)
 
-	//新疆兵团核对校验
+	//用到的字段
+	projectname := qu.ObjToString((*tmp)["projectname"])
 	buyer := qu.ObjToString((*tmp)["buyer"])
+	site := qu.ObjToString((*tmp)["site"])
+	//新疆兵团补充地域~
 	if xjbtReg.MatchString(buyer) && f_city == "" {
-		if a, c, d, ok := e.CheckingXjbtCity(buyer); ok {
-			f_area = a
-			f_city = c
-			f_district = d
+		if a, c, d, ok := e.NewVerifyXjCorpsInfo(buyer); ok {
+			f_area, f_city, f_district = a, c, d
 		}
 	}
 
-	//敏感词校验核对方法
-	if f_area != "全国" && f_city == "" {
-		if sensitive_city := e.SensitiveCityData(qu.ObjToString((*j.Data)["detail"]), f_area); sensitive_city != "" {
-			f_city = sensitive_city
+	//此时进行特殊链路新增、补充原则
+	if f_city == "" {
+		e.LinkSpecialRuleFullStep(projectname, &f_area, &f_city, &f_district)
+	}
+	if f_city == "" {
+		e.LinkSpecialRuleBriefStep(projectname, &f_area, &f_city, &f_district)
+	}
+	if f_city == "" {
+		e.LinkSpecialRuleBriefStep(buyer, &f_area, &f_city, &f_district)
+	}
+
+	//正文补充地域~
+	if f_area == "全国" || f_area == "" || f_city == "" {
+		if b := e.NewVerifySensitiveInfo(qu.ObjToString((*j.Data)["detail"]), &f_area, &f_city, &f_district); b {
 			(*tmp)["is_sensitive"] = 1
 		}
 	}
+	//最终站点补充
+	if f_area == "全国" || f_area == "" {
+		if sc := e.SiteCityMap[site]; sc != nil && sc.Q != "" {
+			f_area = sc.Q
+		}
+	}
 
+	//最终再清洗一遍数据
+	CompleteRegionInfo(&f_area, &f_city, &f_district)
 	//最终赋值
 	(*tmp)["area"] = f_area
 	(*tmp)["city"] = f_city
@@ -112,32 +107,52 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 }
 
 //对组进行分析处理
-func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, isUseful bool, tmp map[string]interface{}) (map[string]interface{}, map[string]map[string]map[string]string, map[string]map[string]map[string]string) {
+func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, tmp map[string]interface{}) (bool, []string, map[string]interface{}, map[string]map[string]map[string]string, map[string]map[string]map[string]string) {
 	old_regions := map[string]map[string]map[string]string{}
+	isExists := false
 	textArr := []string{}
 	field_regions := map[string]interface{}{}
 	for _, key := range keyArr {
-		text := qu.ObjToString(tmp[key])
+		text := ""
+		if key == "site_area" || key == "site_city" {
+			text = qu.ObjToString(tmp["site"])
+		} else if key == "buyer_filiale" {
+			text = GetFilialeByBuyerInfo(qu.ObjToString(tmp["buyer"]))
+		} else if key == "projectname" {
+			text = CleanRegionProjectNameInfo(qu.ObjToString(tmp[key]), qu.ObjToString(tmp["buyer"]))
+		} else {
+			text = qu.ObjToString(tmp[key])
+		}
 		textArr = append(textArr, text)
+		if text != "" {
+			isExists = true
+		} else {
+			continue //无值不用提取
+		}
 		valuesArr := []map[string]interface{}{}
 		if key == "buyerzipcode" {
 			valuesArr = e.GetRegionByPostCode(text, &old_regions)
 		} else if key == "buyertel" {
 			valuesArr = e.GetRegionByTelNumber(text, &old_regions)
+		} else if key == "site_area" {
+			valuesArr = e.GetRegionBySite(text, &old_regions, 1)
+		} else if key == "site_city" {
+			valuesArr = e.GetRegionBySite(text, &old_regions, 2)
+		} else if key == "buyer_filiale" {
+			valuesArr = e.GetRegionFromText(text, &old_regions, false, false, 2)
 		} else {
-			//地址类~采用优先级模式~不能多省
-			isAddress := false
+			isAddress, isBrief := false, false
 			if key == "projectaddr" || key == "addressing" || key == "bidopenaddress" || key == "buyeraddr" {
 				isAddress = true
 			}
-			valuesArr = e.GetRegionFromText(text, &old_regions, isAddress, 2)
+			valuesArr = e.GetRegionFromText(text, &old_regions, isAddress, isBrief, 2)
 		}
 		field_regions[key] = valuesArr
 	}
 	//校验当前组的合理性
 	new_regions := ReasonableGroupRegionInfo(old_regions)
 
-	return field_regions, old_regions, new_regions
+	return isExists, textArr, field_regions, old_regions, new_regions
 }
 
 //邮政编号
@@ -191,7 +206,7 @@ func (e *ExtractTask) GetRegionByTentativeJsonData(j *ju.Job, all_regions *map[s
 	if j.Jsondata != nil {
 		jsondata := *j.Jsondata
 		if a_c_d, ok := jsondata["area_city_district"].(string); ok && a_c_d != "" {
-			e.GetRegionFromText(a_c_d, &regions, false, 1)
+			e.GetRegionFromText(a_c_d, &regions, false, false, 1)
 		}
 	}
 	if len(regions) == 1 {
@@ -226,10 +241,68 @@ func (e *ExtractTask) GetRegionByTentativeJsonData(j *ju.Job, all_regions *map[s
 	}
 }
 
-//初步确认~站点
-func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[string]map[string]map[string]string) {
+// StandardizedegionInfo validates and normalizes the area/city/district triple in place: it fills in the city for the four municipalities, rewrites names through the lookup tables on e, and clears any level (plus the levels below it) that cannot be confirmed.
+func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district *string) {
+	// Municipalities: the city follows directly from the area.
+	if *area == "北京" {
+		*city = "北京市"
+	} else if *area == "天津" {
+		*city = "天津市"
+	} else if *area == "上海" {
+		*city = "上海市"
+	} else if *area == "重庆" {
+		*city = "重庆市"
+	}
+	// Validate each level top-down; an empty parent empties its children.
+	if *area == "" {
+		*city = ""
+		*district = ""
+	} else {
+		if province := e.ProvinceMap[*area]; province != "" { // swap in the ProvinceMap value when area is a key there (presumably full name -> brief name; confirm)
+			*area = province
+		}
+		if *city == "" {
+			*district = ""
+		} else {
+			if csMap := e.CityBriefMap[*city]; csMap != nil { // city is a key of CityBriefMap
+				if csMap.P.Brief == *area && csMap.Name != "" { // must sit in the same province; replace with the entry's Name
+					*city = csMap.Name
+				} else {
+					*city = ""
+					*district = ""
+				}
+			} else {
+				if e.CityMap[*city] == "" { // not a CityMap key either: treat the city as unknown
+					*city = ""
+					*district = ""
+				}
+			}
+			if *district != "" {
+				citysArr := e.DistrictSimAndAll[*district]
+				if len(citysArr) == 1 { // exactly one candidate: adopt its map key as the district name
+					full_city := citysArr[0]
+					for d, _ := range full_city { // if the map held several keys, the last one iterated wins (map order is unspecified)
+						*district = d
+					}
+				} else if len(citysArr) > 1 { // several candidates: ambiguous, drop the district
+					*district = ""
+				} else if len(citysArr) == 0 { // no DistrictSimAndAll entry: keep the district only if DistrictCityMap has it
+					fullArr := e.DistrictCityMap[*district]
+					if len(fullArr) == 0 {
+						*district = ""
+					}
+				} else { // unreachable: the three branches above cover every length
+
+				}
+			}
+		}
+	}
+}
+
+//站点取值   from 1-省  2-省市
+func (e *ExtractTask) GetRegionBySite(site string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
+	regionArr := []map[string]interface{}{}
 	area, city, district := "", "", ""
-	site, _ := (*j.Data)["site"].(string)
 	if scMap := e.SiteCityMap[site]; scMap != nil {
 		if scMap.P != "" && scMap.P != "全国" && scMap.P != "null" {
 			area = scMap.P
@@ -237,56 +310,23 @@ func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[strin
 		if scMap.C != "" && scMap.C != "null" && area != "" {
 			city = scMap.C
 		}
-		//if scMap.D != "" && scMap.D != "null" && city != "" {
-		//	district = scMap.D
-		//}
 	}
-
-	//对省市区进行标准化校验~简称全程的问题
 	e.StandardizedegionInfo(&area, &city, &district)
+	if from == 1 && area != "" && area != "全国" {
+		UpdateRegionsInfo(area, "", "", regions)
+		regionArr = append(regionArr, map[string]interface{}{"area": area, "city": "", "district": ""})
 
-	//取出唯一数据
-	j_area, j_city, j_district := "", "", ""
-	is_adjust := false
-	if len(*all_regions) == 1 { //有值~只进行补充操作
-		for k, v := range *all_regions {
-			j_area = k
-			for k1, v1 := range v {
-				j_city = k1
-				for k2, _ := range v1 {
-					j_district = k2
-				}
-			}
-		}
-		if j_area == area && area != "" {
-			if city != "" {
-				if j_city == "" {
-					is_adjust = true
-				} else if j_city == city {
-					if district != "" && j_district == "" {
-						is_adjust = true
-					}
-				}
-			}
-		}
-	} else {
-		is_adjust = true
 	}
-	if is_adjust && area != "" { //进行调整
-		city_info := map[string]map[string]string{}
-		district_info := map[string]string{}
-		if city != "" {
-			if district != "" {
-				district_info[district] = district
-			}
-			city_info[city] = district_info
-		}
-		(*all_regions)[area] = city_info
+	if from == 2 && area != "" && area != "全国" && city != "" {
+		UpdateRegionsInfo(area, city, "", regions)
+		regionArr = append(regionArr, map[string]interface{}{"area": area, "city": city, "district": ""})
 	}
+
+	return regionArr
 }
 
-//新疆兵团映射
-func (e *ExtractTask) CheckingXjbtCity(buyer string) (new_a, new_c, new_d string, ok bool) {
+//新疆兵团
+func (e *ExtractTask) NewVerifyXjCorpsInfo(buyer string) (new_a, new_c, new_d string, ok bool) {
 	buyer = strings.ReplaceAll(buyer, "新疆兵团", "新疆生产建设兵团")
 	ok = false
 	for _, info := range e.XjbtCityArr {
@@ -315,94 +355,145 @@ func (e *ExtractTask) CheckingXjbtCity(buyer string) (new_a, new_c, new_d string
 	return new_a, new_c, new_d, ok
 }
 
-//敏感词识别~~~
-func (e *ExtractTask) SensitiveCityData(detail string, area string) string {
-	//采用正文
+//敏感词识别
+func (e *ExtractTask) NewVerifySensitiveInfo(detail string, area *string, city *string, district *string) bool {
 	detail = sensitiveReg.ReplaceAllString(detail, "")
-	//删除表格相关-文本
 	detail = TextAfterRemoveTable(detail)
-
-	sim_arr := e.SensitiveSimCity.FindAll(detail)
-	full_arr := e.SensitiveFullCity.FindAll(detail)
-	if len(full_arr) < 3 {
-		for _, v := range full_arr {
+	//全称城市
+	fullCityArr := e.SensitiveFullCity.FindAll(detail)
+	if len(fullCityArr) == 1 {
+		for _, v := range fullCityArr {
 			if cityMap := e.CityFullMap[v]; cityMap != nil {
-				if cityMap.P.Brief == area {
-					return cityMap.Name
+				if *area == "" || *area == "全国" || cityMap.P.Brief == *area {
+					*area = cityMap.P.Brief
+					*city = cityMap.Name
+					return true
 				}
 			}
 		}
 	}
-	if len(sim_arr) < 3 {
-		for _, v := range sim_arr {
-			if cityMap := e.CityBriefMap[v]; cityMap != nil {
-				if cityMap.P.Brief == area && !strings.Contains(area, v) {
-					return cityMap.Name
+	//全称区县
+	fullDistrictArr := e.SensitiveFullDistrict.FindAll(detail)
+	if len(fullDistrictArr) == 1 {
+		for _, v := range fullDistrictArr {
+			if citys := e.DistrictCityMap[v]; len(citys) == 1 {
+				if *area == "" || *area == "全国" || citys[0].P.Brief == *area {
+					*area = citys[0].P.Brief
+					*city = citys[0].Name
+					*district = v
+					return true
 				}
 			}
 		}
 	}
-	return ""
-}
-
-//临时调试属性
-func TextGroupInfo(keyArr []string, tmp map[string]interface{}) (bool, []string) {
-	isvalid := false
-	dataArr := []string{}
-	for _, v := range keyArr {
-		text := qu.ObjToString(tmp[v])
-		if text != "" {
-			isvalid = true
+	//简称城市
+	simCityArr := e.SensitiveSimCity.FindAll(detail)
+	if len(simCityArr) == 1 {
+		for _, v := range simCityArr {
+			if cityMap := e.CityBriefMap[v]; cityMap != nil {
+				if *area == "" || *area == "全国" {
+					*area = cityMap.P.Brief
+					if !strings.Contains(*area, v) {
+						*city = cityMap.Name
+					}
+					return true
+				}
+				if cityMap.P.Brief == *area && !strings.Contains(*area, v) {
+					*area = cityMap.P.Brief
+					*city = cityMap.Name
+					return true
+				}
+			}
 		}
-		dataArr = append(dataArr, qu.ObjToString(tmp[v]))
 	}
-	return isvalid, dataArr
-}
 
-//简称全程标准化的校验~
-func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district *string) {
-	//特殊市补充
-	if *area == "北京" {
-		*city = "北京市"
-	} else if *area == "天津" {
-		*city = "天津市"
-	} else if *area == "上海" {
-		*city = "上海市"
-	} else if *area == "重庆" {
-		*city = "重庆市"
-	}
-	//非空与空~是否标准校验
-	if *area == "" {
-		*city = ""
-		*district = ""
-	} else {
-		if province := e.ProvinceMap[*area]; province != "" {
-			*area = province
-		}
-		if *city == "" {
-			*district = ""
-		} else {
-			if csMap := e.CityBriefMap[*city]; csMap != nil {
-				if csMap.P.Brief == *area && csMap.Name != "" {
-					*city = csMap.Name
-				} else {
-					*city = ""
-					*district = ""
+	//疑似固话提取~
+	fixedTelArr := FixedTelReg.FindAllString(detail, -1)
+	if len(fixedTelArr) > 0 {
+		codeArr := resetFixedTelInfo(fixedTelArr)
+		if len(codeArr) == 1 {
+			for _, v := range codeArr {
+				if ac := e.AreaCodeMap[v]; ac != nil {
+					*area = ac.P
+					return true
 				}
 			}
-			if *district != "" {
-				citysArr := e.DistrictSimAndAll[*district]
-				if len(citysArr) == 1 {
-					full_city := citysArr[0]
-					for d, _ := range full_city {
-						*district = d
-					}
-				} else if len(citysArr) > 1 {
-					*district = ""
-				} else {
+		}
+	}
+	return false
+}
 
-				}
+func resetFixedTelInfo(telArr []string) []string { // resetFixedTelInfo returns the distinct prefixes (the text before the first "-") of the non-empty entries of telArr, in first-seen order.
+	codeArr := []string{}
+	telsMap := map[string]string{} // prefixes already emitted, used as a set
+	for _, v := range telArr {
+		if v != "" {
+			arr := strings.Split(v, "-") // Split always yields at least one element, so arr[0] is safe
+			code := qu.ObjToString(arr[0])
+			if telsMap[code] == "" { // not seen yet (an empty prefix would never register as seen)
+				telsMap[code] = code
+				codeArr = append(codeArr, code)
+			}
+		}
+	}
+	return codeArr
+}
+
+//初步确认~站点
+//func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[string]map[string]map[string]string) {
+//	area, city, district := "", "", ""
+//	site, _ := (*j.Data)["site"].(string)
+//	if scMap := e.SiteCityMap[site]; scMap != nil {
+//		if scMap.P != "" && scMap.P != "全国" && scMap.P != "null" {
+//			area = scMap.P
+//		}
+//		if scMap.C != "" && scMap.C != "null" && area != "" {
+//			city = scMap.C
+//		}
+//		//if scMap.D != "" && scMap.D != "null" && city != "" {
+//		//	district = scMap.D
+//		//}
+//	}
+//
+//	//对省市区进行标准化校验~简称全程的问题
+//	e.StandardizedegionInfo(&area, &city, &district)
+//
+//	//取出唯一数据
+//	j_area, j_city, j_district := "", "", ""
+//	is_adjust := false
+//	if len(*all_regions) == 1 { //有值~只进行补充操作
+//		for k, v := range *all_regions {
+//			j_area = k
+//			for k1, v1 := range v {
+//				j_city = k1
+//				for k2, _ := range v1 {
+//					j_district = k2
+//				}
+//			}
+//		}
+//		if j_area == area && area != "" {
+//			if city != "" {
+//				if j_city == "" {
+//					is_adjust = true
+//				} else if j_city == city {
+//					if district != "" && j_district == "" {
+//						is_adjust = true
+//					}
+//				}
+//			}
+//		}
+//	} else {
+//		is_adjust = true
+//	}
+//	if is_adjust && area != "" { //进行调整
+//		city_info := map[string]map[string]string{}
+//		district_info := map[string]string{}
+//		if city != "" {
+//			if district != "" {
+//				district_info[district] = district
+//			}
+//			city_info[city] = district_info
+//		}
+//		(*all_regions)[area] = city_info
+//	}
+//}

+ 60 - 0
src/jy/extract/extractcity_old.go

@@ -992,3 +992,63 @@ func PCDScore(j *ju.Job, stype, text string, score float64, isfull bool) {
 		}
 	}
 }
+
+// CheckingXjbtCity (legacy version) looks buyer up in e.XjbtCityArr and returns the area, city and district of the first entry whose "name" or "alias" occurs in buyer, refined by that entry's "list" when a sub-entry also matches; ok reports whether any entry matched.
+func (e *ExtractTask) CheckingXjbtCity(buyer string) (new_a, new_c, new_d string, ok bool) {
+	buyer = strings.ReplaceAll(buyer, "新疆兵团", "新疆生产建设兵团") // rewrite the short form to the long form before matching
+	ok = false
+	for _, info := range e.XjbtCityArr {
+		name := qu.ObjToString(info["name"])
+		alias := qu.ObjToString(info["alias"])
+		if strings.Contains(buyer, name) || strings.Contains(buyer, alias) { // NOTE(review): an empty name or alias makes this true for every buyer — confirm the data never has one
+			new_a = qu.ObjToString(info["area"])
+			new_c = qu.ObjToString(info["city"])
+			new_d = qu.ObjToString(info["district"])
+			ok = true
+			if res, ok := info["list"].([]interface{}); ok { // this ok is scoped to the if statement and does not touch the named result
+				list := qu.ObjArrToMapArr(res)
+				for _, c := range list {
+					c_name := qu.ObjToString(c["name"])
+					if strings.Contains(buyer, name+c_name) || strings.Contains(buyer, alias+c_name) { // the sub-entry name must directly follow the parent name/alias in buyer
+						new_a = qu.ObjToString(c["area"])
+						new_c = qu.ObjToString(c["city"])
+						new_d = qu.ObjToString(c["district"])
+						break
+					}
+				}
+			}
+			break // only the first matching top-level entry is considered
+		}
+	}
+	return new_a, new_c, new_d, ok
+}
+
+// SensitiveCityData (legacy version) scans detail for city names and returns the Name of the first matched city whose province brief equals area, or "" when none qualifies.
+func (e *ExtractTask) SensitiveCityData(detail string, area string) string {
+	// Work on the body text.
+	detail = sensitiveReg.ReplaceAllString(detail, "")
+	// Drop table-related text.
+	detail = TextAfterRemoveTable(detail)
+
+	sim_arr := e.SensitiveSimCity.FindAll(detail)
+	full_arr := e.SensitiveFullCity.FindAll(detail)
+	if len(full_arr) < 3 { // full-name hits are tried first, and only when there are fewer than three of them
+		for _, v := range full_arr {
+			if cityMap := e.CityFullMap[v]; cityMap != nil {
+				if cityMap.P.Brief == area {
+					return cityMap.Name
+				}
+			}
+		}
+	}
+	if len(sim_arr) < 3 { // same limit for brief-name hits
+		for _, v := range sim_arr {
+			if cityMap := e.CityBriefMap[v]; cityMap != nil {
+				if cityMap.P.Brief == area && !strings.Contains(area, v) { // skip a brief name that is itself part of the area string
+					return cityMap.Name
+				}
+			}
+		}
+	}
+	return ""
+}

+ 330 - 40
src/jy/extract/extractcity_way.go

@@ -1,9 +1,35 @@
 package extract
 
+import "C"
 import (
 	qu "qfw/util"
+	"regexp"
+	"strings"
 )
 
+var FilialeReg1 = regexp.MustCompile("(.{1,3})分(公司|院|校|行)$") // one to three characters followed by 分公司/分院/分校/分行 at the end of the input
+var FilialeReg2 = regexp.MustCompile(".*[((](.*)[))].*") // group 1 is the text between an opening and a closing parenthesis
+var FilialeReg3 = regexp.MustCompile(".*(集团|公司|大学)(.*)(公司|院|所|校)") // group 2 is the text between a 集团/公司/大学 and a later 公司/院/所/校
+
+var CleanRegionReg1 = regexp.MustCompile(".*公司") // everything up to and including a 公司 (greedy)
+
+var FixedTelReg = regexp.MustCompile("0[0-9]{2,3}\\-[2-9][0-9]{6,7}") // 0 plus 2-3 digits, a hyphen, then 7-8 digits starting with 2-9
+
+// GetFilialeByBuyerInfo extracts a fragment from buyer by trying FilialeReg1, FilialeReg2 and FilialeReg3 in that order; it returns "" when none of them matches.
+func GetFilialeByBuyerInfo(buyer string) string {
+	if FilialeReg1.MatchString(buyer) {
+		return FilialeReg1.FindString(buyer) // the whole match, not just capture group 1
+	}
+	if FilialeReg2.MatchString(buyer) {
+		return FilialeReg2.ReplaceAllString(buyer, "${1}") // each match is replaced by its capture group 1
+	}
+	if FilialeReg3.MatchString(buyer) {
+		return FilialeReg3.ReplaceAllString(buyer, "${2}") // each match is replaced by its capture group 2
+	}
+
+	return ""
+}
+
 //最终确认~指定地域
 func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
 	if len(regions) > 1 || len(regions) == 0 {
@@ -50,66 +76,113 @@ func CompleteRegionInfo(area *string, city *string, district *string) {
 }
 
 //根据词获取所有的地域 ~ 暂时不采用三级简称提取城市
-func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
-	regions := []map[string]string{}
+func (e *ExtractTask) takeRegionsFromWords(text string, isAddress bool, isBrief bool, regionsArr *[]map[string]string) {
 	//全称匹配
 	for pos_full, trie_full := range e.Trie_Fulls {
 		if trie_full.Get(text) {
+			infoArr := []map[string]string{}
 			if pos_full == 0 {
 				if province := e.ProvinceMap[text]; province != "" {
-					regions = append(regions, map[string]string{"area": province, "city": "", "district": ""})
+					infoArr = append(infoArr, map[string]string{"area": province, "city": "", "district": ""})
+					SplicingRegionsInfo(isAddress, regionsArr, infoArr)
 				}
 			} else if pos_full == 1 {
 				if data := e.CityFullMap[text]; data != nil {
 					if data.P.Brief != "" && data.Name != "" {
-						regions = append(regions, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""})
+						infoArr = append(infoArr, map[string]string{"area": data.P.Brief, "city": data.Name, "district": ""})
+						SplicingRegionsInfo(isAddress, regionsArr, infoArr)
 					}
 				}
 			} else if pos_full == 2 {
 				citys := e.DistrictCityMap[text]
 				for _, c := range citys {
 					if c.P.Brief != "" && c.Name != "" && text != "" {
-						regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text})
+						infoArr = append(infoArr, map[string]string{"area": c.P.Brief, "city": c.Name, "district": text})
 					}
 				}
+				SplicingRegionsInfo(isAddress, regionsArr, infoArr)
 			}
+			break
 		}
 	}
 	//简称匹配
 	for pos_sim, trie_sim := range e.Trie_Sims {
 		if trie_sim.Get(text) {
+			infoArr := []map[string]string{}
 			if pos_sim == 0 {
 				if text != "" {
-					regions = append(regions, map[string]string{"area": text, "city": "", "district": ""})
+					infoArr = append(infoArr, map[string]string{"area": text, "city": "", "district": ""})
+					SplicingRegionsInfo(isAddress, regionsArr, infoArr)
 				}
 			} else if pos_sim == 1 {
 				if csMap := e.CityBriefMap[text]; csMap != nil {
 					if csMap.P.Brief != "" && csMap.Name != "" {
-						regions = append(regions, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""})
+						infoArr = append(infoArr, map[string]string{"area": csMap.P.Brief, "city": csMap.Name, "district": ""})
+						SplicingRegionsInfo(isAddress, regionsArr, infoArr)
 					}
 				}
 			} else if pos_sim == 2 {
-				//if is_brief_district {
-				//	citysArr := e.DistrictSimAndAll[text]
-				//	for _, full_citys := range citysArr {
-				//		for district, c := range full_citys {
-				//			if c == nil || c.P == nil || c.Name == "" {
-				//				continue
-				//			}
-				//			if c.P.Brief != "" && c.Name != "" && district != "" {
-				//				regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
-				//			}
-				//		}
-				//	}
-				//}
+				if isBrief {
+					citysArr := e.DistrictSimAndAll[text]
+					for _, full_citys := range citysArr {
+						for district, c := range full_citys {
+							if c == nil || c.P == nil || c.Name == "" {
+								continue
+							}
+							if c.P.Brief != "" && c.Name != "" && district != "" {
+								infoArr = append(infoArr, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
+							}
+						}
+						SplicingRegionsInfo(isAddress, regionsArr, infoArr)
+					}
+				}
+			}
+			break
+		}
+	}
+}
+
+//是否拼接数据~且根据地址类~首地域判断
+func SplicingRegionsInfo(isAddress bool, regionsArr *[]map[string]string, infoArr []map[string]string) {
+	if isAddress {
+		if len(*regionsArr) == 0 { //第一次
+			for _, info := range infoArr {
+				*regionsArr = append(*regionsArr, info)
+			}
+		} else {
+			for _, info := range infoArr {
+				area := qu.ObjToString(info["area"])
+				city := qu.ObjToString(info["city"])
+				isUseful := false
+				for _, v := range *regionsArr {
+					v_area := qu.ObjToString(v["area"])
+					v_city := qu.ObjToString(v["city"])
+					if area == v_area {
+						if v_city != "" {
+							if v_city == city {
+								isUseful = true
+								break
+							}
+						} else {
+							isUseful = true
+							break
+						}
+					}
+				}
+				if isUseful {
+					*regionsArr = append(*regionsArr, info)
+				}
 			}
 		}
+	} else {
+		for _, info := range infoArr {
+			*regionsArr = append(*regionsArr, info)
+		}
 	}
-	return regions
 }
 
 //文本取地域   from  1~jsondata文本   2~其他文本
-func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, isAddress bool, from int) []map[string]interface{} {
+func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, isAddress bool, isBrief bool, from int) []map[string]interface{} {
 	regionValues := []map[string]interface{}{}
 	if text == "" {
 		return regionValues
@@ -120,26 +193,11 @@ func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[str
 	} else if from == 2 {
 		wordsArr = e.Seg_SV.Cut(text, true)
 	}
+	//词组清洗
+	wordsArr = CleanRegionTextWords(wordsArr)
 	regionsArr := []map[string]string{}
-	first_province := ""
 	for _, word := range wordsArr {
-		infos := e.takeRegionsFromWords(word)
-		for _, v := range infos {
-			area := qu.ObjToString(v["area"])
-			if area == "" {
-				continue
-			}
-			if first_province == "" {
-				first_province = area
-			}
-			if isAddress {
-				if area == first_province {
-					regionsArr = append(regionsArr, v)
-				}
-			} else {
-				regionsArr = append(regionsArr, v)
-			}
-		}
+		e.takeRegionsFromWords(word, isAddress, isBrief, &regionsArr)
 	}
 	for _, v := range regionsArr {
 		area := qu.ObjToString(v["area"])
@@ -372,6 +430,162 @@ func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyIn
 	*logRecordInfo = append(*logRecordInfo, data)
 }
 
+// IsConsecutionRegion reports whether the category (field T) of the site's
+// entry in e.SiteCityMap is one of: school, military, government procurement,
+// public resources, people's government, government portal, or online
+// approval platform.
+//
+// It returns false when the site has no entry in e.SiteCityMap.
+// NOTE(review): the original comment says this selects the "restructured"
+// region-resolution order; the caller is outside this hunk — confirm.
+func (e *ExtractTask) IsConsecutionRegion(site string) bool {
+	entry := e.SiteCityMap[site]
+	if entry == nil {
+		return false
+	}
+	switch entry.T {
+	case "学校", "军队", "政府采购", "公共资源", "人民政府", "政府门户", "在线审批平台":
+		return true
+	default:
+		return false
+	}
+}
+
+// CleanRegionProjectNameInfo cleans a project name before region matching.
+//
+// An empty projectname is returned unchanged. Otherwise every occurrence of
+// buyer (when buyer is non-empty) is removed, and then every match of
+// CleanRegionReg1 is deleted from the result.
+func CleanRegionProjectNameInfo(projectname string, buyer string) string {
+	if projectname == "" {
+		return projectname
+	}
+	cleaned := projectname
+	if buyer != "" {
+		cleaned = strings.ReplaceAll(cleaned, buyer, "")
+	}
+	// ReplaceAllString returns its input untouched when nothing matches, so
+	// no separate MatchString probe is needed.
+	return CleanRegionReg1.ReplaceAllString(cleaned, "")
+}
+
+// CleanRegionTextWords cleans a segmented word list before region matching.
+//
+// Lists with at most one word are returned as-is. Otherwise:
+//   - if a word equal to "路" (road) or "街道" (street) occurs at any position
+//     other than the first, the first such word and the word directly before
+//     it are dropped and the remaining words are returned;
+//   - if no such word exists, every "城区" (urban district) word is dropped.
+//
+// NOTE(review): only the first road/street suffix is handled, and "城区" is
+// not filtered on that path — confirm this is intended.
+func CleanRegionTextWords(wordsArr []string) []string {
+	if len(wordsArr) <= 1 {
+		return wordsArr
+	}
+	cleaned := make([]string, 0, len(wordsArr))
+
+	// Position of the first road/street suffix; index 0 is never a candidate
+	// because there is no preceding word to pair it with.
+	suffixAt := -1
+	for i := 1; i < len(wordsArr); i++ {
+		if w := wordsArr[i]; w == "路" || w == "街道" {
+			suffixAt = i
+			break
+		}
+	}
+	if suffixAt > 0 {
+		// Cut out the pair (name, suffix) and keep everything around it.
+		cleaned = append(cleaned, wordsArr[:suffixAt-1]...)
+		cleaned = append(cleaned, wordsArr[suffixAt+1:]...)
+		return cleaned
+	}
+
+	// No road/street suffix: strip the special word "城区".
+	for _, w := range wordsArr {
+		if w != "城区" {
+			cleaned = append(cleaned, w)
+		}
+	}
+	return cleaned
+}
+
+// LinkSpecialRuleFullStep supplements area/city/district from full-name
+// matches found in text.
+//
+// text is segmented with e.Seg_SV.Cut(text, true). Each word accepted by the
+// e.Trie_Fulls entry at position 3 is looked up in e.StreetDistrictMap, and
+// every resulting (province brief, city name, district name) triple is
+// collected through UpdateRegionsInfo. When at least one candidate exists:
+//   - if *area is "" or "全国" (nationwide), LinkAddedRules is applied;
+//   - otherwise LinkSuppleRules is applied.
+//
+// area, city and district are in/out parameters and must be non-nil.
+// NOTE(review): position 3 is presumably the street-level trie — confirm.
+// NOTE(review): d.C and d.C.P are dereferenced without nil checks, unlike
+// the brief-name variant of this step — confirm they are always populated.
+func (e *ExtractTask) LinkSpecialRuleFullStep(text string, area *string, city *string, district *string) {
+	candidates := map[string]map[string]map[string]string{}
+	for _, token := range e.Seg_SV.Cut(text, true) {
+		for level, trie := range e.Trie_Fulls {
+			if level != 3 || !trie.Get(token) {
+				continue
+			}
+			for _, d := range e.StreetDistrictMap[token] {
+				UpdateRegionsInfo(d.C.P.Brief, d.C.Name, d.Name, &candidates)
+			}
+		}
+	}
+	if len(candidates) == 0 {
+		return
+	}
+	if *area == "" || *area == "全国" {
+		// No usable province yet: "add" rule.
+		LinkAddedRules(candidates, area, city, district)
+		return
+	}
+	// Province already known: "supplement" rule.
+	LinkSuppleRules(candidates, area, city, district)
+}
+
+// LinkSpecialRuleBriefStep supplements area/city/district from short-name
+// matches found in text.
+//
+// text is segmented with e.Seg_SV.Cut(text, true). Each word accepted by the
+// e.Trie_Sims entry at position 2 is looked up in e.DistrictSimAndAll; for
+// every (district name -> city) pair with a non-nil city, a non-nil province,
+// and non-empty province brief, city name and district name, the triple is
+// collected through UpdateRegionsInfo. When at least one candidate exists:
+//   - if *area is "" or "全国" (nationwide), LinkAddedRules is applied;
+//   - otherwise LinkSuppleRules is applied.
+//
+// area, city and district are in/out parameters and must be non-nil.
+// NOTE(review): position 2 is presumably the district short-name trie — confirm.
+func (e *ExtractTask) LinkSpecialRuleBriefStep(text string, area *string, city *string, district *string) {
+	candidates := map[string]map[string]map[string]string{}
+	for _, token := range e.Seg_SV.Cut(text, true) {
+		for level, trie := range e.Trie_Sims {
+			if level != 2 || !trie.Get(token) {
+				continue
+			}
+			for _, byDistrict := range e.DistrictSimAndAll[token] {
+				for districtName, c := range byDistrict {
+					// Skip incomplete records.
+					if c == nil || c.P == nil || c.Name == "" {
+						continue
+					}
+					if c.P.Brief == "" || districtName == "" {
+						continue
+					}
+					UpdateRegionsInfo(c.P.Brief, c.Name, districtName, &candidates)
+				}
+			}
+		}
+	}
+	if len(candidates) == 0 {
+		return
+	}
+	if *area == "" || *area == "全国" {
+		// No usable province yet: "add" rule.
+		LinkAddedRules(candidates, area, city, district)
+		return
+	}
+	// Province already known: "supplement" rule.
+	LinkSuppleRules(candidates, area, city, district)
+}
+
+// LinkAddedRules applies the "add" rule: it fills area/city/district from
+// regions (province -> city -> district -> value) only when the result is
+// unambiguous.
+//
+// Nothing is written unless regions holds exactly one province. In that case
+// *area is set to it; *city is set only if that province has exactly one
+// city; *district is set only if that city has exactly one district.
+// All three pointers must be non-nil.
+func LinkAddedRules(regions map[string]map[string]map[string]string, area *string, city *string, district *string) {
+	if len(regions) != 1 {
+		return
+	}
+	for province, cities := range regions {
+		*area = province
+		if len(cities) != 1 {
+			continue
+		}
+		for cityName, districts := range cities {
+			*city = cityName
+			if len(districts) != 1 {
+				continue
+			}
+			for districtName := range districts {
+				*district = districtName
+			}
+		}
+	}
+}
+
+// LinkSuppleRules applies the "supplement" rule: with the province already
+// known in *area, it refines city/district from regions
+// (province -> city -> district -> value).
+//
+// Only the entry whose key equals *area is considered. *city is set when that
+// province has exactly one city; *district is additionally set when that city
+// has exactly one district. *area is never modified.
+// All three pointers must be non-nil.
+// NOTE(review): an existing *city is overwritten unconditionally, and a stale
+// *district is kept when the new city has several districts — confirm.
+func LinkSuppleRules(regions map[string]map[string]map[string]string, area *string, city *string, district *string) {
+	// A missing key yields a nil map whose length is 0, so it is rejected
+	// by the same check as an ambiguous province.
+	cities := regions[*area]
+	if len(cities) != 1 {
+		return
+	}
+	for cityName, districts := range cities {
+		*city = cityName
+		if len(districts) != 1 {
+			continue
+		}
+		for districtName := range districts {
+			*district = districtName
+		}
+	}
+}
+
 //同组合并的地域数据
 //func MergeGroupRegionInfo(datas_1, datas_2 map[string]map[string]map[string]string) map[string]map[string]map[string]string {
 //	regions := map[string]map[string]map[string]string{}
@@ -422,3 +636,79 @@ func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyIn
 //	}
 //	return regions
 //}
+
+//临时修复校验~~~
+//func (e *ExtractTask) temporaryRepairRegionSite() {
+//	sess := ju.Site_Mgo.GetMgoConn()
+//	defer ju.Site_Mgo.DestoryMongoConn(sess)
+//	q, total, isok := map[string]interface{}{}, 0, 0
+//	it := sess.DB(ju.Site_Mgo.DbName).C("site").Find(&q).Sort("_id").Select(map[string]interface{}{
+//		"area":     1,
+//		"city":     1,
+//		"district": 1,
+//	}).Iter()
+//	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+//		if total%10000 == 0 {
+//			log.Debug("cur index ", total, "~", isok)
+//		}
+//		area := qu.ObjToString(tmp["area"])
+//		city := qu.ObjToString(tmp["city"])
+//		district := qu.ObjToString(tmp["district"])
+//		isUpdate := false
+//		if area == "" {
+//			isUpdate = true
+//			area = "全国"
+//			city = ""
+//			district = ""
+//		} else {
+//			if province := e.ProvinceMap[area]; province != "" {
+//				isUpdate = true
+//				area = province
+//			}
+//			if city == "" {
+//				district = ""
+//			} else {
+//				if csMap := e.CityBriefMap[city]; csMap != nil {
+//					if csMap.P.Brief == area && csMap.Name != "" {
+//						isUpdate = true
+//						city = csMap.Name
+//					}
+//				} else { //市区~为省份的情况
+//					if e.CityMap[city] == "" {
+//						fmt.Println("市异常~", tmp["_id"])
+//					}
+//				}
+//				if district != "" {
+//					citysArr := e.DistrictSimAndAll[district]
+//					if len(citysArr) == 1 {
+//						full_city := citysArr[0]
+//						for d, _ := range full_city {
+//							isUpdate = true
+//							district = d
+//						}
+//					} else if len(citysArr) == 0 {
+//						fullArr := e.DistrictCityMap[district]
+//						if len(fullArr) == 0 {
+//							fmt.Println("县异常~", tmp["_id"])
+//						}
+//					} else {
+//
+//					}
+//				}
+//			}
+//		}
+//		if isUpdate {
+//			isok++
+//			ju.Site_Mgo.UpdateById("site", BsonTOStringId(tmp["_id"]), map[string]interface{}{
+//				"$set": map[string]interface{}{
+//					"area":     area,
+//					"city":     city,
+//					"district": district,
+//				},
+//			})
+//		}
+//		tmp = make(map[string]interface{})
+//	}
+//	log.Debug("监测修复完毕~ ", total, "~", isok)
+//
+//}

+ 18 - 0
src/jy/util/util.go

@@ -3,6 +3,7 @@ package util
 import (
 	"fmt"
 	"github.com/cron"
+	"go.mongodb.org/mongo-driver/bson/primitive"
 	"gopkg.in/mgo.v2"
 	. "jy/mongodbutil"
 	qu "qfw/util"
@@ -25,6 +26,7 @@ type Trie struct {
 
 var syncint chan bool //获取下标锁
 var Config map[string]interface{}
+var RegionsConfig map[string]interface{}
 var Se = qu.SimpleEncrypt{Key: "topnet@extract"}
 
 var BrandRules map[string]map[string]string
@@ -44,6 +46,7 @@ var AddrsSess, QyxySess *mgo.Collection
 var Site_Mgo *MongodbSim
 
 var IsUpdateSite bool
+var DefaultRegions, AdjustmentRegions = []string{}, []string{}
 
 func init() {
 	syncint = make(chan bool, 1)
@@ -79,6 +82,9 @@ func UtilInit() {
 		PriceNumberReg[k] = regexp.MustCompile(v)
 	}
 
+	DefaultRegions = convertInterface(RegionsConfig["default_regions"])
+	AdjustmentRegions = convertInterface(RegionsConfig["adjustment_regions"])
+
 	//定时更新站点信息
 	IsUpdateSite = false
 	c := cron.New()
@@ -230,3 +236,15 @@ func FloatFormat(val float64, length int) float64 {
 		return tmp
 	}
 }
+
+// convertInterface converts a loosely typed list value into a string slice.
+//
+// t is accepted when its dynamic type is primitive.A or []interface{}; the
+// elements are then converted by qu.ObjArrToStringArr. For any other type
+// (including nil) an empty, non-nil slice is returned.
+func convertInterface(t interface{}) []string {
+	switch list := t.(type) {
+	case primitive.A:
+		return qu.ObjArrToStringArr(list)
+	case []interface{}:
+		return qu.ObjArrToStringArr(list)
+	default:
+		return []string{}
+	}
+}

+ 23 - 8
src/main.go

@@ -21,6 +21,7 @@ func init() {
 	log.SetLevel(log.DEBUG)
 	log.SetRollingDaily("./", "out.log")
 	qu.ReadConfig(&u.Config)
+	qu.ReadConfig("./res/regions.json", &u.RegionsConfig)
 	//抽取price和number相关
 	qu.ReadConfig("./res/pricenumber.json", &u.PriceNumberConfig)
 	//初始化util
@@ -29,6 +30,7 @@ func init() {
 }
 
 func main() {
+
 	extract.ExtractUdpUpdateMachine() //节点上传~构建
 	extract.ExtractUdp()              //udp通知抽取
 	go Router.Run(":" + qu.ObjToString(u.Config["port"]))
@@ -42,13 +44,26 @@ func main() {
 
 // testMain is a scratch routine for checking rules by hand: it runs a sample
 // value against the regular expressions below and logs what each one matches.
 func testMain() {
-	con := ``
-	var formattext12 = regexp.MustCompile("(工程业绩|投标文件中填报的单位项目业绩名称)[::]1[、.].*\n2[、.].*\n(3[、.].*\n)?")
-	if formattext12.MatchString(con) {
-		log.Debug("匹配")
-		con = formattext12.ReplaceAllString(con, "\n")
-		log.Debug(con)
-	} else {
-		log.Debug("不匹配")
+	var FilialeReg1 = regexp.MustCompile("(.{1,3})分(公司|院|校|行)$")
+	var FilialeReg2 = regexp.MustCompile(".*[((](.*)[))].*")
+	var FilialeReg3 = regexp.MustCompile(".*(集团|公司)(.*)公司")
+
+	/*
+		Branch-office ("filiale") patterns under test:
+		1. the one to three characters directly before a trailing
+		   分公司 / 分院 / 分校 / 分行 (branch company / institute / school / bank)
+		2. the characters inside parentheses
+		3. the characters between 集团 (group) and 公司 (company),
+		   or between 公司 and 公司
+	*/
+	buyer := "**公司河北公司"
+	if FilialeReg1.MatchString(buyer) {
+		// FindString logs the whole match, not only capture group 1.
+		new_str := FilialeReg1.FindString(buyer)
+		log.Debug(new_str)
+	}
+	if FilialeReg2.MatchString(buyer) {
+		// Keep only capture group 1 (the parenthesised text).
+		new_str := FilialeReg2.ReplaceAllString(buyer, "${1}")
+		log.Debug(new_str)
+	}
+	if FilialeReg3.MatchString(buyer) {
+		// Keep only capture group 2 (the text between the two keywords).
+		new_str := FilialeReg3.ReplaceAllString(buyer, "${2}")
+		log.Debug(new_str)
 	}
 }

+ 0 - 1
src/res/pcd.txt

@@ -2095,7 +2095,6 @@
 色达县 4 n
 雷山县 4 n
 新兴县 4 n
-城区 4 n
 石阡县 4 n
 厦门市 4 n
 开化县 4 n

+ 26 - 0
src/res/regions.json

@@ -0,0 +1,26 @@
+{
+    "default_regions": [
+        "site_area",
+        "addressing,projectaddr",
+        "projectname",
+        "buyer_filiale",
+        "approvedepartment",
+        "buyeraddr",
+        "site_city",
+        "buyer",
+        "title",
+        "buyerzipcode,buyertel,bidopenaddress"
+    ],
+    "adjustment_regions" : [
+        "site_area",
+        "addressing,projectaddr",
+        "projectname",
+        "buyer_filiale",
+        "approvedepartment",
+        "buyer",
+        "buyeraddr",
+        "site_city",
+        "title",
+        "buyerzipcode,buyertel,bidopenaddress"
+    ]
+}

+ 2 - 0
src/res/sv.txt

@@ -3332,6 +3332,8 @@
 渭源县 4 n
 播州区 4 n
 禄劝彝族苗族自治县 4 n
+复合肥 4 n
+南阳坡 4 n
 蚌山 3 n
 诸城 3 n
 伽师 3 n