Răsfoiți Sursa

1、去除三级~简称提取 2、地址字段~过滤末尾提取的省份

zhengkun 2 ani în urmă
părinte
commit
fb84caa25d
2 fișiere modificate, cu 49 de adăugiri și 52 de ștergeri
  1. 9 10
      src/jy/extract/extractcity_new.go
  2. 40 42
      src/jy/extract/extractcity_way.go

+ 9 - 10
src/jy/extract/extractcity_new.go

@@ -125,12 +125,12 @@ func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, isUseful bool, tmp m
 		} else if key == "buyertel" {
 			valuesArr = e.GetRegionByTelNumber(text, &old_regions)
 		} else {
-			//如果为标题或者项目名称
-			if (key == "title" || key == "projectname") && !isUseful {
-				valuesArr = e.GetRegionFromTPText(text, &old_regions)
-			} else {
-				valuesArr = e.GetRegionFromText(text, &old_regions, 2)
+			//地址类~采用优先级模式~不能多省
+			isAddress := false
+			if key == "projectaddr" || key == "addressing" || key == "bidopenaddress" || key == "buyeraddr" {
+				isAddress = true
 			}
+			valuesArr = e.GetRegionFromText(text, &old_regions, isAddress, 2)
 		}
 		field_regions[key] = valuesArr
 	}
@@ -191,7 +191,7 @@ func (e *ExtractTask) GetRegionByTentativeJsonData(j *ju.Job, all_regions *map[s
 	if j.Jsondata != nil {
 		jsondata := *j.Jsondata
 		if a_c_d, ok := jsondata["area_city_district"].(string); ok && a_c_d != "" {
-			e.GetRegionFromText(a_c_d, &regions, 1)
+			e.GetRegionFromText(a_c_d, &regions, false, 1)
 		}
 	}
 	if len(regions) == 1 {
@@ -237,9 +237,9 @@ func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[strin
 		if scMap.C != "" && scMap.C != "null" && area != "" {
 			city = scMap.C
 		}
-		if scMap.D != "" && scMap.D != "null" && city != "" {
-			district = scMap.D
-		}
+		//if scMap.D != "" && scMap.D != "null" && city != "" {
+		//	district = scMap.D
+		//}
 	}
 
 	//对省市区进行标准化校验~简称全程的问题
@@ -405,5 +405,4 @@ func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district
 			}
 		}
 	}
-
 }

+ 40 - 42
src/jy/extract/extractcity_way.go

@@ -4,7 +4,7 @@ import (
 	qu "qfw/util"
 )
 
-//最终确认确认指定地域
+//最终确认~指定地域
 func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
 	if len(regions) > 1 || len(regions) == 0 {
 		return false
@@ -49,8 +49,8 @@ func CompleteRegionInfo(area *string, city *string, district *string) {
 	}
 }
 
-//根据词获取所有的地域 is_brief_district 是否进行简称区划提取
-func (e *ExtractTask) takeRegionsFromWords(text string, is_brief_district bool) []map[string]string {
+//根据词获取所有的地域 ~ 暂时不采用三级简称提取城市
+func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
 	regions := []map[string]string{}
 	//全称匹配
 	for pos_full, trie_full := range e.Trie_Fulls {
@@ -89,19 +89,19 @@ func (e *ExtractTask) takeRegionsFromWords(text string, is_brief_district bool)
 					}
 				}
 			} else if pos_sim == 2 {
-				if is_brief_district {
-					citysArr := e.DistrictSimAndAll[text]
-					for _, full_citys := range citysArr {
-						for district, c := range full_citys {
-							if c == nil || c.P == nil || c.Name == "" {
-								continue
-							}
-							if c.P.Brief != "" && c.Name != "" && district != "" {
-								regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
-							}
-						}
-					}
-				}
+				//if is_brief_district {
+				//	citysArr := e.DistrictSimAndAll[text]
+				//	for _, full_citys := range citysArr {
+				//		for district, c := range full_citys {
+				//			if c == nil || c.P == nil || c.Name == "" {
+				//				continue
+				//			}
+				//			if c.P.Brief != "" && c.Name != "" && district != "" {
+				//				regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
+				//			}
+				//		}
+				//	}
+				//}
 			}
 		}
 	}
@@ -109,7 +109,7 @@ func (e *ExtractTask) takeRegionsFromWords(text string, is_brief_district bool)
 }
 
 //文本取地域   from  1~jsondata文本   2~其他文本
-func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
+func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, isAddress bool, from int) []map[string]interface{} {
 	regionValues := []map[string]interface{}{}
 	if text == "" {
 		return regionValues
@@ -120,35 +120,33 @@ func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[str
 	} else if from == 2 {
 		wordsArr = e.Seg_SV.Cut(text, true)
 	}
+	regionsArr := []map[string]string{}
+	first_province := ""
 	for _, word := range wordsArr {
-		regionArr := e.takeRegionsFromWords(word, true)
-		for _, v := range regionArr {
+		infos := e.takeRegionsFromWords(word)
+		for _, v := range infos {
 			area := qu.ObjToString(v["area"])
-			city := qu.ObjToString(v["city"])
-			district := qu.ObjToString(v["district"])
-			UpdateRegionsInfo(area, city, district, regions)
-			regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
+			if area == "" {
+				continue
+			}
+			if first_province == "" {
+				first_province = area
+			}
+			if isAddress {
+				if area == first_province {
+					regionsArr = append(regionsArr, v)
+				}
+			} else {
+				regionsArr = append(regionsArr, v)
+			}
 		}
 	}
-	return regionValues
-}
-
-func (e *ExtractTask) GetRegionFromTPText(text string, regions *map[string]map[string]map[string]string) []map[string]interface{} {
-	regionValues := []map[string]interface{}{}
-	if text == "" {
-		return regionValues
-	}
-	wordsArr := []string{}
-	wordsArr = e.Seg_SV.Cut(text, true)
-	for _, word := range wordsArr {
-		regionArr := e.takeRegionsFromWords(word, false)
-		for _, v := range regionArr {
-			area := qu.ObjToString(v["area"])
-			city := qu.ObjToString(v["city"])
-			district := qu.ObjToString(v["district"])
-			UpdateRegionsInfo(area, city, district, regions)
-			regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
-		}
+	for _, v := range regionsArr {
+		area := qu.ObjToString(v["area"])
+		city := qu.ObjToString(v["city"])
+		district := qu.ObjToString(v["district"])
+		UpdateRegionsInfo(area, city, district, regions)
+		regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
 	}
 	return regionValues
 }