瀏覽代碼

城市修改

maxiaoshan 5 年之前
父節點
當前提交
d79c6dec77
共有 7 個文件被更改,包括 413 次插入 和 291 次删除
  1. 2 2
      src/config.json
  2. 70 52
      src/jy/extract/extractcity.go
  3. 299 198
      src/jy/extract/newextractcity.go
  4. 33 29
      src/jy/util/article.go
  5. 1 1
      src/main_test.go
  6. 0 2
      src/res/pcd.txt
  7. 8 7
      src/res/sv.txt

+ 2 - 2
src/config.json

@@ -8,9 +8,9 @@
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "qualityaudit": false,
-    "saveblock": true,
+    "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,

+ 70 - 52
src/jy/extract/extractcity.go

@@ -77,14 +77,14 @@ func (e *ExtractTask) ExtractCity(j *ju.Job, resulttmp map[string]interface{}, i
 	*/
 	defer qu.Catch()
 	//初始化
-	if j.AreaScore == nil {
-		j.AreaScore = make(map[string]int)
+	if j.FullAreaScore == nil {
+		j.FullAreaScore = make(map[string]float64)
 	}
-	if j.CityScore == nil {
-		j.CityScore = make(map[string]int)
+	if j.FullCityScore == nil {
+		j.FullCityScore = make(map[string]float64)
 	}
-	if j.DistrictScore == nil {
-		j.DistrictScore = make(map[string]int)
+	if j.FullDistrictScore == nil {
+		j.FullDistrictScore = make(map[string]float64)
 	}
 	sm := NewSortMap()
 	//高精度抽取city
@@ -149,9 +149,9 @@ func (e *ExtractTask) ExtractCity(j *ju.Job, resulttmp map[string]interface{}, i
 	//	resulttmp["lowprecity"] = LowPreCity
 	//qu.Debug("最终打分---", j.AreaScore, j.CityScore, j.DistrictScore)
 	//最终抽取结果
-	finishP := HighestScoreArr(j.AreaScore)
-	finishC := HighestScoreArr(j.CityScore)
-	finishD := HighestScoreArr(j.DistrictScore)
+	finishP := HighestScoreArr(j.FullAreaScore)
+	finishC := HighestScoreArr(j.FullCityScore)
+	finishD := HighestScoreArr(j.FullDistrictScore)
 
 	//	area, _ := resulttmp["area"].(string)
 	//	city, _ := resulttmp["city"].(string)
@@ -212,7 +212,7 @@ func (e *ExtractTask) GetCityByJsonData(j *ju.Job) (province, city, district, p,
 		province, _ = jsondata["area"].(string)     //province简称
 		district, _ = jsondata["district"].(string) //district全称
 	}
-	PCDScore(j, "district", district, 5) //district打分
+	PCDScore(j, "district", district, 5, true) //district打分
 	bp := false
 	if province != "" {
 		if e.ProvinceBriefMap[province] != nil { //判断爬虫的省份是否正确 (全国)
@@ -234,16 +234,16 @@ func (e *ExtractTask) GetCityByJsonData(j *ju.Job) (province, city, district, p,
 	}
 	if bp {
 		if pbrief == province { //爬虫的province和city匹配
-			PCDScore(j, "city", city, 5)
+			PCDScore(j, "city", city, 5, true)
 		} else { //pbrief不匹配province(此时city为空或者错误)
 			city = ""
 		}
-		PCDScore(j, "province", province, 5)
+		PCDScore(j, "province", province, 5, true)
 	} else { //省份错误或为空,取city的对应的pbrief为province
 		if pbrief != "" {
 			province = pbrief
-			PCDScore(j, "province", province, 5)
-			PCDScore(j, "city", city, 5)
+			PCDScore(j, "province", province, 5, true)
+			PCDScore(j, "city", city, 5, true)
 		} else {
 			province = ""
 			city = ""
@@ -265,10 +265,10 @@ func (e *ExtractTask) GetCityByPostCode(j *ju.Job, postcode string) (province, c
 		districtTmp := pc.D
 		if len(districtTmp) == 1 { //对应多个district舍去
 			district = districtTmp[0]
-			PCDScore(j, "district", district, 5)
+			PCDScore(j, "district", district, 5, true)
 		}
-		PCDScore(j, "province", province, 5)
-		PCDScore(j, "city", city, 5)
+		PCDScore(j, "province", province, 5, true)
+		PCDScore(j, "city", city, 5, true)
 	}
 	return
 }
@@ -285,9 +285,9 @@ func (e *ExtractTask) GetCityByAreaCode(j *ju.Job, buyertel string) (province, c
 				citytmp := ac.C
 				if len(citytmp) == 1 { //对应多个city舍去
 					city = citytmp[0]
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 				}
-				PCDScore(j, "province", province, 5)
+				PCDScore(j, "province", province, 5, true)
 			} else {
 				n = n - 1
 				if n >= 3 {
@@ -297,8 +297,8 @@ func (e *ExtractTask) GetCityByAreaCode(j *ju.Job, buyertel string) (province, c
 		} else if buyertel[:3] == "853" { //澳门
 			province = "澳门"
 			city = "澳门"
-			PCDScore(j, "province", province, 5)
-			PCDScore(j, "city", city, 5)
+			PCDScore(j, "province", province, 5, true)
+			PCDScore(j, "city", city, 5, true)
 		}
 	}
 	return
@@ -375,13 +375,13 @@ func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]stri
 		//buyeraddr,title,projectname匹配对应的结果加入最终得分
 		if isMatch {
 			if from == "buyeraddr" || from == "buyer" { //全称匹配,buyeraddr和buyer3分,title和projectname2分
-				PCDScore(j, "province", ph1, 3)
-				PCDScore(j, "city", ch1, 3)
-				PCDScore(j, "district", dh1, 3)
+				PCDScore(j, "province", ph1, 3, true)
+				PCDScore(j, "city", ch1, 3, true)
+				PCDScore(j, "district", dh1, 3, true)
 			} else {
-				PCDScore(j, "province", ph1, 2)
-				PCDScore(j, "city", ch1, 2)
-				PCDScore(j, "district", dh1, 2)
+				PCDScore(j, "province", ph1, 2, true)
+				PCDScore(j, "city", ch1, 2, true)
+				PCDScore(j, "district", dh1, 2, true)
 			}
 		}
 
@@ -428,13 +428,13 @@ func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]stri
 			district2 = append(district2, map[string]string{from + "_sim": dh2})
 			//buyeraddr,title,projectname匹配对应的结果加入最终得分
 			if from == "buyeraddr" {
-				PCDScore(j, "province", ph2, 2)
-				PCDScore(j, "city", ch2, 2)
-				PCDScore(j, "district", dh2, 2)
+				PCDScore(j, "province", ph2, 2, true)
+				PCDScore(j, "city", ch2, 2, true)
+				PCDScore(j, "district", dh2, 2, true)
 			} else {
-				PCDScore(j, "province", ph2, 1)
-				PCDScore(j, "city", ch2, 1)
-				PCDScore(j, "district", dh2, 1)
+				PCDScore(j, "province", ph2, 1, true)
+				PCDScore(j, "city", ch2, 1, true)
+				PCDScore(j, "district", dh2, 1, true)
 			}
 		}
 	}
@@ -456,18 +456,36 @@ func IsMatch(p, c string, e *ExtractTask) bool {
 }
 
 //计算province,city,district得分
-func PCDScore(j *ju.Job, stype, text string, score int) {
+func PCDScore(j *ju.Job, stype, text string, score float64, isfull bool) {
 	defer qu.Catch()
 	if text != "" {
 		if stype == "district" {
-			scoretmp := j.DistrictScore[text]
-			j.DistrictScore[text] = scoretmp + score
+			tmpdistrict := make(map[string]float64)
+			if isfull {
+				tmpdistrict = j.FullDistrictScore
+			} else {
+				tmpdistrict = j.SimDistrictScore
+			}
+			scoretmp := tmpdistrict[text]
+			tmpdistrict[text] = scoretmp + score
 		} else if stype == "city" {
-			scoretmp := j.CityScore[text]
-			j.CityScore[text] = scoretmp + score
+			tmpcity := make(map[string]float64)
+			if isfull {
+				tmpcity = j.FullCityScore
+			} else {
+				tmpcity = j.SimCityScore
+			}
+			scoretmp := tmpcity[text]
+			tmpcity[text] = scoretmp + score
 		} else if stype == "province" {
-			scoretmp := j.AreaScore[text]
-			j.AreaScore[text] = scoretmp + score
+			tmpprovince := make(map[string]float64)
+			if isfull {
+				tmpprovince = j.FullAreaScore
+			} else {
+				tmpprovince = j.SimAreaScore
+			}
+			scoretmp := tmpprovince[text]
+			tmpprovince[text] = scoretmp + score
 		}
 	}
 }
@@ -503,9 +521,9 @@ func HighestScore(m map[string]int) string {
 	return result
 }
 
-func HighestScoreArr(m map[string]int) []string {
-	result := make(map[int][]string)
-	tmpscore := 0
+func HighestScoreArr(m map[string]float64) []string {
+	result := make(map[float64][]string)
+	tmpscore := 0.0
 	for str, score := range m {
 		if str != "" && tmpscore <= score {
 			if result[tmpscore] != nil && tmpscore != score {
@@ -562,7 +580,7 @@ func GetPCDByAreaDFA(province, acd string, e *ExtractTask, j *ju.Job, flag bool)
 			if province == acd || pbMap.Name == acd { //用于判断area_city_district是否只有省份信息,flag为true就不在匹配area_city_district中的city和district
 				flag = true
 			}
-			PCDScore(j, "province", province, 5)
+			PCDScore(j, "province", province, 5, true)
 		}
 	}
 	return province, flag
@@ -581,12 +599,12 @@ func GetPCDByCityDFA(province, city, acd string, e *ExtractTask, j *ju.Job, flag
 					} else if province == "" { //acd有city;city和district信息
 						city = cfMap.Name
 						province = cfMap.P.Brief
-						PCDScore(j, "province", province, 5)
+						PCDScore(j, "province", province, 5, true)
 						if acd == city {
 							flag = true
 						}
 					}
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 					break
 				}
 			} else { //简称
@@ -599,12 +617,12 @@ func GetPCDByCityDFA(province, city, acd string, e *ExtractTask, j *ju.Job, flag
 					} else if province == "" {
 						city = cbMap.Name
 						province = cbMap.P.Brief
-						PCDScore(j, "province", province, 5)
+						PCDScore(j, "province", province, 5, true)
 						if acd == city {
 							flag = true
 						}
 					}
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 					break
 				}
 			}
@@ -623,14 +641,14 @@ func GetPCDByDistrictDFA(province, city, district, acd string, e *ExtractTask, j
 				}
 				if city == "" && dcMap.P.Brief == province { //只有province和district(are_city_district:河南省二七区)
 					city = dcMap.Name
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 				} else if province == "" { //province和city都没有(are_city_district:二七区)
 					city = dcMap.Name
 					province = dcMap.P.Brief
-					PCDScore(j, "city", city, 5)
-					PCDScore(j, "province", province, 5)
+					PCDScore(j, "city", city, 5, true)
+					PCDScore(j, "province", province, 5, true)
 				}
-				PCDScore(j, "district", district, 5)
+				PCDScore(j, "district", district, 5, true)
 				break
 			}
 		}

+ 299 - 198
src/jy/extract/newextractcity.go

@@ -8,7 +8,10 @@ import (
 	"strings"
 )
 
-var AgencyReg = regexp.MustCompile("((代理机构|中标供应商).{0,30}|.{2,15}((招标)?代理|咨询|政府采购))")
+var AgencyReg = []*regexp.Regexp{
+	regexp.MustCompile("(?s)(代理(机构|人|单位|公司)|中标供应商).{0,30}"),
+	regexp.MustCompile(".{2,15}((招标)?代理|咨询|政府采购)"),
+}
 
 //抽取city
 func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}, id string) {
@@ -26,37 +29,46 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}
 	defer qu.Catch()
 
 	//初始化
-	if j.AreaScore == nil {
-		j.AreaScore = make(map[string]int)
+	if j.FullAreaScore == nil {
+		j.FullAreaScore = make(map[string]float64)
+	}
+	if j.FullCityScore == nil {
+		j.FullCityScore = make(map[string]float64)
+	}
+	if j.FullDistrictScore == nil {
+		j.FullDistrictScore = make(map[string]float64)
+	}
+	if j.SimAreaScore == nil {
+		j.SimAreaScore = make(map[string]float64)
 	}
-	if j.CityScore == nil {
-		j.CityScore = make(map[string]int)
+	if j.SimCityScore == nil {
+		j.SimCityScore = make(map[string]float64)
 	}
-	if j.DistrictScore == nil {
-		j.DistrictScore = make(map[string]int)
+	if j.SimDistrictScore == nil {
+		j.SimDistrictScore = make(map[string]float64)
 	}
 	//记录区或县简称匹配的p、c、d的得分;如果全称匹配和p、c简称匹配的有结果,再将得分合并,否则舍弃
-	pscore := make(map[string]int)
-	cscore := make(map[string]int)
-	dscore := make(map[string]int)
+	pscore := make(map[string]float64)
+	cscore := make(map[string]float64)
+	dscore := make(map[string]float64)
 
 	sm := NewSortMap()
 	//1.jsondata抽取
 	e.NewGetCityByJsonData(j)
-	//qu.Debug("jsondata打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
+	//qu.Debug("jsondata打分后结果---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 	//2.site库抽取
 	e.NewGetCityBySite(j)
-	//qu.Debug("site打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
+	//qu.Debug("site打分后结果---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 	//3.采购单位库抽取(暂时没有采购单位库)
 	//buyer, _ := resulttmp["buyer"].(string)
 	//4.postcode邮编抽取
 	buyerzipcode, _ := resulttmp["buyerzipcode"].(string)
 	e.NewGetCityByPostCode(j, buyerzipcode)
-	//qu.Debug("邮编打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
+	//qu.Debug("邮编打分后结果---", buyerzipcode, j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 	//5.areacode固话区号抽取
 	buyertel, _ := resulttmp["buyertel"].(string)
 	e.NewGetCityByAreaCode(j, buyertel)
-	//qu.Debug("固话打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
+	//qu.Debug("固话打分后结果---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 	//6.buyeraddr,title,projectname抽取
 	buyeraddr, _ := resulttmp["buyeraddr"].(string)
 	title, _ := resulttmp["title"].(string)
@@ -64,24 +76,40 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}
 	buyer, _ := resulttmp["buyer"].(string)
 	//qu.Debug("buyeraddr--", buyeraddr, "--buyer--", buyer, "--title--", title, "--projectname--", projectname)
 	sm.AddKey("buyeraddr", buyeraddr)
+	sm.AddKey("buyer", buyer)
 	sm.AddKey("title", title)
 	sm.AddKey("projectname", projectname)
-	sm.AddKey("buyer", buyer)
+	//7.buyeraddr buyer title projectname抽取
 	e.NewGetCityByOthers(j, sm, &pscore, &cscore, &dscore)
-	//qu.Debug("打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
-	//7.detail抽取
-	if len(j.AreaScore) > 0 {
+	//qu.Debug("全称打分后结果---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
+	//qu.Debug("简称打分后结果---", j.SimAreaScore, j.SimCityScore, j.SimDistrictScore)
+	//全称简称得分合并
+	MergeFullSimScore(j) //合并buyer buyeraddr title projectname全称简称
+	//qu.Debug("全称简称合并后---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
+	//合并区简称得分
+	//qu.Debug("pcd=====", pscore, cscore, dscore)
+	MergeScores(j, &pscore, &cscore, &dscore) //合并区简称匹配的pcd
+	//qu.Debug("合并区简称打分后结果---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
+
+	j.SimAreaScore = map[string]float64{}
+	j.SimCityScore = map[string]float64{}
+	j.SimDistrictScore = map[string]float64{}
+
+	//8.detail抽取
+	if len(j.FullAreaScore) > 0 && len(j.FullCityScore) > 0 { //以上抽取有省有市再从detail中抽取进行判断
 		e.NewGetCityByDetail(j)
 	}
-	//qu.Debug("detail打分后---", j.AreaScore, j.CityScore, j.DistrictScore)
-	//合并得分
-	//qu.Debug("pcd=====", pscore, cscore, dscore)
-	MergeScores(j, &pscore, &cscore, &dscore)
-	//qu.Debug("合并打分后结果---", j.AreaScore, j.CityScore, j.DistrictScore)
+	//qu.Debug("detail打分后全称---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
+	//qu.Debug("detail打分后简称---", j.SimAreaScore, j.SimCityScore, j.SimDistrictScore)
+	MergeFullSimScore(j) //合并detail的全简称
+	//qu.Debug("detail合并后---", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 
-	finishP := HighestScoreArr(j.AreaScore)
-	finishC := HighestScoreArr(j.CityScore)
-	finishD := HighestScoreArr(j.DistrictScore)
+	finishP := HighestScoreArr(j.FullAreaScore) //获取最高分的省
+	e.RemoveCD(finishP, j)                      //将city中所属干扰项省的city去除,同时去除district.5d2bd4aba5cb26b9b769d18e
+	//qu.Debug("去除干扰项后的city和district得分---", finishP, j.FullCityScore, j.FullDistrictScore)
+	//获取结果
+	finishC := HighestScoreArr(j.FullCityScore)
+	finishD := HighestScoreArr(j.FullDistrictScore)
 	arearesult := ""
 	cityresult := ""
 	districtresult := ""
@@ -112,6 +140,9 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}
 	//直辖市
 	if arearesult == "北京" {
 		cityresult = "北京市"
+		if districtresult == "北京朝阳" { //特殊情况(北京朝阳中西医结合急诊抢救中心:5a84079740d2d9bbe88bad90)
+			districtresult = "朝阳区"
+		}
 	} else if arearesult == "天津" {
 		cityresult = "天津市"
 	} else if arearesult == "上海" {
@@ -146,7 +177,7 @@ func (e *ExtractTask) NewGetCityByJsonData(j *ju.Job) (province, city, district,
 		province, _ = jsondata["area"].(string)     //province简称
 		district, _ = jsondata["district"].(string) //district全称
 	}
-	PCDScore(j, "district", district, 5) //district打分
+	PCDScore(j, "district", district, 5, true) //district打分
 	bp := false
 	if province != "" {
 		if e.ProvinceBriefMap[province] != nil { //判断爬虫的省份是否正确 (全国)
@@ -168,16 +199,16 @@ func (e *ExtractTask) NewGetCityByJsonData(j *ju.Job) (province, city, district,
 	}
 	if bp {
 		if pbrief == province { //爬虫的province和city匹配
-			PCDScore(j, "city", city, 5)
+			PCDScore(j, "city", city, 5, true)
 		} else { //pbrief不匹配province(此时city为空或者错误)
 			city = ""
 		}
-		PCDScore(j, "province", province, 5)
+		PCDScore(j, "province", province, 5, true)
 	} else { //省份错误或为空,取city的对应的pbrief为province
 		if pbrief != "" {
 			province = pbrief
-			PCDScore(j, "province", province, 5)
-			PCDScore(j, "city", city, 5)
+			PCDScore(j, "province", province, 5, true)
+			PCDScore(j, "city", city, 5, true)
 		} else {
 			province = ""
 			city = ""
@@ -195,7 +226,7 @@ func GetByACDFullJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.
 		if e.Trie_Full_Province.Get(full) { //a_c_d有province全称
 			if tmpPbrief := e.ProvinceMap[full]; tmpPbrief != "" {
 				pbrief = tmpPbrief //省简称
-				PCDScore(j, "province", pbrief, 5)
+				PCDScore(j, "province", pbrief, 5, true)
 			}
 		} else if e.Trie_Full_City.Get(full) { //a_c_d有city全称
 			if cfMap := e.CityFullMap[full]; cfMap != nil {
@@ -203,35 +234,35 @@ func GetByACDFullJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.
 				tmpPbrief := cfMap.P.Brief               //省简称
 				if pbrief != "" && pbrief == tmpPbrief { //已获取省简称
 					city = tmpcity
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 				} else if pbrief == "" {
 					city = tmpcity
 					pbrief = tmpPbrief
-					PCDScore(j, "city", city, 5)
-					PCDScore(j, "province", pbrief, 5)
+					PCDScore(j, "city", city, 5, true)
+					PCDScore(j, "province", pbrief, 5, true)
 				}
 			}
 		} else if e.Trie_Full_District.Get(full) { //a_c_d有district全称(district可能对应多个城市)
 			carr := e.NewDistrictCityMap[full]
 			if len(carr) > 0 {
 				district = full
-				PCDScore(j, "district", district, 5)
+				PCDScore(j, "district", district, 5, true)
 				for _, c := range carr {
 					tmpcity := c.Name      //城市全称
 					tmpPbrief := c.P.Brief //省简称
 					if pbrief == "" {      //之前没有匹配到省份
-						PCDScore(j, "city", tmpcity, 5)
+						PCDScore(j, "city", tmpcity, 5, true)
 						if !repeatPb[tmpPbrief] {
-							PCDScore(j, "province", tmpPbrief, 5)
+							PCDScore(j, "province", tmpPbrief, 5, true)
 							repeatPb[tmpPbrief] = true
 						}
 					} else { //已有省份
 						if pbrief != tmpPbrief { //区对应的多个城市,与之前匹配结果不一致,认为是干扰项
-							PCDScore(j, "city", tmpcity, -5)
-							PCDScore(j, "province", tmpPbrief, -5)
+							PCDScore(j, "city", tmpcity, -5, true)
+							PCDScore(j, "province", tmpPbrief, -5, true)
 						} else { //与之前匹配结果一致
 							if city == "" { //这种情况是处理area_city_district:(河南省二七区),city在前两步匹配不到,在这里通过district补充
-								PCDScore(j, "city", tmpcity, 5)
+								PCDScore(j, "city", tmpcity, 5, true)
 							}
 						}
 					}
@@ -250,7 +281,7 @@ func GetByACDSimJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.J
 		if pbrief == "" && e.Trie_Sim_Province.Get(sim) { //全称未匹配到确定的province
 			if pbMap := e.ProvinceBriefMap[sim]; pbMap != nil {
 				pbrief = pbMap.Brief
-				PCDScore(j, "province", pbrief, 5) //打分
+				PCDScore(j, "province", pbrief, 5, true) //打分
 				//PCDSubtractScore(e, j, "province", pbrief, -5) //减分(area_city_district:河南鼓楼区)
 			}
 		} else if city == "" && e.Trie_Sim_City.Get(sim) { //全称未匹配到确定的city
@@ -259,36 +290,36 @@ func GetByACDSimJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.J
 				tmpPbrief := cbMap.P.Brief
 				if pbrief != "" && pbrief == tmpPbrief {
 					city = tmpcity
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 				} else if pbrief == "" {
 					city = tmpcity
 					pbrief = tmpPbrief
-					PCDScore(j, "city", city, 5)
-					PCDScore(j, "province", pbrief, 5)
+					PCDScore(j, "city", city, 5, true)
+					PCDScore(j, "province", pbrief, 5, true)
 					//PCDSubtractScore(e, j, "city", tmpcity, -5) //减分(area_city_district:开封鼓楼区)
 				}
 			}
 		} else if district == "" && e.Trie_Sim_District.Get(sim) { //全称未匹配到确定的district
 			dfullarr := e.NewDistrictSimAndAll[sim]
 			if len(dfullarr) > 0 {
-				PCDScore(j, "district", sim, 5)
+				PCDScore(j, "district", sim, 5, true)
 				for _, dfullAndCity := range dfullarr { //district简称对应的所有全称
 					for _, c := range dfullAndCity {
 						tmpcity := c.Name      //城市全称
 						tmpPbrief := c.P.Brief //省简称
 						if pbrief == "" {      //之前没有匹配到省份
-							PCDScore(j, "city", tmpcity, 5)
+							PCDScore(j, "city", tmpcity, 5, true)
 							if !repeatPb[tmpPbrief] {
-								PCDScore(j, "province", tmpPbrief, 5)
+								PCDScore(j, "province", tmpPbrief, 5, true)
 								repeatPb[tmpPbrief] = true
 							}
 						} else { //已有省份
 							if pbrief != tmpPbrief { //区对应的多个城市,与之前匹配结果不一致,认为是干扰项
-								PCDScore(j, "city", tmpcity, -5)
-								PCDScore(j, "province", tmpPbrief, -5)
+								PCDScore(j, "city", tmpcity, -5, true)
+								PCDScore(j, "province", tmpPbrief, -5, true)
 							} else { //与之前匹配结果一致
 								if city == "" { //这种情况是处理area_city_district:(河南省二七区),city在前两步匹配不到,在这里通过district补充
-									PCDScore(j, "city", tmpcity, 5)
+									PCDScore(j, "city", tmpcity, 5, true)
 								}
 							}
 						}
@@ -305,13 +336,13 @@ func (e *ExtractTask) NewGetCityBySite(j *ju.Job) {
 	//qu.Debug("site--------", site)
 	if scMap := e.SiteCityMap[site]; scMap != nil {
 		if scMap.P != "" && scMap.P != "全国" && scMap.P != "null" {
-			PCDScore(j, "province", scMap.P, 5)
+			PCDScore(j, "province", scMap.P, 5, true)
 		}
 		if scMap.C != "" && scMap.C != "null" {
-			PCDScore(j, "city", scMap.C, 5)
+			PCDScore(j, "city", scMap.C, 5, true)
 		}
 		if scMap.D != "" && scMap.D != "null" {
-			PCDScore(j, "district", scMap.D, 5)
+			PCDScore(j, "district", scMap.D, 5, true)
 		}
 	}
 }
@@ -324,15 +355,15 @@ func (e *ExtractTask) NewGetCityByPostCode(j *ju.Job, postcode string) (province
 		province = pc.P
 		city = pc.C
 		districtTmp := pc.D //邮编可能对应多个区
-		score := 3
+		score := 3.0
 		if len(districtTmp) == 1 && districtTmp[0] != "" {
-			score = 5
+			score = 5.0
 		}
 		for _, district := range districtTmp {
-			PCDScore(j, "district", district, score)
+			PCDScore(j, "district", district, score, true)
 		}
-		PCDScore(j, "province", province, 5)
-		PCDScore(j, "city", city, 5)
+		PCDScore(j, "province", province, 5, true)
+		PCDScore(j, "city", city, 5, true)
 	}
 	return
 }
@@ -351,36 +382,40 @@ func (e *ExtractTask) NewGetCityByAreaCode(j *ju.Job, buyertel string) (province
 				citytmp := ac.C
 				if len(citytmp) == 1 { //对应多个city舍去
 					city = citytmp[0]
-					PCDScore(j, "city", city, 5)
+					PCDScore(j, "city", city, 5, true)
 				}
-				PCDScore(j, "province", province, 5)
+				PCDScore(j, "province", province, 5, true)
 			} else {
 				n = n - 1
 				if n >= 3 {
 					goto L
 				}
 			}
-		} else if buyertel[:3] == "853" { //澳门
+		} /* else if buyertel[:3] == "853" { //澳门
 			province = "澳门"
 			city = "澳门"
-			PCDScore(j, "province", province, 5)
-			PCDScore(j, "city", city, 5)
-		}
+			PCDScore(j, "province", province, 5, true)
+			PCDScore(j, "city", city, 5, true)
+		}*/
 	}
 	return
 }
 
-func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore, dscore *map[string]int) {
+func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore, dscore *map[string]float64) {
 	/*
 		1.对字段进行分词
 		2.省、市、区、街道、居委会全称进行匹配打分
 		3.省、市、区简称进行匹配打分
 	*/
-	for _, from := range sm.Keys { //buyeraddr;title;projectname
+	ts := 0.5
+	for i, from := range sm.Keys { //buyer;buyeraddr;title;projectname
+		if i > 1 {
+			ts = 0.2
+		}
 		p_full, c_full, d_full, p_sim, c_sim, d_sim := "", "", "", "", "", "" //每个字段抽取的时候重新定义该字段抽取的province,city,district
 		str, _ := sm.Map[from].(string)
 		jbText := e.Seg_SV.Cut(str, true)
-		for _, text := range jbText { //结巴分词
+		for _, text := range jbText {
 			if len([]rune(text)) == 1 {
 				continue
 			}
@@ -391,7 +426,7 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 					if pos_full == 0 && p_full == "" { //省全称
 						if tmpPbrief := e.ProvinceMap[text]; tmpPbrief != "" { //取简称
 							p_full = tmpPbrief
-							PCDScore(j, "province", p_full, 4)
+							PCDScore(j, "province", p_full, 4+ts, true)
 							break
 						}
 					} else if pos_full == 1 && c_full == "" { //市全称
@@ -400,12 +435,12 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 							if p_full == "" {
 								p_full = tmpPbrief
 								c_full = cfMap.Name
-								PCDScore(j, "province", p_full, 4)
-								PCDScore(j, "city", c_full, 4)
+								PCDScore(j, "province", p_full, 4+ts, true)
+								PCDScore(j, "city", c_full, 4+ts, true)
 								break
 							} else if p_full == tmpPbrief {
 								c_full = cfMap.Name
-								PCDScore(j, "city", c_full, 4)
+								PCDScore(j, "city", c_full, 4+ts, true)
 								break
 							} else if p_full != "" && p_full != tmpPbrief {
 								//city不做处理
@@ -422,7 +457,7 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 								d_full = text
 								if c_full == "" {
 									c_full = c.Name
-									PCDScore(j, "city", c_full, 4)
+									PCDScore(j, "city", c_full, 4+ts, true)
 								}
 								isOk = true
 								districtOk = true
@@ -432,30 +467,30 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 									p_full = tmpPbrief
 									c_full = c.Name
 									d_full = text
-									PCDScore(j, "province", p_full, 4)
-									PCDScore(j, "city", c_full, 4)
+									PCDScore(j, "province", p_full, 4+ts, true)
+									PCDScore(j, "city", c_full, 4+ts, true)
 									isOk = true
 								} else { //多个city,只打分,不赋值
 									if !repeatPb[tmpPbrief] {
-										PCDScore(j, "province", tmpPbrief, 2)
+										PCDScore(j, "province", tmpPbrief, 2+ts, true)
 										repeatPb[tmpPbrief] = true
 									}
-									//PCDScore(j, "province", tmpPbrief, 2)
-									PCDScore(j, "city", c.Name, 2)
+									//PCDScore(j, "province", tmpPbrief, 2, true)
+									PCDScore(j, "city", c.Name, 2+ts, true)
 								}
 							} else if p_full != "" && p_full != tmpPbrief { //干扰项减分
 								if !repeatPb[tmpPbrief] {
-									PCDScore(j, "province", tmpPbrief, -5)
+									PCDScore(j, "province", tmpPbrief, -5, true)
 									repeatPb[tmpPbrief] = true
 								}
-								//PCDScore(j, "province", tmpPbrief, -5)
-								PCDScore(j, "city", c.Name, -5)
+								//PCDScore(j, "province", tmpPbrief, -5, true)
+								PCDScore(j, "city", c.Name, -5, true)
 							}
 						}
 						if districtOk {
-							PCDScore(j, "district", text, 4)
+							PCDScore(j, "district", text, 4+ts, true)
 						} else {
-							PCDScore(j, "district", text, -5)
+							PCDScore(j, "district", text, -5, true)
 						}
 						if isOk {
 							break
@@ -463,40 +498,46 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 					} else if pos_full == 3 { //街道全称
 						districts := e.NewStreetDistrictMap[text]
 						if len(districts) == 1 { //街道唯一
-							DealMultipleDistrict(e, j, districts, 2, p_full, nil, nil, nil)
+							DealMultipleDistrict(e, j, districts, 2+ts, p_full, nil, nil, nil)
 						}
 					} else if pos_full == 4 { //居委会全称
 						districts := e.CommunityDistrictMap[text]
 						if len(districts) == 1 { //居委会唯一
-							DealMultipleDistrict(e, j, districts, 2, p_full, nil, nil, nil)
+							DealMultipleDistrict(e, j, districts, 2+ts, p_full, nil, nil, nil)
 						}
 					}
 				}
 			}
-			//qu.Debug("全称后--", j.AreaScore, j.CityScore, j.DistrictScore)
+			//qu.Debug("全称后--", j.FullAreaScore, j.FullCityScore, j.FullDistrictScore)
 			//简称匹配
 			for pos_sim, trie_sim := range e.Trie_Sims {
 				if trie_sim.Get(text) {
 					if pos_sim == 0 && p_sim == "" { //省简称
 						p_sim = text
-						PCDScore(j, "province", p_sim, 3)
+						PCDScore(j, "province", p_sim, 3+ts, false)
 						break
-					} else if pos_sim == 1 && c_sim == "" { //市简称
+					} else if pos_sim == 1 { //市简称
 						if cbMap := e.CityBriefMap[text]; cbMap != nil {
 							tmpPbrief := cbMap.P.Brief
 							if p_sim == "" {
+								score := 2.0 + ts
+								if tmpPbrief == p_full {
+									score += 1.0
+								}
 								p_sim = tmpPbrief
 								c_sim = cbMap.Brief
-								PCDScore(j, "province", p_sim, 2)
-								PCDScore(j, "city", cbMap.Name, 2)
+								PCDScore(j, "province", p_sim, score, false)
+								PCDScore(j, "city", cbMap.Name, score, false)
 								break
 							} else if p_sim == tmpPbrief {
 								c_sim = cbMap.Brief
-								PCDScore(j, "city", cbMap.Name, 3)
+								PCDScore(j, "city", cbMap.Name, 3+ts, false)
 								break
-							} else if p_sim != "" && p_sim != tmpPbrief { //北京师范大学广州实验学校
-								PCDScore(j, "province", tmpPbrief, 1)
-								PCDScore(j, "city", cbMap.Name, 1)
+							} else if p_sim != "" && p_sim != tmpPbrief { //上海宝冶集团有限公司南京分公司 北京朝阳中西医结合急诊抢救中心
+								delete(j.SimAreaScore, p_sim)
+								p_sim = text
+								PCDScore(j, "province", tmpPbrief, 3+ts, false)
+								PCDScore(j, "city", cbMap.Name, 3+ts, false)
 							}
 						}
 					} else if pos_sim == 2 && d_sim == "" { //区简称
@@ -506,49 +547,53 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
 						for _, dfull_city := range dfull_citys {
 							for dfull, c := range dfull_city { //dfull:简称对应的全称
 								tmpPbrief := c.P.Brief
-								if p_sim == tmpPbrief { //省份一致
+								if p_sim == tmpPbrief || p_full == tmpPbrief { //省份一致
 									d_sim = text
-									PCDScore(j, "district", dfull, 2)
+									PCDScore(j, "district", dfull, 2+ts, false)
 									if c_sim == "" {
 										c_sim = c.Brief
-										PCDScore(j, "city", c.Name, 2)
+										PCDScore(j, "city", c.Name, 2+ts, false)
 									}
 								} else if p_sim == "" {
 									if !repeatDb[dfull] {
-										PCDScoreByDistrictSim("d", dfull, 1, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("d", dfull, 1+ts, pscore, cscore, dscore)
 										repeatDb[dfull] = true
 									}
 									if len(dfull_citys) == 1 {
-										PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
-										PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("p", tmpPbrief, 1+ts, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("c", c.Name, 1+ts, pscore, cscore, dscore)
 									} else {
 										if !repeatPb[tmpPbrief] {
-											PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
+											PCDScoreByDistrictSim("p", tmpPbrief, 1+ts, pscore, cscore, dscore)
 											repeatPb[tmpPbrief] = true
 										}
-										PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("c", c.Name, 1+ts, pscore, cscore, dscore)
 									}
 								} else if p_sim != "" && p_sim != tmpPbrief {
 									if !repeatPb[tmpPbrief] {
-										PCDScoreByDistrictSim("p", tmpPbrief, -5, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("p", tmpPbrief, ts, pscore, cscore, dscore)
 										repeatPb[tmpPbrief] = true
 									}
-									PCDScoreByDistrictSim("c", c.Name, -5, pscore, cscore, dscore)
+									PCDScoreByDistrictSim("c", c.Name, ts, pscore, cscore, dscore)
+									PCDScoreByDistrictSim("d", dfull, ts, pscore, cscore, dscore)
 								}
 							}
 						}
 					}
 				}
 			}
-			//qu.Debug("简称后--", j.AreaScore, j.CityScore, j.DistrictScore)
+			//qu.Debug("简称后--", j.SimAreaScore, j.SimCityScore, j.SimDistrictScore)
 		}
 	}
 }
 
 func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
-	repeatP := map[string]bool{}
-	repeatC := map[string]bool{}
-	repeatD := map[string]bool{}
+	repeatP_full := map[string]bool{}
+	repeatC_full := map[string]bool{}
+	repeatD_full := map[string]bool{}
+	repeatP_sim := map[string]bool{}
+	repeatC_sim := map[string]bool{}
+	repeatD_sim := map[string]bool{}
 	detailRune := []rune(j.Content)
 	detail := j.Content
 	if len(detailRune) > 600 {
@@ -556,45 +601,47 @@ func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
 		end := detailRune[len(detailRune)-300:]
 		detail = string(start) + string(end)
 	}
-	detail = AgencyReg.ReplaceAllString(detail, "")
+	for _, reg := range AgencyReg {
+		detail = reg.ReplaceAllString(detail, "")
+	}
 	for _, text := range e.Seg_SV.Cut(detail, true) {
 		if len([]rune(text)) > 1 {
 			//全称匹配
 			for pos_full, trie_full := range e.Trie_Fulls {
 				if trie_full.Get(text) {
 					if pos_full == 0 { //省全称
-						if tmpPbrief := e.ProvinceMap[text]; tmpPbrief != "" && !repeatP[tmpPbrief] { //取简称
-							PCDScore(j, "province", tmpPbrief, 1)
-							repeatP[tmpPbrief] = true
+						if tmpPbrief := e.ProvinceMap[text]; tmpPbrief != "" && !repeatP_full[tmpPbrief] { //取简称
+							PCDScore(j, "province", tmpPbrief, 1, true)
+							repeatP_full[tmpPbrief] = true
 							break
 						}
 					} else if pos_full == 1 { //市全称
 						if cfMap := e.CityFullMap[text]; cfMap != nil {
-							if !repeatP[cfMap.P.Brief] {
-								PCDScore(j, "province", cfMap.P.Brief, 1)
-								repeatP[cfMap.P.Brief] = true
+							if !repeatP_full[cfMap.P.Brief] {
+								PCDScore(j, "province", cfMap.P.Brief, 1, true)
+								repeatP_full[cfMap.P.Brief] = true
 							}
-							if !repeatC[cfMap.Name] {
-								PCDScore(j, "city", cfMap.Name, 1)
-								repeatC[cfMap.Name] = true
+							if !repeatC_full[cfMap.Name] {
+								PCDScore(j, "city", cfMap.Name, 1, true)
+								repeatC_full[cfMap.Name] = true
 							}
 							break
 						}
 					} else if pos_full == 2 { //区全称
 						citys := e.NewDistrictCityMap[text]
 						if len(citys) > 0 {
-							if !repeatD[text] {
-								PCDScore(j, "district", text, 1)
-								repeatD[text] = true
+							if !repeatD_full[text] {
+								PCDScore(j, "district", text, 1, true)
+								repeatD_full[text] = true
 							}
 							for _, c := range citys {
-								if !repeatC[c.Name] {
-									PCDScore(j, "city", c.Name, 1)
-									repeatC[c.Name] = true
+								if !repeatC_full[c.Name] {
+									PCDScore(j, "city", c.Name, 1, true)
+									repeatC_full[c.Name] = true
 								}
-								if !repeatP[c.P.Brief] {
-									PCDScore(j, "province", c.P.Brief, 1)
-									repeatP[c.P.Brief] = true
+								if !repeatP_full[c.P.Brief] {
+									PCDScore(j, "province", c.P.Brief, 1, true)
+									repeatP_full[c.P.Brief] = true
 								}
 							}
 							break
@@ -602,12 +649,12 @@ func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
 					} else if pos_full == 3 { //街道全称
 						districts := e.NewStreetDistrictMap[text]
 						if len(districts) == 1 {
-							DealMultipleDistrict(e, j, districts, 1, "", &repeatP, &repeatC, &repeatD)
+							DealMultipleDistrict(e, j, districts, 1, "", &repeatP_full, &repeatC_full, &repeatD_full)
 						}
 					} else if pos_full == 4 { //居委会全称
 						districts := e.CommunityDistrictMap[text]
 						if len(districts) == 1 {
-							DealMultipleDistrict(e, j, districts, 1, "", &repeatP, &repeatC, &repeatD)
+							DealMultipleDistrict(e, j, districts, 1, "", &repeatP_full, &repeatC_full, &repeatD_full)
 						}
 					}
 				}
@@ -616,34 +663,43 @@ func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
 			//简称匹配
 			for pos_sim, trie_sim := range e.Trie_Sims {
 				if trie_sim.Get(text) {
-					if pos_sim == 0 && !repeatP[text] { //省简称
-						PCDScore(j, "province", text, 1)
-						repeatP[text] = true
+					if pos_sim == 0 && !repeatP_sim[text] { //省简称
+						PCDScore(j, "province", text, 1, false)
+						repeatP_sim[text] = true
 						break
 					} else if pos_sim == 1 { //市简称
 						if cbMap := e.CityBriefMap[text]; cbMap != nil {
-							if !repeatP[cbMap.P.Brief] {
-								PCDScore(j, "province", cbMap.P.Brief, 1)
-								repeatP[cbMap.P.Brief] = true
+							if !repeatP_sim[cbMap.P.Brief] {
+								PCDScore(j, "province", cbMap.P.Brief, 1, false)
+								repeatP_sim[cbMap.P.Brief] = true
 							}
-							if !repeatC[cbMap.Name] {
-								PCDScore(j, "city", cbMap.Name, 1)
-								repeatC[cbMap.Name] = true
+							if !repeatC_sim[cbMap.Name] {
+								PCDScore(j, "city", cbMap.Name, 1, false)
+								repeatC_sim[cbMap.Name] = true
 							}
 							break
 						}
-					} /* else if pos_sim == 2 { //区简称
-						repeatDb := map[string]bool{}
+					} else if pos_sim == 2 { //区简称
 						dfull_citys := e.NewDistrictSimAndAll[text]
-						for _, dfull_city := range dfull_citys {
-							for dfull, _ := range dfull_city { //dfull:简称对应的全称
-								if !repeatDb[dfull] {
-									PCDScore(j, "district", dfull, 1)
-									repeatDb[dfull] = true
+						if len(dfull_citys) == 1 {
+							for _, dfull_city := range dfull_citys {
+								for dfull, ctmp := range dfull_city { //dfull:简称对应的全称
+									if !repeatD_sim[dfull] {
+										PCDScore(j, "district", dfull, 1, false)
+										repeatD_sim[dfull] = true
+									}
+									if !repeatC_sim[ctmp.Name] {
+										PCDScore(j, "city", ctmp.Name, 1, false)
+										repeatC_sim[ctmp.Name] = true
+									}
+									if !repeatP_sim[ctmp.P.Brief] {
+										PCDScore(j, "province", ctmp.P.Brief, 1, false)
+										repeatP_sim[ctmp.P.Brief] = true
+									}
 								}
 							}
 						}
-					}*/
+					}
 				}
 			}
 			//qu.Debug("detail 简称---", j.AreaScore, j.CityScore, j.DistrictScore)
@@ -652,57 +708,36 @@ func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
 }
 
 //街道、居委会对应多地市处理
-func DealMultipleDistrict(e *ExtractTask, j *ju.Job, districts []*District, score int, pbrief string, repeatP, repeatC, repeatD *map[string]bool) {
+// DealMultipleDistrict scores the province, city and district that a matched
+// street / residents' committee belongs to.
+//
+// It only acts when districts holds exactly one entry; any other length is a
+// no-op. Behaviour then depends on pbrief:
+//   - pbrief non-empty and equal to the district's province brief: province,
+//     city and district are each scored, with no de-duplication.
+//   - pbrief non-empty and different: nothing is scored.
+//   - pbrief empty: each level is scored at most once per name, tracked in
+//     repeatP / repeatC / repeatD. A nil tracker disables de-duplication for
+//     that level.
+//
+// e is not used by this function.
+// NOTE(review): the trailing `true` passed to PCDScore presumably selects the
+// full-name score maps (short-name callers pass false) — confirm against PCDScore.
+func DealMultipleDistrict(e *ExtractTask, j *ju.Job, districts []*District, score float64, pbrief string, repeatP, repeatC, repeatD *map[string]bool) {
 	if len(districts) == 1 {
 		district := districts[0]
 		city := district.C.Name
 		tmpPbrief := district.C.P.Brief
 		if pbrief != "" && tmpPbrief == pbrief {
-			PCDScore(j, "province", tmpPbrief, score)
-			PCDScore(j, "city", city, score)
-			PCDScore(j, "district", district.Name, score)
+			PCDScore(j, "province", tmpPbrief, score, true)
+			PCDScore(j, "city", city, score, true)
+			PCDScore(j, "district", district.Name, score, true)
 		} else if pbrief == "" {
 			if repeatP != nil && !(*repeatP)[tmpPbrief] {
-				PCDScore(j, "province", tmpPbrief, score)
+				PCDScore(j, "province", tmpPbrief, score, true)
 				(*repeatP)[tmpPbrief] = true
 			} else if repeatP == nil {
-				PCDScore(j, "province", tmpPbrief, score)
+				PCDScore(j, "province", tmpPbrief, score, true)
 			}
 			if repeatC != nil && !(*repeatC)[city] {
-				PCDScore(j, "city", city, score)
+				PCDScore(j, "city", city, score, true)
 				(*repeatC)[city] = true
 			} else if repeatC == nil {
-				PCDScore(j, "city", city, score)
+				PCDScore(j, "city", city, score, true)
 			}
-			if repeatD != nil && !(*repeatD)[tmpPbrief] {
+			// De-duplicate on the district name: that is the key recorded
+			// below. Checking the province brief here never matched, so the
+			// district was re-scored on every occurrence.
+			if repeatD != nil && !(*repeatD)[district.Name] {
-				PCDScore(j, "district", district.Name, score)
+				PCDScore(j, "district", district.Name, score, true)
 				(*repeatD)[district.Name] = true
 			} else if repeatD == nil {
-				PCDScore(j, "district", district.Name, score)
+				PCDScore(j, "district", district.Name, score, true)
 			}
 		}
 	}
-
-	//	repeatPb := map[string]bool{}
-	//	repeatCb := map[string]bool{}
-	//	repeatDb := map[string]bool{}
-	//	for _, district := range districts {
-	//		tmpDistrict := district.Name
-	//		tmpCity := district.C.Name
-	//		tmpPbrief := district.C.P.Brief
-	//		if !repeatPb[tmpPbrief] {
-	//			PCDScore(j, "province", tmpPbrief, score)
-	//			repeatPb[tmpPbrief] = true
-	//		}
-	//		if !repeatCb[tmpCity] {
-	//			PCDScore(j, "city", tmpCity, score)
-	//			repeatCb[tmpCity] = true
-	//		}
-	//		if !repeatDb[tmpDistrict] {
-	//			PCDScore(j, "district", tmpDistrict, score)
-	//			repeatDb[tmpDistrict] = true
-	//		}
-	//	}
 }
 
 func NewGetCity(area, city string, e *ExtractTask, finishC, tmpcity []string) (string, []string) {
@@ -736,7 +771,7 @@ func NewGetDistrict(area, city, district string, e *ExtractTask, finishD, tmpcit
 					district = d
 					return city, district
 				}
-			} /*else { //多个city
+			} else { //多个city
 				for _, tc := range tmpcity { //多个city根据district最高分取
 					if tc == c.Name && len(finishD) == 1 {
 						city = c.Name
@@ -744,27 +779,14 @@ func NewGetDistrict(area, city, district string, e *ExtractTask, finishD, tmpcit
 						return city, district
 					}
 				}
-			}*/
-
-			//			if len(citys) == 1 { //区对应一个市
-			//				if c.P.Brief == area {
-			//					district = d
-			//					city = c.Name
-			//					return city, district
-			//				}
-			//			} else {
-			//				if c.P.Brief == area && c.Name == city {
-			//					district = d
-			//					return city, district
-			//				}
-			//			}
+			}
 		}
 	}
 	return city, district
 }
 
 //计算province,city,district区或县匹配的得分
-func PCDScoreByDistrictSim(stype, t string, score int, ps, cs, ds *map[string]int) {
+func PCDScoreByDistrictSim(stype, t string, score float64, ps, cs, ds *map[string]float64) {
 	defer qu.Catch()
 	if t != "" {
 		if stype == "d" {
@@ -780,19 +802,98 @@ func PCDScoreByDistrictSim(stype, t string, score int, ps, cs, ds *map[string]in
 	}
 }
 
-func MergeScores(j *ju.Job, pscore, cscore, dscore *map[string]int) {
-	if len(j.AreaScore) > 0 {
+// MergeScores adds the province (pscore), city (cscore) and district (dscore)
+// scores passed in to the matching full-name score maps on j.
+//
+// Nothing is merged, for any of the three levels, when j.FullAreaScore is
+// empty.
+func MergeScores(j *ju.Job, pscore, cscore, dscore *map[string]float64) {
+	if len(j.FullAreaScore) > 0 {
 		for pt, ps := range *pscore {
-			j.AreaScore[pt] = j.AreaScore[pt] + ps
+			j.FullAreaScore[pt] = j.FullAreaScore[pt] + ps
 		}
 		for ct, cs := range *cscore {
-			j.CityScore[ct] = j.CityScore[ct] + cs
+			j.FullCityScore[ct] = j.FullCityScore[ct] + cs
 		}
 		for dt, ds := range *dscore {
-			j.DistrictScore[dt] = j.DistrictScore[dt] + ds
+			j.FullDistrictScore[dt] = j.FullDistrictScore[dt] + ds
 		}
 	}
 }
+// MergeFullSimScore folds the short-name (Sim*) scores of j into its
+// full-name (Full*) score maps.
+//
+// Province: when no full-name province has been scored, FullAreaScore is
+// pointed at the SimAreaScore map itself, so both fields then share one map.
+// Otherwise only provinces already present in FullAreaScore receive their
+// short-name score; provinces seen only by short name are ignored.
+//
+// City / district: every short-name score is added to the full-name map,
+// creating the entry if it does not exist yet.
+func MergeFullSimScore(j *ju.Job) {
+	if len(j.FullAreaScore) == 0 {
+		j.FullAreaScore = j.SimAreaScore
+	} else {
+		for p_text, p_score := range j.FullAreaScore {
+			j.FullAreaScore[p_text] = j.SimAreaScore[p_text] + p_score
+		}
+	}
+	// Assignment to a nil map panics, so make sure the destination maps exist
+	// before merging into them.
+	if j.FullCityScore == nil {
+		j.FullCityScore = make(map[string]float64, len(j.SimCityScore))
+	}
+	for c_text, c_score := range j.SimCityScore {
+		j.FullCityScore[c_text] = j.FullCityScore[c_text] + c_score
+	}
+
+	if j.FullDistrictScore == nil {
+		j.FullDistrictScore = make(map[string]float64, len(j.SimDistrictScore))
+	}
+	for d_text, d_score := range j.SimDistrictScore {
+		j.FullDistrictScore[d_text] = j.FullDistrictScore[d_text] + d_score
+	}
+	// Earlier variant, kept for reference: city and district were merged the
+	// same way as province above.
+	//	if len(j.FullCityScore) == 0 {
+	//		j.FullCityScore = j.SimCityScore
+	//	} else {
+	//		for c_text, c_score := range j.FullCityScore {
+	//			j.FullCityScore[c_text] = j.SimCityScore[c_text] + c_score
+	//		}
+	//	}
+	//	if len(j.FullDistrictScore) == 0 {
+	//		j.FullDistrictScore = j.SimDistrictScore
+	//	} else {
+	//		for d_text, d_score := range j.FullDistrictScore {
+	//			j.FullDistrictScore[d_text] = j.SimDistrictScore[d_text] + d_score
+	//		}
+	//	}
+}
+
+// RemoveCD prunes city and district candidates on j whose province is not one
+// of the final provinces listed in finishP.
+//
+// Pass 1 walks the scored districts: for every city a district maps to that
+// also has a non-zero city score, the city and the district are both deleted
+// when that city's province brief is not in finishP.
+// Pass 2 walks the remaining scored cities and deletes those whose province
+// brief is not in finishP. Cities missing from CityFullMap are logged and
+// left in place.
+func (e *ExtractTask) RemoveCD(finishP []string, j *ju.Job) {
+	// keepProvince reports whether the province brief pb is among finishP.
+	keepProvince := func(pb string) bool {
+		for _, p := range finishP {
+			if p == pb {
+				return true
+			}
+		}
+		return false
+	}
+
+	for d := range j.FullDistrictScore {
+		for _, c := range e.NewDistrictCityMap[d] {
+			if j.FullCityScore[c.Name] == 0 {
+				continue
+			}
+			if !keepProvince(c.P.Brief) {
+				delete(j.FullCityScore, c.Name)
+				delete(j.FullDistrictScore, d)
+			}
+		}
+	}
+
+	for tmpcity := range j.FullCityScore {
+		c := e.CityFullMap[tmpcity]
+		if c == nil {
+			qu.Debug("行政区划错误数据:", tmpcity, j.SourceMid)
+			continue
+		}
+		if !keepProvince(c.P.Brief) {
+			delete(j.FullCityScore, tmpcity)
+		}
+	}
+}
 
 //province,city,district干扰项减分
 //func PCDSubtractScore(e *ExtractTask, j *ju.Job, stype, text string, score int) {

+ 33 - 29
src/jy/util/article.go

@@ -6,35 +6,39 @@ import (
 
 //
+// Job carries one document through extraction: where it came from, its text,
+// the blocks and packages derived from it, the extracted results, and the
+// province/city/district scores.
 type Job struct {
-	SourceMid      string                            //数据源的MongoId
-	Category       string                            //类别
-	CategorySecond string                            //二级分类
-	Content        string                            //正文
-	Title          string                            //标题
-	SpiderCode     string                            //爬虫代码
-	Domain         string                            //网站域名
-	Href           string                            //原文链接
-	City           string                            //城市
-	Province       string                            //省份
-	Jsondata       *map[string]interface{}           //
-	Data           *map[string]interface{}           //数据库源数据
-	Block          []*Block                          //分块
-	Result         map[string][]*ExtField            //结果
-	BuyerAddr      string                            //采购单位地址
-	BlockPackage   map[string]*BlockPackage          //块中的分包
-	Winnerorder    []map[string]interface{}          //中标候选人排序
-	PackageInfo    map[string]map[string]interface{} //分包信息
-	RuleBlock      *RuleBlock                        //分块规则
-	BlockClassify  *BlockClassify                    //块分类
-	BrandData      [][]map[string]string             //
-	HasTable       int                               //有table
-	HasKey         int                               //是否匹配到table中的标题
-	HasBrand       int                               //有品牌
-	HasGoods       int                               //有商品
-	IsFile         bool                              //有附件
-	AreaScore      map[string]int                    //province得分
-	CityScore      map[string]int                    //city得分
-	DistrictScore  map[string]int                    //district得分
+	SourceMid         string                            // MongoId of the data source
+	Category          string                            // category
+	CategorySecond    string                            // second-level category
+	Content           string                            // body text
+	Title             string                            // title
+	SpiderCode        string                            // crawler (spider) code
+	Domain            string                            // site domain
+	Href              string                            // link to the original page
+	City              string                            // city
+	Province          string                            // province
+	Jsondata          *map[string]interface{}           // NOTE(review): undocumented in the original — confirm purpose
+	Data              *map[string]interface{}           // source record from the database
+	Block             []*Block                          // blocks the document was split into
+	Result            map[string][]*ExtField            // extraction results
+	BuyerAddr         string                            // address of the purchasing unit
+	BlockPackage      map[string]*BlockPackage          // sub-packages found within blocks
+	Winnerorder       []map[string]interface{}          // ranking of winning-bid candidates
+	PackageInfo       map[string]map[string]interface{} // sub-package information
+	RuleBlock         *RuleBlock                        // block-splitting rules
+	BlockClassify     *BlockClassify                    // block classification
+	BrandData         [][]map[string]string             // NOTE(review): undocumented in the original — confirm purpose
+	HasTable          int                               // has a table
+	HasKey            int                               // whether a header in the table was matched
+	HasBrand          int                               // has a brand
+	HasGoods          int                               // has goods
+	IsFile            bool                              // has an attachment
+	FullAreaScore     map[string]float64                // province scores from full-name matches
+	FullCityScore     map[string]float64                // city scores from full-name matches
+	FullDistrictScore map[string]float64                // district scores from full-name matches
+	SimAreaScore      map[string]float64                // province scores from short-name matches
+	SimCityScore      map[string]float64                // city scores from short-name matches
+	SimDistrictScore  map[string]float64                // district scores from short-name matches
+
 }
 
 type ExtField struct {

+ 1 - 1
src/main_test.go

@@ -28,7 +28,7 @@ func Test_han(t *testing.T) {
 func Test_task(t *testing.T) {
+	// Starts one extraction test task. MgoFactory is handed a fixed LAN
+	// address, so this presumably needs that database to be reachable and is
+	// not a self-contained unit test — confirm before running in CI.
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5a83eab640d2d9bbe88b5711", "1", "mxs_v1", "mxs_v1")
+	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5d2bcde8a5cb26b9b7551802", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
+	// NOTE(review): the fixed sleep presumably gives background work time to
+	// finish before the test returns — confirm.
 	time.Sleep(5 * time.Second)
 }

+ 0 - 2
src/res/pcd.txt

@@ -3693,7 +3693,6 @@
 赫山 3 n
 普定 3 n
 芮城 3 n
-滨江 3 n
 黄南 3 n
 南木林 3 n
 集贤 3 n
@@ -3703,7 +3702,6 @@
 二道江 3 n
 前郭县 3 n
 噶尔 3 n
-莲花 3 n
 郑州 3 n
 托克逊 3 n
 比如 3 n

+ 8 - 7
src/res/sv.txt

@@ -3693,7 +3693,6 @@
 赫山 3 n
 普定 3 n
 芮城 3 n
-滨江 3 n
 黄南 3 n
 南木林 3 n
 集贤 3 n
@@ -3703,7 +3702,6 @@
 二道江 3 n
 前郭县 3 n
 噶尔 3 n
-莲花 3 n
 郑州 3 n
 托克逊 3 n
 比如 3 n
@@ -47260,7 +47258,7 @@
 甘竹山村委会 4 n
 三里墩社区居委会 4 n
 前从善楼村委会 4 n
-珍宝岛 4 n
+珍宝岛 4 n
 光辉村民委员会 4 n
 马双阜村委会 4 n
 五树村村民委员会 4 n
@@ -177504,6 +177502,7 @@
 大圩村委会 4 n
 打拉基庙村委会 4 n
 红旗农场 4 n
+六师红旗农场 4 n
 益农村委会 4 n
 国际港社区居委会 4 n
 榆树台村委会 4 n
@@ -206828,7 +206827,6 @@
 寺后村村民委员会 4 n
 东下肥地村民委员会 4 n
 群声村委会 4 n
-大学社区 4 n
 插花镇 4 n
 天津空港经济区三期社区 4 n
 那泔村委会 4 n
@@ -272958,7 +272956,6 @@
 董家院村委会 4 n
 环龙村委会 4 n
 瓮窑头村委会 4 n
-旅游度假区 4 n
 西高楼村民委员会 4 n
 拴马村委会 4 n
 杨英庄村委会 4 n
@@ -316738,7 +316735,8 @@
 三界镇 4 n
 明垭子村民委员会 4 n
 南梁上村委会 4 n
-外向型工业加工区 4 n
+海阳市外向型工业加工区 4 n
+海阳市旅游度假区 4 n
 业尔兴村民委员会 4 n
 三百户村村委会 4 n
 张峰社区居委会 4 n
@@ -409007,4 +409005,7 @@ IT家园社区居委会 4 n
 竹溪县原种场 4 n
 竹溪县种畜场 4 n
 余干县良种场 4 n
-德安县园艺场 4 n
+德安县园艺场 4 n
+长春西汀 4 n
+北京朝阳 5 n
+教育资源 5 n