소스 검색

加入结巴分词抽取city

maxiaoshan 6 년 전
부모
커밋
6a0f63f3f4

BIN
src/github.com/yanyiwu.zip


+ 1 - 0
src/github.com/yanyiwu/gojieba

@@ -0,0 +1 @@
+Subproject commit 52dd378dcbf762ba150a2ecd7e4208dbd710a9c3

+ 6 - 3
src/jy/extract/extract.go

@@ -52,7 +52,8 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitClearFn()
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
-		ext.InitCityDFA()
+		ext.InitCityInfo()
+		//ext.InitCityDFA()
 		ext.InitAreaCode()
 		ext.InitPostCode()
 	}
@@ -127,7 +128,8 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitClearFn()
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
-		ext.InitCityDFA()
+		//ext.InitCityDFA()
+		ext.InitCityInfo()
 		ext.InitAreaCode()
 		ext.InitPostCode()
 	}
@@ -1210,7 +1212,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			e.QualityAudit(tmp)
 		}
 		if e.IsExtractCity { //城市抽取
-			e.ExtractCity(j, tmp, _id)
+			//e.ExtractCity(j, tmp, _id)
+			e.NewExtractCity(j, tmp, _id)
 			//			b, p, c, d := e.TransmitData(tmp, _id) //抽取省份城市
 			//			// log.Debug("省份---", p, "城市---", c, "区---", d)
 			//			tmp["district"] = d

+ 244 - 8
src/jy/extract/extractInit.go

@@ -13,6 +13,7 @@ import (
 	"time"
 
 	log "github.com/donnie4w/go-logger/logger"
+	jb "github.com/yanyiwu/gojieba"
 )
 
 type RegLuaInfo struct { //正则或脚本信息
@@ -86,14 +87,19 @@ type ExtractTask struct {
 	CidRuleMap    map[string][]map[string]interface{} //规则
 	AuditFields   []string                            //需要审核的字段名称
 
-	ProvinceMap       map[string]string    //省全称简称(key:浙江省 val:浙江)
-	ProvinceBriefMap  map[string]*Province //省简称对应的省信息(key:浙江 val:&Province{})
-	CityMap           map[string]string    //市全称简称(key:杭州市 val:杭州)
-	CityBriefMap      map[string]*City     //市简称对应的市信息(key:杭州 val:&City{})
-	CityFullMap       map[string]*City     //市全称对应的市信息(key:杭州市 val:&City{})
-	DistrictCityMap   map[string]*City     //区或县对应的city
-	DistrictSimAndAll map[string]string    //区或县(key:简称 val:全称)
-	StreetDistrictMap map[string]*District //街道对应的区或县
+	SiteCityMap          map[string]*SiteCity //站点对应的省市区
+	ProvinceMap          map[string]string    //省全称简称(key:浙江省 val:浙江)
+	ProvinceBriefMap     map[string]*Province //省简称对应的省信息(key:浙江 val:&Province{})
+	CityMap              map[string]string    //市全称简称(key:杭州市 val:杭州)
+	CityBriefMap         map[string]*City     //市简称对应的市信息(key:杭州 val:&City{})
+	CityFullMap          map[string]*City     //市全称对应的市信息(key:杭州市 val:&City{})
+	DistrictCityMap      map[string]*City
+	NewDistrictCityMap   map[string][]*City            //区或县全称对应的city(全国有相同名称的区或县,这里对应的city用slice)
+	DistrictSimAndAll    map[string]string             //区或县(key:简称 val:全称)
+	NewDistrictSimAndAll map[string][]map[string]*City //区或县(key:简称 val:  相同简称的区全称:所在市)
+	StreetDistrictMap    map[string]*District          //街道对应的区或县
+	NewStreetDistrictMap map[string][]*District        //街道全称对应的区或县
+	CommunityDistrictMap map[string][]*District        //村、居委会对应的区或县
 
 	ProvinceAllGet *ju.DFA //省全称
 	ProvinceSimGet *ju.DFA //省简称
@@ -107,6 +113,27 @@ type ExtractTask struct {
 	AreaCodeMap map[string]*AreaCode //区号
 
 	InfoType []map[string]interface{}
+
+	Trie_Full_Province  *ju.Trie //省全称 省、直辖市、自治区
+	Trie_Full_City      *ju.Trie //市全称 地级市
+	Trie_Full_District  *ju.Trie //县全称 市辖区、县(旗)、县级市、自治县(自治旗)、特区、林区
+	Trie_Full_Street    *ju.Trie //街道、乡镇全称  镇、乡、民族乡、县辖区、街道
+	Trie_Full_Community *ju.Trie //村/委员会全称  村、居委会
+	Trie_Sim_Province   *ju.Trie //省简称
+	Trie_Sim_City       *ju.Trie //市简称
+	Trie_Sim_District   *ju.Trie //县简称
+	Trie_Fulls          []*ju.Trie
+	Trie_Sims           []*ju.Trie
+	JB_PCD              *jb.Jieba //省/市/县词典
+	JB_SV               *jb.Jieba //街道/村词典
+	//JB_Full             *jb.Jieba //全称的结巴分词
+	//JB_Sim              *jb.Jieba //简称的结巴分词
+}
+
+// SiteCity is the province/city/district triple recorded for a site.
+type SiteCity struct {
+	// P is the province short name, C the city full name and D the
+	// district full name.
+	P, C, D string
 }
 
 type ClearTaskInfo struct {
@@ -646,6 +673,215 @@ func InitCityAll(version string) map[string]map[string]interface{} {
 	return fn
 }
 
+// InitSite loads the city information of the sites stored in the "site"
+// collection.
+//
+// Sites whose depttype equals "代理机构" are excluded, and only the site,
+// area, city and district fields are projected. It returns nil when the
+// lookup yields no result set.
+func InitSite() []map[string]interface{} {
+	defer qu.Catch()
+	query := map[string]interface{}{
+		"depttype": map[string]interface{}{
+			"$ne": "代理机构",
+		},
+	}
+	// NOTE(review): the second return value of Find is still discarded, as in
+	// the original; confirm whether it reports a failure worth logging.
+	list, _ := db.Mgo.Find("site", query, nil, `{"site":1,"area":1,"city":1,"district":1}`, false, -1, -1)
+	if list == nil {
+		// Return explicitly instead of dereferencing a nil pointer and
+		// depending on the deferred qu.Catch to clean up after the panic.
+		return nil
+	}
+	return *list
+}
+
+// InitCityInfo fills the lookup tables and tries used by the jieba based
+// city extraction:
+//
+//   - SiteCityMap from the site collection (sites without an area, or whose
+//     area is "全国", are skipped);
+//   - province aliases, full names and short names;
+//   - city full names and short names, linked to their province;
+//   - reverse indexes from district, street and community full names to the
+//     cities/districts that contain them (names may repeat nationwide, so
+//     every index value is a slice);
+//   - district short names mapped to their full name and city.
+//
+// The required "brief"/"captial"/"area" assertions are kept as in the
+// original: malformed data there still panics and ends initialisation via
+// the deferred qu.Catch.
+func (e *ExtractTask) InitCityInfo() {
+	defer qu.Catch()
+	e.InitVar() // allocate tries, jieba segmenters and maps
+
+	// Site information.
+	for _, v := range InitSite() {
+		site, _ := v["site"].(string)
+		area, _ := v["area"].(string)
+		city, _ := v["city"].(string)
+		district, _ := v["district"].(string)
+		if area != "" && area != "全国" && site != "" {
+			e.SiteCityMap[site] = &SiteCity{P: area, C: city, D: district}
+		}
+	}
+
+	// Province aliases: every listed word maps to the province short name
+	// (e.g. 华中科技大学 -> 湖北).
+	fn1 := InitProvince(e.TaskInfo.Version)
+	for k, v := range fn1 {
+		for _, p := range v.([]interface{}) {
+			p1, _ := p.(string)
+			e.Trie_Full_Province.AddWords(p1)
+			e.ProvinceMap[p1] = k
+		}
+	}
+
+	// Full names: province -> city -> district -> street -> community.
+	fn2 := InitCityAll(e.TaskInfo.Version)
+	for k, v := range fn2 {
+		// Province (k is the full name, e.g. 浙江省).
+		e.Trie_Full_Province.AddWords(k)
+		p := &Province{}
+		p.Name = k
+		p.Brief = v["brief"].(string) // short name, e.g. 浙江
+		e.Trie_Sim_Province.AddWords(p.Brief)
+		e.ProvinceMap[k] = p.Brief
+		e.ProvinceBriefMap[p.Brief] = p
+		p.Cap = v["captial"].(string) // provincial capital; the key spelling comes from the data
+
+		// Cities of the province.
+		city, _ := v["city"].(map[string]interface{})
+		for k1, v1 := range city {
+			e.Trie_Full_City.AddWords(k1) // k1 is the city full name, e.g. 杭州市
+			v1m, _ := v1.(map[string]interface{})
+			c := &City{}
+			c.Name = k1
+			c.Brief = v1m["brief"].(string) // short name, e.g. 杭州
+			e.Trie_Sim_City.AddWords(c.Brief)
+			e.CityMap[k1] = c.Brief
+			e.CityBriefMap[c.Brief] = c
+			e.CityFullMap[k1] = c
+			c.P = p
+			if c.Name == p.Cap {
+				p.Captial = c // remember the capital city object on the province
+			}
+
+			// Districts / counties of the city.
+			// For a province-administered city without a district level
+			// (e.g. 济源市 in 河南) the district entry repeats the city name;
+			// matching code compares district and city to detect that case.
+			districtmap, _ := v1m["area"].(map[string]interface{})
+			for district, streets := range districtmap {
+				d := &District{}
+				d.Name = district
+				d.C = c
+				e.Trie_Full_District.AddWords(district)
+				// append on a missing key starts a new slice, so no
+				// separate "first element" branch is needed.
+				e.NewDistrictCityMap[district] = append(e.NewDistrictCityMap[district], c)
+
+				// Streets / towns of the district.
+				streetmap, _ := streets.(map[string]interface{})
+				for street, communitys := range streetmap {
+					e.Trie_Full_Street.AddWords(street)
+					e.NewStreetDistrictMap[street] = append(e.NewStreetDistrictMap[street], d)
+
+					// Villages / neighbourhood committees of the street.
+					// A street without a community list is skipped instead
+					// of aborting the whole initialisation with a failed
+					// type assertion.
+					cts, ok := communitys.([]interface{})
+					if !ok {
+						continue
+					}
+					for _, ct := range qu.ObjArrToStringArr(cts) {
+						e.Trie_Full_Community.AddWords(ct)
+						e.CommunityDistrictMap[ct] = append(e.CommunityDistrictMap[ct], d)
+					}
+				}
+			}
+		}
+	}
+
+	// District short names.
+	fn3 := InitCitySim(e.TaskInfo.Version)
+	for _, v := range fn3 {
+		city, _ := v["city"].(map[string]interface{})
+		for _, v1 := range city {
+			v1m, _ := v1.(map[string]interface{})
+			cb := v1m["brief"].(string)                 // city short name
+			arr := v1m["area"].(map[string]interface{}) // district short name -> full name
+			c := e.CityBriefMap[cb]                     // same city for every district below
+			for districtsim, districtall := range arr {
+				dfullstr, _ := districtall.(string)
+				e.Trie_Sim_District.AddWords(districtsim)
+				if c == nil {
+					// The short-name data references a city that the
+					// full-name data did not define. Storing a nil *City
+					// would make the matchers panic on c.P.Brief.
+					continue
+				}
+				e.NewDistrictSimAndAll[districtsim] = append(e.NewDistrictSimAndAll[districtsim], map[string]*City{dfullstr: c})
+			}
+		}
+	}
+
+	e.Trie_Fulls = []*ju.Trie{e.Trie_Full_Province, e.Trie_Full_City, e.Trie_Full_District, e.Trie_Full_Street, e.Trie_Full_Community}
+	e.Trie_Sims = []*ju.Trie{e.Trie_Sim_Province, e.Trie_Sim_City, e.Trie_Sim_District}
+}
+
+// InitVar prepares the state that InitCityInfo fills in: fresh tries, the
+// two jieba segmenters, and any lookup map that has not been allocated yet.
+func (e *ExtractTask) InitVar() {
+	defer qu.Catch()
+
+	// Tries are replaced on every call: full names first, then short names.
+	e.Trie_Full_Province, e.Trie_Full_City, e.Trie_Full_District = &ju.Trie{}, &ju.Trie{}, &ju.Trie{}
+	e.Trie_Full_Street, e.Trie_Full_Community = &ju.Trie{}, &ju.Trie{}
+	e.Trie_Sim_Province, e.Trie_Sim_City, e.Trie_Sim_District = &ju.Trie{}, &ju.Trie{}, &ju.Trie{}
+
+	// Jieba segmenters: province/city/district dictionary, then
+	// street/village dictionary.
+	e.JB_PCD = jb.NewJieba(jb.SELF_PCD_PATH)
+	e.JB_SV = jb.NewJieba(jb.SELF_SV_PATH)
+
+	// Maps are only allocated when still nil, so existing content survives.
+	// Site level.
+	if e.SiteCityMap == nil {
+		e.SiteCityMap = make(map[string]*SiteCity)
+	}
+	// Province level.
+	if e.ProvinceMap == nil {
+		e.ProvinceMap = make(map[string]string)
+	}
+	if e.ProvinceBriefMap == nil {
+		e.ProvinceBriefMap = make(map[string]*Province)
+	}
+	// City level.
+	if e.CityMap == nil {
+		e.CityMap = make(map[string]string)
+	}
+	if e.CityBriefMap == nil {
+		e.CityBriefMap = make(map[string]*City)
+	}
+	if e.CityFullMap == nil {
+		e.CityFullMap = make(map[string]*City)
+	}
+	// District level.
+	if e.DistrictSimAndAll == nil {
+		e.DistrictSimAndAll = make(map[string]string)
+	}
+	if e.NewDistrictSimAndAll == nil {
+		e.NewDistrictSimAndAll = make(map[string][]map[string]*City)
+	}
+	if e.NewDistrictCityMap == nil {
+		e.NewDistrictCityMap = make(map[string][]*City)
+	}
+	// Street and community level.
+	if e.NewStreetDistrictMap == nil {
+		e.NewStreetDistrictMap = make(map[string][]*District)
+	}
+	if e.CommunityDistrictMap == nil {
+		e.CommunityDistrictMap = make(map[string][]*District)
+	}
+}
+
 //初始化城市省份敏感词
 func (e *ExtractTask) InitCityDFA() {
 	defer qu.Catch()

+ 25 - 9
src/jy/extract/extractcity.go

@@ -34,6 +34,19 @@ type Street struct {
 	D    *District
 }
 
+// Community is a village, community or neighbourhood committee.
+type Community struct {
+	Name string
+	S    *Street // presumably the street this community belongs to — TODO confirm
+}
+
+// DistrictSimFull ties a district or county short name to its full name
+// and city information.
+type DistrictSimFull struct {
+	SimName, FullName string // short name and full name of the district
+	C                 *City  // city information
+}
+
 //邮编
 type PostCode struct {
 	Code string
@@ -57,6 +70,7 @@ func (e *ExtractTask) ExtractCity(j *ju.Job, resulttmp map[string]interface{}, i
 			2.采购单位库
 			3.邮编
 			4.固话
+			5.site(todo)
 		低准确率:(全称库匹配到不走简称库)
 			1.city全称库(buyeraddr;title,projectname)
 			2.city简称库(buyeraddr;title,projectname)
@@ -318,6 +332,7 @@ func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]stri
 				} else if pos == 2 { //district
 					p, c := "", ""
 					dcitymap := e.DistrictCityMap[word] //区对应的city
+
 					if dcitymap != nil {
 						c = dcitymap.Name    //city全称
 						p = dcitymap.P.Brief //province简称
@@ -330,6 +345,7 @@ func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]stri
 				} else if pos == 3 { //street
 					p, c, d := "", "", ""
 					sdmap := e.StreetDistrictMap[word] //对应的区
+
 					if sdmap != nil {
 						d = sdmap.Name
 						c = sdmap.C.Name
@@ -393,6 +409,7 @@ func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]stri
 					} else if pos == 2 { //district
 						p, c := "", ""
 						d := e.DistrictSimAndAll[word]
+
 						dcitymap := e.DistrictCityMap[word]
 						if dcitymap != nil {
 							c = dcitymap.Name
@@ -542,7 +559,7 @@ func GetPCDByAreaDFA(province, acd string, e *ExtractTask, j *ju.Job, flag bool)
 	if word := e.ProvinceSimGet.CheckSensitiveWord(acd); word != "" { //取省
 		if pbMap := e.ProvinceBriefMap[word]; pbMap != nil {
 			province = pbMap.Brief
-			if province == acd || pbMap.Name == acd {
+			if province == acd || pbMap.Name == acd { //用于判断area_city_district是否只有省份信息,flag为true就不在匹配area_city_district中的city和district
 				flag = true
 			}
 			PCDScore(j, "province", province, 5)
@@ -597,17 +614,17 @@ func GetPCDByCityDFA(province, city, acd string, e *ExtractTask, j *ju.Job, flag
 }
 func GetPCDByDistrictDFA(province, city, district, acd string, e *ExtractTask, j *ju.Job) (string, string, string) {
 	//area_city_district字段不会单独存区信息(省市,省,市,省区,省市区)
-	for _, GET := range []*ju.DFA{e.DistrictAllGet, e.DistrictSimGet} { //取区
+	for pos, GET := range []*ju.DFA{e.DistrictAllGet, e.DistrictSimGet} { //取区
 		if word := GET.CheckSensitiveWord(acd); word != "" {
 			if dcMap := e.DistrictCityMap[word]; dcMap != nil {
-				if city != "" && dcMap.Name == city { //有province和city
-					district = word
-				} else if city == "" && dcMap.P.Brief == province { //只有province
-					district = word
+				district = word
+				if pos == 1 { //简称换为全称
+					district = e.DistrictSimAndAll[district]
+				}
+				if city == "" && dcMap.P.Brief == province { //只有province和district(are_city_district:河南省二七区)
 					city = dcMap.Name
 					PCDScore(j, "city", city, 5)
-				} else if province == "" { //province和city都没有
-					district = word
+				} else if province == "" { //province和city都没有(are_city_district:二七区)
 					city = dcMap.Name
 					province = dcMap.P.Brief
 					PCDScore(j, "city", city, 5)
@@ -618,6 +635,5 @@ func GetPCDByDistrictDFA(province, city, district, acd string, e *ExtractTask, j
 			}
 		}
 	}
-
 	return province, city, district
 }

+ 2 - 1
src/jy/extract/extractudp.go

@@ -107,7 +107,8 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 		ext.InitClearFn()
 		if ext.IsExtractCity { //版本上控制是否开始城市抽取
 			//初始化城市DFA信息
-			ext.InitCityDFA()
+			//ext.InitCityDFA()
+			ext.InitCityInfo()
 			ext.InitAreaCode()
 			ext.InitPostCode()
 		}

+ 772 - 0
src/jy/extract/newextractcity.go

@@ -0,0 +1,772 @@
+package extract
+
+import (
+	. "jy/pretreated"
+	ju "jy/util"
+	qu "qfw/util"
+	"strings"
+)
+
+// NewExtractCity works out the province ("area"), city and district of a
+// job and stores them in resulttmp under "area", "city" and "district".
+//
+// Evidence is scored in this order:
+//  1. crawler jsondata
+//  2. site table
+//  3. buyer postcode (the buyer-unit library step is not available yet)
+//  4. buyer landline area code
+//  5. buyeraddr, title, projectname and buyer text
+//  6. detail text, only when at least one province has been scored
+//
+// Scores coming from district short-name matches are collected separately
+// and merged by MergeScores afterwards. When no province is found the area
+// becomes "全国"; when a province is found without a city, the Cap value of
+// the province is used and resulttmp["defaultpcap"] is set to true.
+func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}, id string) {
+	defer qu.Catch()
+
+	// Make sure the score tables of the job exist.
+	if j.AreaScore == nil {
+		j.AreaScore = make(map[string]int)
+	}
+	if j.CityScore == nil {
+		j.CityScore = make(map[string]int)
+	}
+	if j.DistrictScore == nil {
+		j.DistrictScore = make(map[string]int)
+	}
+
+	// Side tables for district short-name matches (province, city, district).
+	simP := make(map[string]int)
+	simC := make(map[string]int)
+	simD := make(map[string]int)
+
+	fields := NewSortMap()
+
+	// 1. jsondata
+	e.NewGetCityByJsonData(j)
+	// 2. site table
+	e.NewGetCityBySite(j)
+	// 3. postcode
+	zip, _ := resulttmp["buyerzipcode"].(string)
+	e.NewGetCityByPostCode(j, zip)
+	// 4. landline area code
+	tel, _ := resulttmp["buyertel"].(string)
+	e.NewGetCityByAreaCode(j, tel)
+	// 5. free-text fields, in the order they are added here
+	addr, _ := resulttmp["buyeraddr"].(string)
+	title, _ := resulttmp["title"].(string)
+	project, _ := resulttmp["projectname"].(string)
+	buyer, _ := resulttmp["buyer"].(string)
+	fields.AddKey("buyeraddr", addr)
+	fields.AddKey("title", title)
+	fields.AddKey("projectname", project)
+	fields.AddKey("buyer", buyer)
+	e.NewGetCityByOthers(j, fields, &simP, &simC, &simD)
+	// 6. detail text
+	if len(j.AreaScore) > 0 {
+		e.NewGetCityByDetail(j)
+	}
+	// Fold the short-name side tables into the job scores.
+	MergeScores(j, &simP, &simC, &simD)
+
+	topP := HighestScoreArr(j.AreaScore)
+	topC := HighestScoreArr(j.CityScore)
+	topD := HighestScoreArr(j.DistrictScore)
+	area, city, district := "", "", ""
+	candidates := []string{}
+	switch {
+	case len(topP) == 1:
+		// A single best province: take it as the result.
+		area = topP[0]
+		city, candidates = NewGetCity(area, city, e, topC, candidates)
+		city, district = NewGetDistrict(area, city, district, e, topD, candidates)
+	case len(topP) > 1 && len(topC) == 1:
+		// Several provinces tie but one city wins: derive the province from it.
+		city = topC[0]
+		if full := e.CityFullMap[city]; full != nil {
+			area = full.P.Brief
+			city, district = NewGetDistrict(area, city, district, e, topD, candidates)
+		}
+	case len(topP) > 1:
+		// Several provinces and not exactly one city: fall back to the first province.
+		area = topP[0]
+		city, candidates = NewGetCity(area, city, e, topC, candidates)
+		city, district = NewGetDistrict(area, city, district, e, topD, candidates)
+	}
+
+	switch {
+	case area == "":
+		area = "全国"
+	case city == "":
+		if prov := e.ProvinceBriefMap[area]; prov != nil {
+			city = prov.Cap
+			resulttmp["defaultpcap"] = true
+		}
+	}
+	resulttmp["area"] = area
+	resulttmp["city"] = city
+	resulttmp["district"] = district
+}
+
+//jsondata中抽取城市
+func (e *ExtractTask) NewGetCityByJsonData(j *ju.Job) (province, city, district, p, c, d string) {
+	defer qu.Catch()
+	jsondata := *j.Jsondata
+	if jsondata != nil { //jsondata中获取province和city
+		if a_c_d, ok := jsondata["area_city_district"].(string); ok && a_c_d != "" {
+			p, c, d = GetByACDFullJb(p, c, d, a_c_d, e, j) //全称匹配
+			GetByACDSimJb(p, c, d, a_c_d, e, j)            //简称匹配
+		}
+		city, _ = jsondata["city"].(string)         //city全称或者简称
+		province, _ = jsondata["area"].(string)     //province简称
+		district, _ = jsondata["district"].(string) //district全称
+	}
+	PCDScore(j, "district", district, 5) //district打分
+	bp := false
+	if province != "" {
+		if e.ProvinceBriefMap[province] != nil { //判断爬虫的省份是否正确 (全国)
+			bp = true //省份正确
+		}
+	}
+	pbrief := ""
+	if city != "" {
+		cityfullmap := e.CityFullMap[city] //判断city全称是否正确
+		if cityfullmap != nil {
+			pbrief = cityfullmap.P.Brief //province简称
+		} else {
+			citybriefmap := e.CityBriefMap[city] //判断city简称是否正确
+			if citybriefmap != nil {
+				city = citybriefmap.Name //city简称替换为全称
+				pbrief = citybriefmap.P.Brief
+			}
+		}
+	}
+	if bp {
+		if pbrief == province { //爬虫的province和city匹配
+			PCDScore(j, "city", city, 5)
+		} else { //pbrief不匹配province(此时city为空或者错误)
+			city = ""
+		}
+		PCDScore(j, "province", province, 5)
+	} else { //省份错误或为空,取city的对应的pbrief为province
+		if pbrief != "" {
+			province = pbrief
+			PCDScore(j, "province", province, 5)
+			PCDScore(j, "city", city, 5)
+		} else {
+			province = ""
+			city = ""
+		}
+	}
+	return
+
+}
+
+// GetByACDFullJb segments a_c_d with the JB_PCD jieba instance, scores province/city/district full-name matches on j and returns the updated pbrief, city and district.
+func GetByACDFullJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.Job) (string, string, string) {
+	text := e.JB_PCD.Cut(a_c_d, true)
+	//qu.Debug("Full----", text)
+	repeatPb := map[string]bool{}
+	for _, full := range text {
+		if e.Trie_Full_Province.Get(full) { // a_c_d contains a province full name
+			if tmpPbrief := e.ProvinceMap[full]; tmpPbrief != "" {
+				pbrief = tmpPbrief // province short name
+				PCDScore(j, "province", pbrief, 5)
+			}
+		} else if e.Trie_Full_City.Get(full) { // a_c_d contains a city full name
+			if cfMap := e.CityFullMap[full]; cfMap != nil {
+				tmpcity := cfMap.Name                    // city full name
+				tmpPbrief := cfMap.P.Brief               // province short name
+				if pbrief != "" && pbrief == tmpPbrief { // a province was already found and it matches
+					city = tmpcity
+					PCDScore(j, "city", city, 5)
+				} else if pbrief == "" {
+					city = tmpcity
+					pbrief = tmpPbrief
+					PCDScore(j, "city", city, 5)
+					PCDScore(j, "province", pbrief, 5)
+				}
+			}
+		} else if e.Trie_Full_District.Get(full) { // a_c_d contains a district full name (one name may belong to several cities)
+			carr := e.NewDistrictCityMap[full]
+			if len(carr) > 0 {
+				district = full
+				PCDScore(j, "district", district, 5)
+				for _, c := range carr {
+					tmpcity := c.Name      // city full name
+					tmpPbrief := c.P.Brief // province short name
+					if pbrief == "" {      // no province matched so far
+						PCDScore(j, "city", tmpcity, 5)
+						if !repeatPb[tmpPbrief] {
+							PCDScore(j, "province", tmpPbrief, 5)
+							repeatPb[tmpPbrief] = true
+						}
+					} else { // a province is already known
+						if pbrief != tmpPbrief { // candidate city lies in another province: treated as noise and penalised
+							PCDScore(j, "city", tmpcity, -5)
+							PCDScore(j, "province", tmpPbrief, -5)
+						} else { // consistent with the earlier match
+							if city == "" { // handles area_city_district such as 河南省二七区: no city matched earlier, so the district supplies it
+								PCDScore(j, "city", tmpcity, 5)
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	return pbrief, city, district
+}
+
+// GetByACDSimJb segments a_c_d with the JB_PCD jieba instance and scores
+// short-name matches on j.
+//
+// pbrief, city and district are the results of the preceding full-name
+// pass; each level is only matched by short name while its value is still
+// empty. The updated values are local to this function and are not returned.
+//
+// Entries of NewDistrictSimAndAll that carry a nil city (or a city without
+// a province) are skipped instead of being dereferenced.
+func GetByACDSimJb(pbrief, city, district, a_c_d string, e *ExtractTask, j *ju.Job) {
+	text := e.JB_PCD.Cut(a_c_d, true)
+	repeatPb := map[string]bool{}
+	for _, sim := range text {
+		if pbrief == "" && e.Trie_Sim_Province.Get(sim) { // no province from the full-name pass
+			if pbMap := e.ProvinceBriefMap[sim]; pbMap != nil {
+				pbrief = pbMap.Brief
+				PCDScore(j, "province", pbrief, 5)
+			}
+		} else if city == "" && e.Trie_Sim_City.Get(sim) { // no city from the full-name pass
+			if cbMap := e.CityBriefMap[sim]; cbMap != nil {
+				tmpcity := cbMap.Name
+				tmpPbrief := cbMap.P.Brief
+				if pbrief != "" && pbrief == tmpPbrief {
+					city = tmpcity
+					PCDScore(j, "city", city, 5)
+				} else if pbrief == "" {
+					city = tmpcity
+					pbrief = tmpPbrief
+					PCDScore(j, "city", city, 5)
+					PCDScore(j, "province", pbrief, 5)
+				}
+			}
+		} else if district == "" && e.Trie_Sim_District.Get(sim) { // no district from the full-name pass
+			dfullarr := e.NewDistrictSimAndAll[sim]
+			if len(dfullarr) > 0 {
+				// NOTE(review): the short name itself is scored here, while
+				// NewGetCityByOthers scores the district full name — confirm
+				// which one is intended.
+				PCDScore(j, "district", sim, 5)
+				for _, dfullAndCity := range dfullarr { // every full name behind this short name
+					for _, c := range dfullAndCity {
+						if c == nil || c.P == nil {
+							// Incomplete table entry; nothing to score.
+							continue
+						}
+						tmpcity := c.Name      // city full name
+						tmpPbrief := c.P.Brief // province short name
+						if pbrief == "" {      // no province matched so far
+							PCDScore(j, "city", tmpcity, 5)
+							if !repeatPb[tmpPbrief] {
+								PCDScore(j, "province", tmpPbrief, 5)
+								repeatPb[tmpPbrief] = true
+							}
+						} else { // a province is already known
+							if pbrief != tmpPbrief { // candidate in another province: treated as noise and penalised
+								PCDScore(j, "city", tmpcity, -5)
+								PCDScore(j, "province", tmpPbrief, -5)
+							} else { // consistent with the earlier match
+								if city == "" { // e.g. 河南省二七区: the district supplies the missing city
+									PCDScore(j, "city", tmpcity, 5)
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+// NewGetCityBySite scores the province, city and district registered in
+// SiteCityMap for the job's "site" value. Empty values and the literal
+// "null" are ignored, as is the province "全国".
+func (e *ExtractTask) NewGetCityBySite(j *ju.Job) {
+	site, _ := (*j.Data)["site"].(string)
+	info := e.SiteCityMap[site]
+	if info == nil {
+		return
+	}
+	if prov := info.P; prov != "" && prov != "全国" && prov != "null" {
+		PCDScore(j, "province", prov, 5)
+	}
+	if cty := info.C; cty != "" && cty != "null" {
+		PCDScore(j, "city", cty, 5)
+	}
+	if dist := info.D; dist != "" && dist != "null" {
+		PCDScore(j, "district", dist, 5)
+	}
+}
+
+// NewGetCityByPostCode scores province, city and district from the buyer
+// postcode and returns what it found.
+//
+// A postcode may map to several districts: each of them is scored with 3,
+// or with 5 when the postcode maps to exactly one non-empty district.
+// Province and city are always scored with 5. Nothing is scored and all
+// results are empty when the postcode is not in PostCodeMap.
+//
+// The returned district is set only in the unambiguous single-district
+// case, mirroring how NewGetCityByAreaCode returns a city only when the
+// area code maps to exactly one. Previously the loop variable shadowed the
+// named result, so the district was never returned.
+func (e *ExtractTask) NewGetCityByPostCode(j *ju.Job, postcode string) (province, city, district string) {
+	defer qu.Catch()
+	pc := e.PostCodeMap[postcode]
+	if pc == nil {
+		return
+	}
+	province = pc.P
+	city = pc.C
+	districts := pc.D
+	score := 3
+	if len(districts) == 1 && districts[0] != "" {
+		score = 5
+		district = districts[0]
+	}
+	for _, name := range districts {
+		PCDScore(j, "district", name, score)
+	}
+	PCDScore(j, "province", province, 5)
+	PCDScore(j, "city", city, 5)
+	return
+}
+
+// NewGetCityByAreaCode scores province and city from the landline area code
+// at the start of buyertel.
+//
+// Numbers shorter than 11 bytes are ignored. For a number starting with "0"
+// the 4-character prefix is looked up in AreaCodeMap first and the
+// 3-character prefix second; the city is scored only when the code maps to
+// exactly one city. A number starting with "853" is scored as 澳门, the one
+// area code that does not start with "0". The district result is never set.
+func (e *ExtractTask) NewGetCityByAreaCode(j *ju.Job, buyertel string) (province, city, district string) {
+	defer qu.Catch()
+	if len(buyertel) < 11 {
+		return
+	}
+	switch {
+	case strings.HasPrefix(buyertel, "0"):
+		for width := 4; width >= 3; width-- {
+			info := e.AreaCodeMap[buyertel[:width]]
+			if info == nil {
+				continue // try the shorter prefix
+			}
+			province = info.P
+			if cities := info.C; len(cities) == 1 { // several cities share the code: leave the city open
+				city = cities[0]
+				PCDScore(j, "city", city, 5)
+			}
+			PCDScore(j, "province", province, 5)
+			break
+		}
+	case buyertel[:3] == "853":
+		province = "澳门"
+		city = "澳门"
+		PCDScore(j, "province", province, 5)
+		PCDScore(j, "city", city, 5)
+	}
+	return
+}
+
+// NewGetCityByOthers scores province/city/district evidence found in the
+// text fields held by sm (the caller adds buyeraddr, title, projectname and
+// buyer).
+//
+// Each field is segmented with the JB_SV jieba instance; single-rune tokens
+// are ignored. Every remaining token is checked against
+//  1. the full-name tries (province, city, district, street, community), and
+//  2. the short-name tries (province, city, district).
+//
+// The first province/city/district accepted for a field is remembered
+// (p_full, c_full, d_full, p_sim, c_sim, d_sim) and constrains later tokens
+// of the same field: candidates from a different province are either
+// ignored (cities) or penalised with -5 (districts).
+//
+// District short-name matches that cannot be tied to an already matched
+// province are not written to the job; they are recorded in pscore, cscore
+// and dscore through PCDScoreByDistrictSim so that the caller can decide
+// whether to merge them.
+//
+// Entries of NewDistrictSimAndAll that carry a nil city (or a city without
+// a province) are skipped instead of being dereferenced.
+func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore, dscore *map[string]int) {
+	for _, from := range sm.Keys {
+		// Matches are tracked per field, so they start empty for each one.
+		p_full, c_full, d_full, p_sim, c_sim, d_sim := "", "", "", "", "", ""
+		str, _ := sm.Map[from].(string)
+		jbText := e.JB_SV.Cut(str, true)
+		for _, text := range jbText {
+			if len([]rune(text)) == 1 {
+				continue
+			}
+			// Full-name matching.
+			for pos_full, trie_full := range e.Trie_Fulls {
+				if trie_full.Get(text) {
+					if pos_full == 0 && p_full == "" { // province full name
+						if tmpPbrief := e.ProvinceMap[text]; tmpPbrief != "" { // convert to the short name
+							p_full = tmpPbrief
+							PCDScore(j, "province", p_full, 4)
+							break
+						}
+					} else if pos_full == 1 && c_full == "" { // city full name
+						if cfMap := e.CityFullMap[text]; cfMap != nil {
+							tmpPbrief := cfMap.P.Brief
+							if p_full == "" {
+								p_full = tmpPbrief
+								c_full = cfMap.Name
+								PCDScore(j, "province", p_full, 4)
+								PCDScore(j, "city", c_full, 4)
+								break
+							} else if p_full == tmpPbrief {
+								c_full = cfMap.Name
+								PCDScore(j, "city", c_full, 4)
+								break
+							} else if p_full != "" && p_full != tmpPbrief {
+								// City of another province: ignored.
+							}
+						}
+					} else if pos_full == 2 && d_full == "" { // district full name
+						repeatPb := map[string]bool{}
+						isOk := false       // a province/city/district was fixed by this token
+						districtOk := false // the district is plausible and gets a positive score
+						citys := e.NewDistrictCityMap[text]
+						for _, c := range citys {
+							tmpPbrief := c.P.Brief
+							if p_full == tmpPbrief { // same province as already matched
+								d_full = text
+								if c_full == "" {
+									c_full = c.Name
+									PCDScore(j, "city", c_full, 4)
+								}
+								isOk = true
+								districtOk = true
+							} else if p_full == "" { // no province matched yet
+								districtOk = true
+								if len(citys) == 1 { // the district name is unique
+									p_full = tmpPbrief
+									c_full = c.Name
+									d_full = text
+									PCDScore(j, "province", p_full, 4)
+									PCDScore(j, "city", c_full, 4)
+									isOk = true
+								} else { // ambiguous: score every candidate, fix nothing
+									if !repeatPb[tmpPbrief] {
+										PCDScore(j, "province", tmpPbrief, 2)
+										repeatPb[tmpPbrief] = true
+									}
+									PCDScore(j, "city", c.Name, 2)
+								}
+							} else if p_full != "" && p_full != tmpPbrief { // other province: penalise as noise
+								if !repeatPb[tmpPbrief] {
+									PCDScore(j, "province", tmpPbrief, -5)
+									repeatPb[tmpPbrief] = true
+								}
+								PCDScore(j, "city", c.Name, -5)
+							}
+						}
+						if districtOk {
+							PCDScore(j, "district", text, 4)
+						} else {
+							PCDScore(j, "district", text, -5)
+						}
+						if isOk {
+							break
+						}
+					} else if pos_full == 3 { // street full name
+						districts := e.NewStreetDistrictMap[text]
+						DealMultipleDistrict(e, j, districts, 2)
+					} else if pos_full == 4 { // community full name
+						districts := e.CommunityDistrictMap[text]
+						DealMultipleDistrict(e, j, districts, 2)
+					}
+				}
+			}
+			// Short-name matching.
+			for pos_sim, trie_sim := range e.Trie_Sims {
+				if trie_sim.Get(text) {
+					if pos_sim == 0 && p_sim == "" { // province short name
+						p_sim = text
+						PCDScore(j, "province", p_sim, 3)
+						break
+					} else if pos_sim == 1 && c_sim == "" { // city short name
+						if cbMap := e.CityBriefMap[text]; cbMap != nil {
+							tmpPbrief := cbMap.P.Brief
+							if p_sim == "" {
+								p_sim = tmpPbrief
+								c_sim = cbMap.Brief
+								PCDScore(j, "province", p_sim, 2)
+								PCDScore(j, "city", cbMap.Name, 2)
+								break
+							} else if p_sim == tmpPbrief {
+								c_sim = cbMap.Brief
+								PCDScore(j, "city", cbMap.Name, 3)
+								break
+							} else if p_sim != "" && p_sim != tmpPbrief {
+								// City of another province: ignored.
+							}
+						}
+					} else if pos_sim == 2 && d_sim == "" { // district short name
+						repeatPb := map[string]bool{}
+						repeatDb := map[string]bool{}
+						dfull_citys := e.NewDistrictSimAndAll[text]
+						for _, dfull_city := range dfull_citys {
+							for dfull, c := range dfull_city { // dfull: full name behind the short name
+								if c == nil || c.P == nil {
+									// Incomplete table entry; nothing to score.
+									continue
+								}
+								tmpPbrief := c.P.Brief
+								if p_sim == tmpPbrief { // same province as already matched
+									d_sim = text
+									PCDScore(j, "district", dfull, 2)
+									if c_sim == "" {
+										c_sim = c.Brief
+										PCDScore(j, "city", c.Name, 2)
+									}
+								} else if p_sim == "" { // no province yet: record in the side tables only
+									if !repeatDb[dfull] {
+										PCDScoreByDistrictSim("d", dfull, 1, pscore, cscore, dscore)
+										repeatDb[dfull] = true
+									}
+									if len(dfull_citys) == 1 {
+										PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
+										PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
+									} else {
+										if !repeatPb[tmpPbrief] {
+											PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
+											repeatPb[tmpPbrief] = true
+										}
+										PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
+									}
+								} else if p_sim != "" && p_sim != tmpPbrief { // other province: penalise in the side tables
+									if !repeatPb[tmpPbrief] {
+										PCDScoreByDistrictSim("p", tmpPbrief, -5, pscore, cscore, dscore)
+										repeatPb[tmpPbrief] = true
+									}
+									PCDScoreByDistrictSim("c", c.Name, -5, pscore, cscore, dscore)
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+// NewGetCityByDetail scores province/city/district candidates found in the
+// body text of a job (j.Content).
+//
+// When the content is longer than 600 runes, only its first 300 and last 300
+// runes are examined. The text is segmented with e.JB_SV.Cut and every
+// segment longer than one rune is looked up first in the full-name tries
+// (e.Trie_Fulls) and then in the abbreviation tries (e.Trie_Sims). Every hit
+// adds 1 point via PCDScore, or via DealMultipleDistrict for streets and
+// neighborhood committees.
+func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
+	detailRune := []rune(j.Content)
+	detail := j.Content
+	if len(detailRune) > 600 {
+		// Long content: keep only the leading and trailing 300 runes.
+		start := detailRune[:300]
+		end := detailRune[len(detailRune)-300:]
+		detail = string(start) + string(end)
+	}
+	for _, text := range e.JB_SV.Cut(detail, true) {
+		if len([]rune(text)) > 1 {
+			//qu.Debug("text---", text)
+			// Full-name matching. The if/else chain is kept (rather than a
+			// switch) because each break must leave the enclosing for loop.
+			for pos_full, trie_full := range e.Trie_Fulls {
+				if trie_full.Get(text) {
+					if pos_full == 0 { // province full name
+						if tmpPbrief := e.ProvinceMap[text]; tmpPbrief != "" { // map to the abbreviation
+							PCDScore(j, "province", tmpPbrief, 1)
+							break
+						}
+					} else if pos_full == 1 { // city full name
+						if cfMap := e.CityFullMap[text]; cfMap != nil {
+							PCDScore(j, "province", cfMap.P.Brief, 1)
+							PCDScore(j, "city", cfMap.Name, 1)
+							break
+						}
+					} else if pos_full == 2 { // district full name
+						citys := e.NewDistrictCityMap[text]
+						if len(citys) > 0 {
+							repeatPb := map[string]bool{}
+							PCDScore(j, "district", text, 1)
+							for _, c := range citys {
+								PCDScore(j, "city", c.Name, 1)
+								// Deduplicate by province abbreviation so that every
+								// distinct province owning a district of this name is
+								// scored exactly once. Keying by the district text
+								// would only ever score the first city's province.
+								if pb := c.P.Brief; !repeatPb[pb] {
+									PCDScore(j, "province", pb, 1)
+									repeatPb[pb] = true
+								}
+							}
+							break
+						}
+					} else if pos_full == 3 { // street full name
+						districts := e.NewStreetDistrictMap[text]
+						DealMultipleDistrict(e, j, districts, 1)
+					} else if pos_full == 4 { // neighborhood committee full name
+						districts := e.CommunityDistrictMap[text]
+						DealMultipleDistrict(e, j, districts, 1)
+					}
+				}
+			}
+			//qu.Debug("detail full names---", j.AreaScore, j.CityScore, j.DistrictScore)
+			// Abbreviation matching.
+			for pos_sim, trie_sim := range e.Trie_Sims {
+				if trie_sim.Get(text) {
+					if pos_sim == 0 { // province abbreviation
+						PCDScore(j, "province", text, 1)
+						break
+					} else if pos_sim == 1 { // city abbreviation
+						if cbMap := e.CityBriefMap[text]; cbMap != nil {
+							PCDScore(j, "city", cbMap.Name, 1)
+							PCDScore(j, "province", cbMap.P.Brief, 1)
+							break
+						}
+					} /* else if pos_sim == 2 { // district abbreviation (disabled)
+						repeatDb := map[string]bool{}
+						dfull_citys := e.NewDistrictSimAndAll[text]
+						for _, dfull_city := range dfull_citys {
+							for dfull, _ := range dfull_city { // dfull: full name for the abbreviation
+								if !repeatDb[dfull] {
+									PCDScore(j, "district", dfull, 1)
+									repeatDb[dfull] = true
+								}
+							}
+						}
+					}*/
+				}
+			}
+			//qu.Debug("detail abbreviations---", j.AreaScore, j.CityScore, j.DistrictScore)
+		}
+	}
+}
+
+// DealMultipleDistrict handles a street or neighborhood committee name that
+// maps to several districts. It scores each of those districts, plus the
+// cities and provinces they belong to (looked up in e.NewDistrictCityMap).
+// Every distinct district, city and province receives score at most once
+// per call.
+func DealMultipleDistrict(e *ExtractTask, j *ju.Job, districts []*District, score int) {
+	seenProvince := map[string]bool{}
+	seenCity := map[string]bool{}
+	seenDistrict := map[string]bool{}
+	for _, d := range districts {
+		dName := d.Name
+		if !seenDistrict[dName] {
+			seenDistrict[dName] = true
+			PCDScore(j, "district", dName, score)
+		}
+		// A district name may belong to more than one city.
+		for _, c := range e.NewDistrictCityMap[dName] {
+			if pb := c.P.Brief; !seenProvince[pb] {
+				seenProvince[pb] = true
+				PCDScore(j, "province", pb, score)
+			}
+			if cName := c.Name; !seenCity[cName] {
+				seenCity[cName] = true
+				PCDScore(j, "city", cName, score)
+			}
+		}
+	}
+}
+
+// NewGetCity appends to tmpcity every city in finishC that is known to
+// e.CityFullMap and whose province abbreviation equals area. If tmpcity then
+// holds exactly one entry, that entry replaces city. The (possibly
+// unchanged) city and the extended tmpcity are returned.
+func NewGetCity(area, city string, e *ExtractTask, finishC, tmpcity []string) (string, []string) {
+	for _, name := range finishC {
+		info := e.CityFullMap[name]
+		if info == nil || info.P.Brief != area {
+			continue
+		}
+		tmpcity = append(tmpcity, name)
+	}
+	// Only an unambiguous match decides the city here.
+	if len(tmpcity) == 1 {
+		city = tmpcity[0]
+	}
+	return city, tmpcity
+}
+// NewGetDistrict walks the candidate districts in finishD in order and
+// returns the first (city, district) pair that is consistent with area and
+// the candidate cities in tmpcity:
+//
+//   - no candidate city: the district's city must lie in province area, and
+//     that city is returned along with the district;
+//   - one candidate city: the district's city must equal city and lie in
+//     province area;
+//   - several candidate cities: the district's city must be one of them.
+//
+// When nothing matches, city and district are returned unchanged.
+func NewGetDistrict(area, city, district string, e *ExtractTask, finishD, tmpcity []string) (string, string) {
+	for _, d := range finishD {
+		for _, c := range e.NewDistrictCityMap[d] {
+			switch len(tmpcity) {
+			case 0: // no candidate city
+				if c.P.Brief == area {
+					return c.Name, d
+				}
+			case 1: // exactly one candidate city
+				if c.Name == city && c.P.Brief == area {
+					return city, d
+				}
+			default: // several candidate cities
+				for _, tc := range tmpcity {
+					if tc == c.Name {
+						return c.Name, d
+					}
+				}
+			}
+		}
+	}
+	return city, district
+}
+
+// PCDScoreByDistrictSim adds score to the entry t of one of three score
+// maps, selected by stype: "p" -> *ps (province), "c" -> *cs (city),
+// "d" -> *ds (district). An empty t or any other stype is a no-op.
+// NOTE(review): qu.Catch is deferred; presumably it recovers panics such as
+// a write to a nil map - confirm against its definition.
+func PCDScoreByDistrictSim(stype, t string, score int, ps, cs, ds *map[string]int) {
+	defer qu.Catch()
+	if t == "" {
+		return
+	}
+	switch stype {
+	case "d":
+		(*ds)[t] += score
+	case "c":
+		(*cs)[t] += score
+	case "p":
+		(*ps)[t] += score
+	}
+}
+
+// MergeScores adds the scores collected in *pscore, *cscore and *dscore to
+// j.AreaScore, j.CityScore and j.DistrictScore respectively. Nothing is
+// merged when j.AreaScore is empty.
+func MergeScores(j *ju.Job, pscore, cscore, dscore *map[string]int) {
+	if len(j.AreaScore) == 0 {
+		return
+	}
+	for name, pts := range *pscore {
+		j.AreaScore[name] += pts
+	}
+	for name, pts := range *cscore {
+		j.CityScore[name] += pts
+	}
+	for name, pts := range *dscore {
+		j.DistrictScore[name] += pts
+	}
+}
+
+//province,city,district干扰项减分
+//func PCDSubtractScore(e *ExtractTask, j *ju.Job, stype, text string, score int) {
+//	defer qu.Catch()
+//	if text != "" {
+//		if stype == "city" {
+//			for cn, cscore := range j.CityScore {
+//				if cn != text {
+//					j.CityScore[cn] = cscore + score
+//					//错误的city减分后对应的province也减分
+//					for pb, pscore := range j.AreaScore {
+//						if cfMap := e.CityFullMap[cn]; cfMap != nil && cfMap.P.Brief == pb {
+//							j.AreaScore[pb] = pscore + score
+//						}
+//					}
+//				}
+//			}
+//		} else if stype == "province" {
+//			for pb, pscore := range j.AreaScore {
+//				if pb != text {
+//					j.AreaScore[pb] = pscore + score
+//					//错误的province减分后对应的city也要减分
+//					for cn, cscore := range j.CityScore {
+//						if cfMap := e.CityFullMap[cn]; cfMap != nil && cfMap.P.Brief == pb {
+//							j.CityScore[cn] = cscore + score
+//						}
+//					}
+//				}
+//			}
+//		}
+//		//		for name, tmpscore := range *whichMap {
+//		//			if name != text {
+//		//				(*whichMap)[name] = tmpscore + score
+//		//			}
+//		//		}
+//	}
+//}

+ 1 - 1
src/jy/pretreated/analytable.go

@@ -96,7 +96,7 @@ var (
 	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?((电话([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|(项目)?经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
-		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|甲|招标(服务)?|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人|采购部"),
+		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人|采购部"),
 		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
 	}
 	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")

+ 37 - 0
src/jy/util/util.go

@@ -14,6 +14,12 @@ type DFA struct {
 	Link map[string]interface{}
 }
 
+// Trie is a node of a rune-keyed prefix tree (dictionary tree).
+type Trie struct {
+	y bool           // true when the path from the root to this node spells a stored word
+	c map[rune]*Trie // child nodes keyed by the next rune; nil until a child is added
+}
+
 var syncint chan bool //获取下标锁
 var Config map[string]interface{}
 var Se = qu.SimpleEncrypt{Key: "topnet@extract"}
@@ -127,6 +133,37 @@ func (d *DFA) CheckSensitiveWord(src string) string {
 	return res
 }
 
+// AddWords inserts each of words into the trie rooted at t, creating child
+// nodes as needed and flagging the node reached by the last rune of every
+// word. An empty word flags the root itself.
+func (t *Trie) AddWords(words ...string) {
+	for _, word := range words {
+		node := t // every word starts again from the root
+		for _, r := range word {
+			if node.c == nil {
+				node.c = map[rune]*Trie{}
+			}
+			next := node.c[r]
+			if next == nil {
+				next = &Trie{}
+				node.c[r] = next
+			}
+			node = next
+		}
+		node.y = true
+	}
+}
+
+// Get reports whether word was stored in the trie as a complete word. A
+// string that is only a prefix of stored words yields false.
+func (t *Trie) Get(word string) bool {
+	node := t
+	for _, r := range word {
+		// Indexing a nil child map is safe and yields nil.
+		if node = node.c[r]; node == nil {
+			return false
+		}
+	}
+	return node.y
+}
+
 //初始化商品
 func InitGoods() {
 	GoodsGet = &DFA{}

+ 1 - 1
src/main_test.go

@@ -28,7 +28,7 @@ func Test_han(t *testing.T) {
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5736324a61a0721f15f73188", "1", "mxs_v1", "mxs_v1")
+	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5d2bd878a5cb26b9b77628ee", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }

BIN
src/tdm64-gcc-5.1.0-2.zip