|
@@ -8,7 +8,7 @@ import (
|
|
|
"strings"
|
|
|
)
|
|
|
|
|
|
-var AgencyReg = regexp.MustCompile("(代理机构.{0,30}|.{2,15}((招标)?代理|咨询|政府采购))")
|
|
|
+var AgencyReg = regexp.MustCompile("((代理机构|中标供应商).{0,30}|.{2,15}((招标)?代理|咨询|政府采购))")
|
|
|
|
|
|
//抽取city
|
|
|
func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}, id string) {
|
|
@@ -109,6 +109,16 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}
|
|
|
districtresult = ""
|
|
|
}
|
|
|
//qu.Debug("结果===", arearesult, "--", cityresult, "--", districtresult)
|
|
|
+ //直辖市
|
|
|
+ if arearesult == "北京" {
|
|
|
+ cityresult = "北京市"
|
|
|
+ } else if arearesult == "天津" {
|
|
|
+ cityresult = "天津市"
|
|
|
+ } else if arearesult == "上海" {
|
|
|
+ cityresult = "上海市"
|
|
|
+ } else if arearesult == "重庆" {
|
|
|
+ cityresult = "重庆市"
|
|
|
+ }
|
|
|
if arearesult == "" {
|
|
|
arearesult = "全国"
|
|
|
} /* else if cityresult == "" {
|
|
@@ -118,9 +128,9 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp map[string]interface{}
|
|
|
}
|
|
|
}*/
|
|
|
//qu.Debug("结果2===", arearesult, "--", cityresult, "--", districtresult)
|
|
|
- resulttmp["area10"] = arearesult
|
|
|
- resulttmp["city10"] = cityresult
|
|
|
- resulttmp["district10"] = districtresult
|
|
|
+ resulttmp["area"] = arearesult
|
|
|
+ resulttmp["city"] = cityresult
|
|
|
+ resulttmp["district"] = districtresult
|
|
|
}
|
|
|
|
|
|
//jsondata中抽取城市
|
|
@@ -387,7 +397,6 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
|
|
|
} else if pos_full == 1 && c_full == "" { //市全称
|
|
|
if cfMap := e.CityFullMap[text]; cfMap != nil {
|
|
|
tmpPbrief := cfMap.P.Brief
|
|
|
- //qu.Debug("市--------", text, tmpPbrief, p_full)
|
|
|
if p_full == "" {
|
|
|
p_full = tmpPbrief
|
|
|
c_full = cfMap.Name
|
|
@@ -403,7 +412,6 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
|
|
|
}
|
|
|
}
|
|
|
} else if pos_full == 2 && d_full == "" { //区全称
|
|
|
- //qu.Debug("区全称===========")
|
|
|
repeatPb := map[string]bool{}
|
|
|
isOk := false
|
|
|
districtOk := false
|
|
@@ -486,58 +494,46 @@ func (e *ExtractTask) NewGetCityByOthers(j *ju.Job, sm *SortMap, pscore, cscore,
|
|
|
c_sim = cbMap.Brief
|
|
|
PCDScore(j, "city", cbMap.Name, 3)
|
|
|
break
|
|
|
- } else if p_sim != "" && p_sim != tmpPbrief {
|
|
|
- //city不做处理
|
|
|
+ } else if p_sim != "" && p_sim != tmpPbrief { //北京师范大学广州实验学校
|
|
|
+ PCDScore(j, "province", tmpPbrief, 1)
|
|
|
+ PCDScore(j, "city", cbMap.Name, 1)
|
|
|
}
|
|
|
}
|
|
|
} else if pos_sim == 2 && d_sim == "" { //区简称
|
|
|
repeatPb := map[string]bool{}
|
|
|
repeatDb := map[string]bool{}
|
|
|
dfull_citys := e.NewDistrictSimAndAll[text]
|
|
|
- //qu.Debug(text, dfull_citys, p_sim)
|
|
|
for _, dfull_city := range dfull_citys {
|
|
|
for dfull, c := range dfull_city { //dfull:简称对应的全称
|
|
|
tmpPbrief := c.P.Brief
|
|
|
if p_sim == tmpPbrief { //省份一致
|
|
|
d_sim = text
|
|
|
- //PCDScoreByDistrictSim("d", dfull, 1, pscore, cscore, dscore)
|
|
|
PCDScore(j, "district", dfull, 2)
|
|
|
if c_sim == "" {
|
|
|
c_sim = c.Brief
|
|
|
- //PCDScoreByDistrictSim("c", c.Name, 2, pscore, cscore, dscore)
|
|
|
PCDScore(j, "city", c.Name, 2)
|
|
|
}
|
|
|
} else if p_sim == "" {
|
|
|
if !repeatDb[dfull] {
|
|
|
PCDScoreByDistrictSim("d", dfull, 1, pscore, cscore, dscore)
|
|
|
- //PCDScore(j, "district", dfull, 1)
|
|
|
repeatDb[dfull] = true
|
|
|
}
|
|
|
if len(dfull_citys) == 1 {
|
|
|
- //p_sim = tmpPbrief
|
|
|
- //c_sim = c.Brief
|
|
|
- //d_sim = text
|
|
|
PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
|
|
|
PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
|
|
|
- //PCDScore(j, "province", p_sim, 2)
|
|
|
- //PCDScore(j, "city", c.Name, 2)
|
|
|
} else {
|
|
|
if !repeatPb[tmpPbrief] {
|
|
|
PCDScoreByDistrictSim("p", tmpPbrief, 1, pscore, cscore, dscore)
|
|
|
- //PCDScore(j, "province", tmpPbrief, 1)
|
|
|
repeatPb[tmpPbrief] = true
|
|
|
}
|
|
|
- //PCDScore(j, "city", c.Name, 1)
|
|
|
PCDScoreByDistrictSim("c", c.Name, 1, pscore, cscore, dscore)
|
|
|
}
|
|
|
} else if p_sim != "" && p_sim != tmpPbrief {
|
|
|
if !repeatPb[tmpPbrief] {
|
|
|
PCDScoreByDistrictSim("p", tmpPbrief, -5, pscore, cscore, dscore)
|
|
|
- //PCDScore(j, "province", tmpPbrief, -5)
|
|
|
repeatPb[tmpPbrief] = true
|
|
|
}
|
|
|
PCDScoreByDistrictSim("c", c.Name, -5, pscore, cscore, dscore)
|
|
|
- //PCDScore(j, "city", c.Name, -5)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -620,13 +616,20 @@ func (e *ExtractTask) NewGetCityByDetail(j *ju.Job) {
|
|
|
//简称匹配
|
|
|
for pos_sim, trie_sim := range e.Trie_Sims {
|
|
|
if trie_sim.Get(text) {
|
|
|
- if pos_sim == 0 { //省简称
|
|
|
+ if pos_sim == 0 && !repeatP[text] { //省简称
|
|
|
PCDScore(j, "province", text, 1)
|
|
|
+ repeatP[text] = true
|
|
|
break
|
|
|
} else if pos_sim == 1 { //市简称
|
|
|
if cbMap := e.CityBriefMap[text]; cbMap != nil {
|
|
|
- PCDScore(j, "city", cbMap.Name, 1)
|
|
|
- PCDScore(j, "province", cbMap.P.Brief, 1)
|
|
|
+ if !repeatP[cbMap.P.Brief] {
|
|
|
+ PCDScore(j, "province", cbMap.P.Brief, 1)
|
|
|
+ repeatP[cbMap.P.Brief] = true
|
|
|
+ }
|
|
|
+ if !repeatC[cbMap.Name] {
|
|
|
+ PCDScore(j, "city", cbMap.Name, 1)
|
|
|
+ repeatC[cbMap.Name] = true
|
|
|
+ }
|
|
|
break
|
|
|
}
|
|
|
} /* else if pos_sim == 2 { //区简称
|
|
@@ -733,7 +736,7 @@ func NewGetDistrict(area, city, district string, e *ExtractTask, finishD, tmpcit
|
|
|
district = d
|
|
|
return city, district
|
|
|
}
|
|
|
- } else { //多个city
|
|
|
+ } /*else { //多个city
|
|
|
for _, tc := range tmpcity { //多个city根据district最高分取
|
|
|
if tc == c.Name && len(finishD) == 1 {
|
|
|
city = c.Name
|
|
@@ -741,7 +744,7 @@ func NewGetDistrict(area, city, district string, e *ExtractTask, finishD, tmpcit
|
|
|
return city, district
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
+ }*/
|
|
|
|
|
|
// if len(citys) == 1 { //区对应一个市
|
|
|
// if c.P.Brief == area {
|