|
@@ -2,6 +2,7 @@
|
|
package extract
|
|
package extract
|
|
|
|
|
|
import (
|
|
import (
|
|
|
|
+ "fmt"
|
|
"github.com/sensitive"
|
|
"github.com/sensitive"
|
|
"gopkg.in/mgo.v2/bson"
|
|
"gopkg.in/mgo.v2/bson"
|
|
db "jy/mongodbutil"
|
|
db "jy/mongodbutil"
|
|
@@ -112,7 +113,12 @@ type ExtractTask struct {
|
|
DistrictSimGet *ju.DFA //区或县简称
|
|
DistrictSimGet *ju.DFA //区或县简称
|
|
StreetGet *ju.DFA //街道
|
|
StreetGet *ju.DFA //街道
|
|
|
|
|
|
- XjbtCityArr []map[string]interface{} //新疆兵团相关数据
|
|
|
|
|
|
+ XjbtCityArr []map[string]interface{} //新疆兵团相关数据
|
|
|
|
+ //标准化地域信息
|
|
|
|
+ S_ProvinceDict map[string][]S_Province //标准省份-map
|
|
|
|
+ S_CityDict map[string][]S_City //标准城市-map
|
|
|
|
+ S_DistrictDict map[string][]S_District //标准区县-map
|
|
|
|
+
|
|
SensitiveFullCity *sensitive.Filter
|
|
SensitiveFullCity *sensitive.Filter
|
|
SensitiveSimCity *sensitive.Filter
|
|
SensitiveSimCity *sensitive.Filter
|
|
SensitiveFullDistrict *sensitive.Filter
|
|
SensitiveFullDistrict *sensitive.Filter
|
|
@@ -995,10 +1001,9 @@ func InitProvince(version string) map[string]interface{} {
|
|
//加载所有
|
|
//加载所有
|
|
func InitProvincesx() []map[string]interface{} {
|
|
func InitProvincesx() []map[string]interface{} {
|
|
defer qu.Catch()
|
|
defer qu.Catch()
|
|
- provinces := make([]map[string]interface{}, 0)
|
|
|
|
- ju.AddrsSess.Find(map[string]interface{}{
|
|
|
|
|
|
+ provinces, _ := ju.Qyxy_Mgo.Find("address_new_2020", map[string]interface{}{
|
|
"Remarks": nil,
|
|
"Remarks": nil,
|
|
- }).All(&provinces)
|
|
|
|
|
|
+ }, nil, nil)
|
|
return provinces
|
|
return provinces
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1033,6 +1038,61 @@ func (e *ExtractTask) InitXjbtCityInfo() {
|
|
e.XjbtCityArr = arr
|
|
e.XjbtCityArr = arr
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+//初始化标准地域信息
|
|
|
|
+func (e *ExtractTask) InitRegionInfo() {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ e.S_ProvinceDict = make(map[string][]S_Province, 0)
|
|
|
|
+ e.S_CityDict = make(map[string][]S_City, 0)
|
|
|
|
+ e.S_DistrictDict = make(map[string][]S_District, 0)
|
|
|
|
+ q := map[string]interface{}{
|
|
|
|
+ "town_code": map[string]interface{}{
|
|
|
|
+ "$exists": 0,
|
|
|
|
+ },
|
|
|
|
+ }
|
|
|
|
+ dataArr, _ := ju.Qyxy_Mgo.Find("address_jy_2022", q, nil, nil)
|
|
|
|
+ for _, tmp := range dataArr {
|
|
|
|
+ district_code := qu.IntAll(tmp["district_code"])
|
|
|
|
+ city_code := qu.IntAll(tmp["city_code"])
|
|
|
|
+ if district_code > 0 {
|
|
|
|
+ province := qu.ObjToString(tmp["province"])
|
|
|
|
+ city := qu.ObjToString(tmp["city"])
|
|
|
|
+ district := qu.ObjToString(tmp["district"])
|
|
|
|
+ data := S_District{province, city, district}
|
|
|
|
+ if e.S_DistrictDict[district] == nil {
|
|
|
|
+ e.S_DistrictDict[district] = []S_District{data}
|
|
|
|
+ } else {
|
|
|
|
+ arr := e.S_DistrictDict[district]
|
|
|
|
+ arr = append(arr, data)
|
|
|
|
+ e.S_DistrictDict[district] = arr
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ if city_code > 0 {
|
|
|
|
+ province := qu.ObjToString(tmp["province"])
|
|
|
|
+ city := qu.ObjToString(tmp["city"])
|
|
|
|
+ data := S_City{province, city}
|
|
|
|
+ if e.S_CityDict[city] == nil {
|
|
|
|
+ e.S_CityDict[city] = []S_City{data}
|
|
|
|
+ } else {
|
|
|
|
+ arr := e.S_CityDict[city]
|
|
|
|
+ arr = append(arr, data)
|
|
|
|
+ e.S_CityDict[city] = arr
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ province := qu.ObjToString(tmp["province"])
|
|
|
|
+ data := S_Province{province}
|
|
|
|
+ if e.S_ProvinceDict[province] == nil {
|
|
|
|
+ e.S_ProvinceDict[province] = []S_Province{data}
|
|
|
|
+ } else {
|
|
|
|
+ arr := e.S_ProvinceDict[province]
|
|
|
|
+ arr = append(arr, data)
|
|
|
|
+ e.S_ProvinceDict[province] = arr
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ log.Debug(fmt.Sprintf("城市配置加载完毕...省~%d 市~%d 区~%d", len(e.S_ProvinceDict), len(e.S_CityDict), len(e.S_DistrictDict)))
|
|
|
|
+}
|
|
|
|
+
|
|
//站点加载...
|
|
//站点加载...
|
|
func (e *ExtractTask) InitUpdateSite() {
|
|
func (e *ExtractTask) InitUpdateSite() {
|
|
defer qu.Catch()
|
|
defer qu.Catch()
|
|
@@ -1056,15 +1116,16 @@ func (e *ExtractTask) InitCityInfo() {
|
|
e.InitVar() //初始化变量
|
|
e.InitVar() //初始化变量
|
|
//新疆兵团数据
|
|
//新疆兵团数据
|
|
e.InitXjbtCityInfo()
|
|
e.InitXjbtCityInfo()
|
|
|
|
+ //标准地域信息
|
|
|
|
+ e.InitRegionInfo()
|
|
//site站点信息
|
|
//site站点信息
|
|
e.InitUpdateSite()
|
|
e.InitUpdateSite()
|
|
//初始化省信息
|
|
//初始化省信息
|
|
fn1 := InitProvince(e.TaskInfo.Version)
|
|
fn1 := InitProvince(e.TaskInfo.Version)
|
|
for k, v := range fn1 {
|
|
for k, v := range fn1 {
|
|
- for _, p := range v.([]interface{}) {
|
|
|
|
- p1, _ := p.(string)
|
|
|
|
- e.Trie_Full_Province.AddWords(p1) //华中科技大学
|
|
|
|
- e.ProvinceMap[p1] = k //华中科技大学:湖北
|
|
|
|
|
|
+ for _, p := range ju.ConvertInterface(v) {
|
|
|
|
+ e.Trie_Full_Province.AddWords(p) //华中科技大学
|
|
|
|
+ e.ProvinceMap[p] = k //华中科技大学:湖北
|
|
}
|
|
}
|
|
}
|
|
}
|
|
alldata := InitProvincesx()
|
|
alldata := InitProvincesx()
|
|
@@ -1123,10 +1184,9 @@ func (e *ExtractTask) InitCityInfo() {
|
|
e.Trie_Sim_Province.AddWords(jc_province) //加入省简称Trie(k:浙江)
|
|
e.Trie_Sim_Province.AddWords(jc_province) //加入省简称Trie(k:浙江)
|
|
e.ProvinceMap[all_province] = jc_province //浙江省:浙江
|
|
e.ProvinceMap[all_province] = jc_province //浙江省:浙江
|
|
e.ProvinceBriefMap[jc_province] = p //浙江:省信息{}
|
|
e.ProvinceBriefMap[jc_province] = p //浙江:省信息{}
|
|
- if province_alias, ok := provinces["province_alias"].([]interface{}); ok {
|
|
|
|
- for _, vprovince_alias := range province_alias {
|
|
|
|
- e.ProvinceBriefMap[qu.ObjToString(vprovince_alias)] = p
|
|
|
|
- }
|
|
|
|
|
|
+ province_alias := ju.ConvertInterface(provinces["province_alias"])
|
|
|
|
+ for _, vprovince_alias := range province_alias {
|
|
|
|
+ e.ProvinceBriefMap[qu.ObjToString(vprovince_alias)] = p
|
|
}
|
|
}
|
|
//加载市信息
|
|
//加载市信息
|
|
citys := citys_maps[jc_province]
|
|
citys := citys_maps[jc_province]
|
|
@@ -1147,17 +1207,17 @@ func (e *ExtractTask) InitCityInfo() {
|
|
e.CityBriefMap[c.Brief] = c //杭州:市信息{}
|
|
e.CityBriefMap[c.Brief] = c //杭州:市信息{}
|
|
}
|
|
}
|
|
c.P = p
|
|
c.P = p
|
|
- if city_alias, ok := vcity["city_alias"].([]interface{}); ok {
|
|
|
|
- for _, vcity_alias := range city_alias {
|
|
|
|
- strvcity_alias := qu.ObjToString(vcity_alias)
|
|
|
|
- if isok[jc_province+"_"+strvcity_alias] {
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
- e.CityBriefMap[strvcity_alias] = c
|
|
|
|
- e.initDistricts(jc_province, strvcity_alias, c, jc_city, districts_maps, towns_maps, jwhs_maps)
|
|
|
|
- isok[jc_province+"_"+strvcity_alias] = true
|
|
|
|
|
|
+ city_alias := ju.ConvertInterface(vcity["city_alias"])
|
|
|
|
+ for _, vcity_alias := range city_alias {
|
|
|
|
+ strvcity_alias := qu.ObjToString(vcity_alias)
|
|
|
|
+ if isok[jc_province+"_"+strvcity_alias] {
|
|
|
|
+ continue
|
|
}
|
|
}
|
|
|
|
+ e.CityBriefMap[strvcity_alias] = c
|
|
|
|
+ e.initDistricts(jc_province, strvcity_alias, c, jc_city, districts_maps, towns_maps, jwhs_maps)
|
|
|
|
+ isok[jc_province+"_"+strvcity_alias] = true
|
|
}
|
|
}
|
|
|
|
+
|
|
if isok[jc_province+"_"+qc_city] {
|
|
if isok[jc_province+"_"+qc_city] {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
@@ -1203,17 +1263,17 @@ func (e *ExtractTask) initDistricts(jc_province string, qc_city string, c *City,
|
|
} else {
|
|
} else {
|
|
e.DistrictCityMap[qc_district] = append(e.DistrictCityMap[qc_district], c)
|
|
e.DistrictCityMap[qc_district] = append(e.DistrictCityMap[qc_district], c)
|
|
}
|
|
}
|
|
- if district_alias, ok := vdistricts["district_alias"].([]interface{}); ok {
|
|
|
|
- for _, vdistrict_alias := range district_alias {
|
|
|
|
- strvdistrict_alias := qu.ObjToString(vdistrict_alias)
|
|
|
|
- e.Trie_Full_District.AddWords(strvdistrict_alias) //加入区或县全称Trie
|
|
|
|
- c_tmp := e.DistrictCityMap[strvdistrict_alias]
|
|
|
|
- if len(c_tmp) == 0 {
|
|
|
|
- tmpcarr := []*City{c}
|
|
|
|
- e.DistrictCityMap[strvdistrict_alias] = tmpcarr
|
|
|
|
- } else {
|
|
|
|
- e.DistrictCityMap[strvdistrict_alias] = append(e.DistrictCityMap[strvdistrict_alias], c)
|
|
|
|
- }
|
|
|
|
|
|
+
|
|
|
|
+ district_alias := ju.ConvertInterface(vdistricts["district_alias"])
|
|
|
|
+ for _, vdistrict_alias := range district_alias {
|
|
|
|
+ strvdistrict_alias := qu.ObjToString(vdistrict_alias)
|
|
|
|
+ e.Trie_Full_District.AddWords(strvdistrict_alias) //加入区或县全称Trie
|
|
|
|
+ c_tmp := e.DistrictCityMap[strvdistrict_alias]
|
|
|
|
+ if len(c_tmp) == 0 {
|
|
|
|
+ tmpcarr := []*City{c}
|
|
|
|
+ e.DistrictCityMap[strvdistrict_alias] = tmpcarr
|
|
|
|
+ } else {
|
|
|
|
+ e.DistrictCityMap[strvdistrict_alias] = append(e.DistrictCityMap[strvdistrict_alias], c)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//街道乡镇
|
|
//街道乡镇
|
|
@@ -1281,6 +1341,12 @@ func (e *ExtractTask) InitVar() {
|
|
e.StreetDistrictMap = make(map[string][]*District)
|
|
e.StreetDistrictMap = make(map[string][]*District)
|
|
//新疆兵团-数组
|
|
//新疆兵团-数组
|
|
e.XjbtCityArr = make([]map[string]interface{}, 0)
|
|
e.XjbtCityArr = make([]map[string]interface{}, 0)
|
|
|
|
+
|
|
|
|
+ //标准化地域信息
|
|
|
|
+ e.S_ProvinceDict = make(map[string][]S_Province, 0)
|
|
|
|
+ e.S_CityDict = make(map[string][]S_City, 0)
|
|
|
|
+ e.S_DistrictDict = make(map[string][]S_District, 0)
|
|
|
|
+
|
|
//敏感词-筛选
|
|
//敏感词-筛选
|
|
e.SensitiveFullCity = sensitive.New()
|
|
e.SensitiveFullCity = sensitive.New()
|
|
e.SensitiveSimCity = sensitive.New()
|
|
e.SensitiveSimCity = sensitive.New()
|