|
@@ -2,7 +2,6 @@
|
|
|
package extract
|
|
|
|
|
|
import (
|
|
|
- "fmt"
|
|
|
db "jy/mongodbutil"
|
|
|
"log"
|
|
|
qu "qfw/util"
|
|
@@ -379,7 +378,13 @@ func (e *ExtractTask) InitProvince() {
|
|
|
list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
for _, v := range *list {
|
|
|
name := qu.ObjToString(v["s_name"])
|
|
|
- fn[name] = qu.ObjArrToStringArr(v["content"].([]interface{}))
|
|
|
+ content := v["content"]
|
|
|
+ switch content.(type) {
|
|
|
+ case string:
|
|
|
+ fn[name] = []interface{}{content.(string)}
|
|
|
+ case []interface{}:
|
|
|
+ fn[name] = content
|
|
|
+ }
|
|
|
}
|
|
|
ProviceConfig = fn
|
|
|
}
|
|
@@ -401,9 +406,9 @@ func (e *ExtractTask) InitCitySim() {
|
|
|
func (e *ExtractTask) InitCityAll() {
|
|
|
defer qu.Catch()
|
|
|
list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
|
|
|
- if len(*list) != 34 {
|
|
|
- fmt.Println("加载城市配置文件出错", len(*list))
|
|
|
- }
|
|
|
+ // if len(*list) != 34 {
|
|
|
+ // fmt.Println("加载城市配置文件出错", len(*list))
|
|
|
+ // }
|
|
|
fn := map[string]map[string]interface{}{}
|
|
|
for _, v := range *list {
|
|
|
name := qu.ObjToString(v["s_name"])
|
|
@@ -414,25 +419,31 @@ func (e *ExtractTask) InitCityAll() {
|
|
|
}
|
|
|
|
|
|
//初始化城市省份敏感词
|
|
|
-func InitDfa() {
|
|
|
+func InitDFA() {
|
|
|
AreaGet = DFA{}
|
|
|
AreaProvinceGet = DFA{}
|
|
|
for k, v := range ProviceConfig {
|
|
|
+ log.Println(k, "----------", v)
|
|
|
for _, p := range v.([]interface{}) {
|
|
|
+ log.Println("ppppp", p)
|
|
|
p1, _ := p.(string)
|
|
|
AreaProvinceGet.AddWord(p1)
|
|
|
ProvinceMap[p1] = k
|
|
|
}
|
|
|
}
|
|
|
+ log.Println("ProvinceMap11----", ProvinceMap)
|
|
|
for k, v := range CityAllConfig {
|
|
|
- AreaProvinceGet.AddWord(k)
|
|
|
+ AreaProvinceGet.AddWord(k) //省全称
|
|
|
p := &Province{}
|
|
|
p.Name = k
|
|
|
p.Brief = v["brief"].(string)
|
|
|
ProvinceMap[k] = p.Brief
|
|
|
+ log.Println("ProvinceMap22----", ProvinceMap)
|
|
|
ProvinceBrief[p.Brief] = p
|
|
|
p.Cap = v["captial"].(string)
|
|
|
+ log.Println("ProvinceBrief11====", p.Brief, ProvinceBrief[p.Brief].Name, ProvinceBrief[p.Brief].Brief, "==", ProvinceBrief[p.Brief].Cap)
|
|
|
city, _ := v["city"].(map[string]interface{})
|
|
|
+ log.Println("======================================================")
|
|
|
for k1, v1 := range city {
|
|
|
v1m, _ := v1.(map[string]interface{})
|
|
|
c := &City{}
|
|
@@ -447,15 +458,17 @@ func InitDfa() {
|
|
|
if c.Brief == p.Cap {
|
|
|
p.Captial = c
|
|
|
}
|
|
|
+ log.Println("CityBrief11+++", k1, "---", CityBrief[c.Brief].Name, CityBrief[c.Brief].Brief, "===", CityBrief[c.Brief].P.Captial, "===", CityBrief[c.Brief].P.Name)
|
|
|
//加入到城市map中
|
|
|
cs := AreaToCity[k1]
|
|
|
- AreaGet.AddWord(k1)
|
|
|
+ AreaGet.AddWord(k1) //市全称
|
|
|
if cs != nil {
|
|
|
cs = append(cs, c)
|
|
|
} else {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
AreaToCity[k1] = cs
|
|
|
+ log.Println("市---", k1, AreaToCity[k1][0].Brief, AreaToCity[k1][0].Name, AreaToCity[k1][0].P.Name)
|
|
|
|
|
|
/*
|
|
|
AreaToCity["衢州市"] = []interface{}{
|
|
@@ -471,25 +484,28 @@ func InitDfa() {
|
|
|
for _, k2 := range arr {
|
|
|
s := k2.(string)
|
|
|
cs := AreaToCity[s]
|
|
|
- AreaGet.AddWord(s)
|
|
|
+ AreaGet.AddWord(s) //街道全称
|
|
|
if cs != nil {
|
|
|
cs = append(cs, c)
|
|
|
} else {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
AreaToCity[s] = cs
|
|
|
+ log.Println("街道===", k2, AreaToCity)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ log.Println("======================================================")
|
|
|
//加载简称
|
|
|
AreaSimGet = DFA{}
|
|
|
//util.ReadConfig("./city_sim.json", &CitySimConfig)
|
|
|
- if len(CitySimConfig) != 34 {
|
|
|
- log.Println("加载简称配置文件出错", len(CitySimConfig))
|
|
|
- }
|
|
|
+ // if len(CitySimConfig) != 34 {
|
|
|
+ // log.Println("加载简称配置文件出错", len(CitySimConfig))
|
|
|
+ // }
|
|
|
for k, v := range CitySimConfig {
|
|
|
pb := v["brief"].(string)
|
|
|
p := ProvinceBrief[pb]
|
|
|
+ log.Println("++++++++++++++++++", p)
|
|
|
//加载
|
|
|
for _, ss := range []string{k, pb} {
|
|
|
cs := AreaToCity[ss]
|
|
@@ -499,7 +515,8 @@ func InitDfa() {
|
|
|
cs = []*City{p.Captial}
|
|
|
}
|
|
|
AreaToCity[ss] = cs
|
|
|
- AreaSimGet.AddWord(ss)
|
|
|
+ log.Println("+++", ss, AreaToCity)
|
|
|
+ AreaSimGet.AddWord(ss) //省全称和省简称
|
|
|
}
|
|
|
city, _ := v["city"].(map[string]interface{})
|
|
|
for k1, v1 := range city {
|
|
@@ -510,7 +527,7 @@ func InitDfa() {
|
|
|
cb := v1m["brief"].(string)
|
|
|
c := AreaToCity[k1][0]
|
|
|
//加入到城市map中
|
|
|
- for _, ss := range []string{cb, k + cb, pb + cb} {
|
|
|
+ for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州 浙江杭州
|
|
|
AreaSimGet.AddWord(ss)
|
|
|
cs := AreaToCity[ss]
|
|
|
if cs != nil {
|
|
@@ -519,11 +536,12 @@ func InitDfa() {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
AreaToCity[ss] = cs
|
|
|
+ log.Println("+-+-", ss, AreaToCity)
|
|
|
}
|
|
|
arr := v1m["area"].([]interface{})
|
|
|
for _, k2 := range arr {
|
|
|
s := k2.(string)
|
|
|
- for _, ss := range []string{s, cb + s, pb + s, k + s} {
|
|
|
+ for _, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
|
|
|
cs := AreaToCity[ss]
|
|
|
AreaSimGet.AddWord(ss)
|
|
|
if cs != nil {
|
|
@@ -532,10 +550,12 @@ func InitDfa() {
|
|
|
cs = []*City{c}
|
|
|
}
|
|
|
AreaToCity[ss] = cs
|
|
|
+ log.Println("-+-+", ss, AreaToCity)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ log.Println(AreaToCity)
|
|
|
}
|
|
|
|
|
|
func (d *DFA) AddWord(keys ...string) {
|
|
@@ -568,7 +588,6 @@ func (d *DFA) AddWordAll(haskey bool, keys ...string) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//匹配最长
|
|
|
func (d *DFA) CheckSensitiveWord(src string) string {
|
|
|
pos := 0
|
|
|
nowMap := &d.Link
|