// Package extract locates the province, city and district of a record from
// its configured text fields.
package extract

import (
	"fmt"
	"log"
	"strings"

	qu "qfw/util"
)

// Province is a province-level region.
type Province struct {
	Name  string
	Brief string // short name; used as the key/value when comparing provinces
	Cap   string // compared against city short names and used as the default city
	// Captial is not referenced in this file; presumably the capital city.
	// The misspelling is kept because the field is exported.
	Captial *City
}

// City is a city-level region.
type City struct {
	Name  string
	Brief string    // short name
	P     *Province // owning province
}

// District is a district or county.
type District struct {
	Name string
	C    *City // owning city
}

// Street is a street / sub-district.
type Street struct {
	Name string
	D    *District // owning district
}

// DFA is a byte-level keyword trie ("sensitive word" matcher).
//
// Each node is a map whose single-byte string keys lead to child nodes
// (*map[string]interface{}). Two reserved keys carry metadata: "YN" is "1"
// when a keyword ends at the node and "0" otherwise, and "K" holds the full
// keyword when it was added with haskey == true.
type DFA struct {
	Link map[string]interface{}
}

// SortField lists the record fields, in order, that are scanned for region
// names. It is loaded from ./extractcity.json in init.
var SortField []string

func init() {
	qu.ReadConfig("./extractcity.json", &SortField)
}

// TransmitData extracts province (p), city (c) and district (d) for one record.
//
// resulttmp is the record; its "area" and "city" entries are used as the
// already-known province and city, and the entries named in SortField are the
// texts that get scanned. id is only passed through for logging.
// bres reports whether a province/city pair was determined.
func (e *ExtractTask) TransmitData(resulttmp map[string]interface{}, id string) (bres bool, p, c, d string) {
	defer qu.Catch()

	// A missing value must become "", not the literal "<nil>" that
	// fmt.Sprint(nil) produces; otherwise a record without a city is
	// reported as an unknown city named "<nil>".
	text := func(v interface{}) string {
		if v == nil {
			return ""
		}
		return fmt.Sprint(v)
	}

	province := text(resulttmp["area"])
	city := text(resulttmp["city"])

	fieldval := make([]string, 0, len(SortField))
	for _, f := range SortField {
		fieldval = append(fieldval, text(resulttmp[f]))
	}

	// Province and city first, then refine / correct them via district or street.
	bres, c, p = e.ExtractProvinceCity(province, city, id, fieldval)
	bres, p, c, d = e.ExtractDistrict(fieldval, bres, c, p, id)
	return
}

// ExtractDistrict scans the configured field texts for a district/county name
// and, failing that, a street name.
//
// For each text the district matcher is tried before the street matcher. The
// function returns on the first keyword hit, even when that keyword is not
// present in the lookup map. When the lookup succeeds, the city and province
// derived from the district (or from the street's district) replace c and p
// and bres becomes true, regardless of what was extracted before.
//
// It returns (bres, p, c, d). id is currently unused.
func (e *ExtractTask) ExtractDistrict(field []string, bres bool, c, p, id string) (bool, string, string, string) {
	d := ""
	for _, str := range field {
		// Index 0 matches districts/counties, index 1 matches streets.
		for pos, dfa := range []DFA{e.AreaDistrict, e.AreaStreet} {
			word := dfa.CheckSensitiveWord(str)
			if word == "" {
				continue
			}

			var ctmp, ptmp string
			found := false
			if pos == 0 {
				lock.Lock()
				city := e.DistrictCityMap[word]
				lock.Unlock()
				if city != nil {
					d = word
					ctmp, ptmp, found = city.Brief, city.P.Brief, true
				}
			} else {
				lock.Lock()
				district := e.StreetDistrictMap[word]
				lock.Unlock()
				if district != nil {
					d = district.Name
					ctmp, ptmp, found = district.C.Brief, district.C.P.Brief, true
				}
			}

			// The district-derived pair always wins: it either fills in a
			// missing city/province or overrides an inconsistent one.
			if found {
				c, p, bres = ctmp, ptmp, true
			}
			return bres, p, c, d
		}
	}
	return bres, p, c, d
}

// ExtractProvinceCity determines the city (c) and province (p) short names.
//
// province and city are the values already present on the record, id is used
// in log messages only, and text holds the field values to scan.
//
// Steps:
//  1. If city is a known short name whose province equals province, nothing
//     needs extracting and the function returns the zero results
//     (bres == false, c == "", p == "").
//     NOTE(review): confirm callers expect empty results in this case.
//  2. If province is not a known short name, the first province keyword found
//     in text replaces it.
//  3. Cities are matched first with e.AreaGet, then with e.AreaSimGet. An
//     unambiguous hit compatible with the province wins immediately;
//     otherwise the candidates from all texts are counted and the most
//     frequent one (count > 1) wins, then a candidate equal to its province's
//     Cap, then any remaining candidate.
//  4. If nothing matched, the province's Cap is used as the city.
//
// Panics are recovered by qu.Catch, in which case the named results hold
// whatever was assigned so far.
func (e *ExtractTask) ExtractProvinceCity(province, city, id string, text []string) (bres bool, c, p string) {
	defer qu.Catch()

	// needCity stays true unless the supplied city is known and belongs to
	// the supplied province.
	needCity := true
	if city != "" {
		lock.Lock()
		known := e.CityBrief[city]
		lock.Unlock()
		// Dereference only after unlocking so a nil pointer cannot leave
		// the lock held once the panic is recovered.
		switch {
		case known == nil:
			log.Println("city err:", city, id)
		case known.P == nil || known.P.Brief != province:
			log.Println("province err:", city, province, id)
		default:
			needCity = false
		}
	}

	// Validate the province; when unknown, look for a province name in text.
	lock.Lock()
	haveProvince := e.ProvinceBrief[province] != nil
	lock.Unlock()
	if !haveProvince {
		for _, str := range text {
			if word := e.AreaProvinceGet.CheckSensitiveWord(str); word != "" {
				lock.Lock()
				province = e.ProvinceMap[word]
				lock.Unlock()
				haveProvince = true
				break
			}
		}
	}

	if !needCity {
		return
	}

	// Pass 0 uses e.AreaGet, pass 1 uses e.AreaSimGet.
	for pos, dfa := range []DFA{e.AreaGet, e.AreaSimGet} {
		// One slot per text; sized from text so any number of configured
		// fields is safe.
		ws := make([]string, len(text))
		for n, str := range text {
			if str == "" {
				continue
			}
			word := dfa.CheckSensitiveWord(str)
			if pos == 1 {
				// A hit directly followed by "路" (road) is treated as part
				// of a road name: strip it once and rescan.
				// NOTE(review): when word is "", this strips the first bare
				// "路" instead — kept as in the original.
				if str1 := strings.Replace(str, word+"路", "", 1); str1 != str {
					word = dfa.CheckSensitiveWord(str1)
				}
			}
			ws[n] = word
			if word == "" {
				continue
			}

			lock.Lock()
			res := e.AreaToCity[word]
			lock.Unlock()
			// Several candidates (same city name in different provinces) are
			// resolved by the vote below.
			if len(res) != 1 {
				continue
			}
			only := res[0]
			if only == nil || only.P == nil {
				continue
			}
			// Accept when no province is known or the provinces agree; a
			// mismatch is deliberately left unhandled here.
			if !haveProvince || province == only.P.Brief {
				bres = true
				c = only.Brief
				p = only.P.Brief
				break
			}
		}

		if !bres {
			// Count how often each candidate city appears across all texts,
			// restricted to the known province when there is one.
			mc := map[string]int{}
			for _, w := range ws {
				lock.Lock()
				res := e.AreaToCity[w]
				lock.Unlock()
				for _, ct := range res {
					if ct == nil {
						continue
					}
					if haveProvince && (ct.P == nil || ct.P.Brief != province) {
						continue
					}
					mc[ct.Brief]++
				}
			}

			// Most frequent candidate, requiring at least two occurrences.
			best, v := 1, ""
			for mk, mv := range mc {
				if mv > best {
					best, v = mv, mk
				}
			}

			if v != "" {
				lock.Lock()
				ctb := e.CityBrief[v]
				lock.Unlock()
				if ctb != nil && ctb.P != nil {
					bres = true
					c = ctb.Brief
					p = ctb.P.Brief
				}
			} else if len(mc) > 0 {
				// No repeated candidate: prefer one that equals its
				// province's Cap, otherwise fall back to any candidate.
				var fallbackC, fallbackP string
				haveFallback := false
				for mk := range mc {
					lock.Lock()
					cb := e.CityBrief[mk]
					lock.Unlock()
					if cb == nil || cb.P == nil {
						continue
					}
					if cb.P.Cap == mk {
						bres = true
						c = cb.Brief
						p = cb.P.Brief
						break
					}
					fallbackC, fallbackP, haveFallback = cb.Brief, cb.P.Brief, true
				}
				if !bres && haveFallback {
					bres = true
					c = fallbackC
					p = fallbackP
				}
			}
		}

		if bres {
			break
		}
	}

	// Nothing matched: default to the province's Cap.
	if !bres {
		lock.Lock()
		pbp := e.ProvinceBrief[province]
		lock.Unlock()
		if pbp != nil {
			bres = true
			c = pbp.Cap
			p = province
		}
	}
	return
}

// AddWord inserts keys into the trie and stores each full keyword at its
// terminal node, so CheckSensitiveWord can return it.
func (d *DFA) AddWord(keys ...string) {
	d.AddWordAll(true, keys...)
}

// AddWordAll inserts keys into the trie byte by byte.
//
// The terminal node of each key gets "YN" = "1"; when haskey is true it also
// gets "K" = key. Empty keys are ignored.
//
// NOTE(review): the reserved key "K" shares the node map with single-byte
// child keys, so a keyword that continues with the byte 'K' right after
// another stored keyword collides with that metadata entry. Keywords made of
// multi-byte UTF-8 characters are not affected.
func (d *DFA) AddWordAll(haskey bool, keys ...string) {
	if d.Link == nil {
		d.Link = make(map[string]interface{})
	}
	for _, key := range keys {
		nowMap := &d.Link
		for i := 0; i < len(key); i++ {
			kc := key[i : i+1]
			if v, ok := (*nowMap)[kc]; ok {
				// Descend into the existing child.
				nowMap, _ = v.(*map[string]interface{})
			} else {
				// Create a child that is not (yet) the end of a keyword.
				newMap := map[string]interface{}{}
				newMap["YN"] = "0"
				(*nowMap)[kc] = &newMap
				nowMap = &newMap
			}
			if i == len(key)-1 {
				(*nowMap)["YN"] = "1"
				if haskey {
					(*nowMap)["K"] = key
				}
			}
		}
	}
}

// CheckSensitiveWord returns the first keyword found in src, or "" when there
// is none.
//
// src is scanned byte by byte. Once a keyword has been matched the scan keeps
// following the trie so that a longer keyword sharing the same prefix replaces
// it, and stops at the first byte that no longer fits. When a partial match
// fails before any keyword was completed, scanning restarts one byte after
// the position where that partial match began.
func (d *DFA) CheckSensitiveWord(src string) string {
	// start is the index where the current partial match began, or -1 when
	// no match is in progress. -1 (not 0) is the sentinel so that a partial
	// match beginning at the first byte is also backtracked correctly.
	start := -1
	nowMap := &d.Link
	res := ""
	for i := 0; i < len(src); i++ {
		next, _ := (*nowMap)[src[i:i+1]].(*map[string]interface{})
		if next != nil {
			nowMap = next
			if start < 0 {
				start = i
			}
			if "1" == qu.ObjToString((*nowMap)["YN"]) {
				// End of a keyword; keep going in case a longer one follows.
				res = qu.ObjToString((*nowMap)["K"])
			}
			continue
		}

		if res != "" {
			break
		}
		// Dead end: return to the root and, if a partial match was in
		// progress, rewind so the next iteration starts at start+1.
		nowMap = &d.Link
		if start >= 0 {
			i = start
			start = -1
		}
	}
	return res
}