package extract import ( //"encoding/json" "fmt" "jy/clear" db "jy/mongodbutil" "jy/pretreated" ju "jy/util" "log" qu "qfw/util" "regexp" "strconv" "strings" "sync" "time" "gopkg.in/mgo.v2/bson" ) var ( lock sync.RWMutex cut = ju.NewCut() //获取正文并清理 ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志 TaskList map[string]*ExtractTask //任务列表 saveLimit = 200 //抽取日志批量保存 AreaGet DFA //敏感词 AreaProvinceGet DFA //敏感词 AreaSimGet DFA //敏感词 Fields = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}` ) var CitySimConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) //城市简称 var CityAllConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) //城市全称 var ProviceConfig map[string]interface{} = make(map[string]interface{}) //省份 var ProvinceMap map[string]string = make(map[string]string) var CityBrief map[string]*City = make(map[string]*City) //只加载一次即可 var ProvinceBrief map[string]*Province = make(map[string]*Province) //只加载一次 var AreaToCity map[string][]*City = make(map[string][]*City) //两个文件共用 //启动测试抽取 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool { defer qu.Catch() ext := &ExtractTask{} ext.Id = taskId ext.IsRun = true ext.InitTestTaskInfo(resultcoll, trackcoll) ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB) ext.InitRulePres() ext.InitRuleBacks() ext.InitRuleCore() ext.InitTag() ext.InitClearFn() return RunExtractTestTask(ext, startId, num) } func IdTrans(startId string) bson.ObjectId { defer qu.Catch() return bson.ObjectIdHex(startId) } //开始测试任务抽取 func RunExtractTestTask(ext *ExtractTask, startId, num string) bool { n, _ := strconv.Atoi(num) id := IdTrans(startId) if id.Valid() { query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(startId)}} list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, n) for _, v := range *list { j := PreInfo(v) ext.TaskInfo.ProcessPool <- true go ext.ExtractProcess(j) } return true } else { return false } } //启动抽取 func StartExtractTaskId(taskId string) bool { isgo := false ext := TaskList[taskId] if ext == nil { ext = &ExtractTask{} ext.Id = taskId ext.InitTaskInfo() isgo = true } else { ext.Id = taskId ext.InitTaskInfo() } ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB) ext.InitRulePres() ext.InitRuleBacks() ext.InitRuleCore() ext.InitTag() ext.InitClearFn() // ext.InitProvince() // ext.InitCityAll() // ext.InitCitySim() ext.IsRun = true if isgo { go RunExtractTask(taskId) } TaskList[taskId] = ext return true } //停止抽取 func StopExtractTaskId(taskId string) bool { ext := TaskList[taskId] if ext != nil { ext.IsRun = false TaskList[taskId] = ext } //更新task.s_extlastid db.Mgo.UpdateById("task", taskId, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`) return true } //开始抽取 func RunExtractTask(taskId string) { ext := TaskList[taskId] query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}} list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1) for k, v := range *list { log.Println(k, v["_id"]) if !ext.IsRun { break } j := PreInfo(v) ext.TaskInfo.ProcessPool <- true go ext.ExtractProcess(j) ext.TaskInfo.LastExtId = qu.BsonIdToSId(v["_id"]) } //更新task.s_extlastid db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`) time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) }) } //信息预处理 func PreInfo(doc map[string]interface{}) *ju.Job { detail := "" d1, _ := doc["detail"].(string) d2, _ := doc["contenthtml"].(string) if len(d1) >= len(d2) || d2 == "" { detail = d1 } else { detail = d2 } detail = ju.CutLableStr(detail) detail = cut.ClearHtml(detail) doc["detail"] = detail href := qu.ObjToString(doc["href"]) if strings.HasPrefix(href, "http://") { href = href[7:] } else if strings.HasPrefix(href, "https://") { href = href[8:] } pos := strings.Index(href, "/") if pos > 0 { href = href[:pos] } doc["domain"] = href toptype := qu.ObjToString(doc["toptype"]) if qu.ObjToString(doc["type"]) == "bid" { toptype = "结果" } if toptype == "" { toptype = "*" } j := &ju.Job{ SourceMid: qu.BsonIdToSId(doc["_id"]), Category: toptype, Content: qu.ObjToString(doc["detail"]), SpiderCode: qu.ObjToString(doc["spidercode"]), Domain: qu.ObjToString(doc["domain"]), Href: qu.ObjToString(doc["href"]), Title: qu.ObjToString(doc["title"]), Data: &doc, City: qu.ObjToString(doc["city"]), Province: qu.ObjToString(doc["area"]), Result: map[string][]*ju.ExtField{}, //BuyerAddr: qu.ObjToString(doc["buyeraddr"]), } pretreated.AnalyStart(j) return j } //抽取 func (e *ExtractTask) ExtractProcess(j *ju.Job) { qu.Catch() qu.Try(func() { doc := *j.Data //全局前置规则,结果覆盖doc属性 for _, v := range e.RulePres { doc = ExtRegPre(doc, j, v, e.TaskInfo) } //log.Println("全局前置规则", doc) //抽取规则 for _, vc := range e.RuleCores { tmp := ju.DeepCopy(doc).(map[string]interface{}) //是否进入逻辑 if !ju.Logic(vc.LuaLogic, tmp) { continue } //抽取-前置规则 for _, v := range vc.RulePres { tmp = ExtRegPre(tmp, j, v, e.TaskInfo) } //log.Println("抽取-前置规则", tmp) //抽取-规则 for _, v := range vc.RuleCores { ExtRegCore(vc.ExtFrom, tmp, j, v, e) } //log.Println("抽取-规则", tmp) //抽取-后置规则 for _, v := range vc.RuleBacks { ExtRegBack(j, v, e.TaskInfo) } //log.Println("抽取-后置规则", tmp) } //全局后置规则 for _, v := range e.RuleBacks { ExtRegBack(j, v, e.TaskInfo) } //函数清理 for key, val := range j.Result { for _, v := range val { data := clear.DoClearFn(e.ClearFn[key], []interface{}{v.Value, j.Content}) v.Value = data[0] } } //bs, _ := json.Marshal(j.Result) //log.Println("抽取结果", j.Title, j.SourceMid, string(bs)) //抽取省份城市县 //fmt.Println("-----------", j.Province, j.City, j.BuyerAddr, j.Title) //j.Address //ExtractPC(j.Result, j.Province, j.City, j.Title, j.BuyerAddr, j.SourceMid) //j.Address ExtractPC2(j.Result, "Province", "City", "Title", "Addr", j.SourceMid) //分析抽取结果并保存 todo AnalysisSaveResult(j.Data, j.Result, e.TaskInfo) }, func(err interface{}) { log.Println(err) <-e.TaskInfo.ProcessPool }) <-e.TaskInfo.ProcessPool } //前置过滤 func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} { before := ju.DeepCopy(doc).(map[string]interface{}) extinfo := map[string]interface{}{} if in.IsLua { lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText} if j != nil { lua.Block = j.Block } extinfo = lua.RunScript("pre") for k, v := range extinfo { //结果覆盖原doc doc[k] = v } AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志 } else { key := qu.If(in.Field == "", "detail", in.Field).(string) text := qu.ObjToString(doc[key]) extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "") doc[key] = extinfo[key] //结果覆盖原doc AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志 } return doc } //抽取-规则 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) { if in.IsLua { lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText} if in.IsHasFields { //lua脚本配置有属性字段 lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag) } else { lua.KvMap = map[string][]map[string]interface{}{} } lua.Block = j.Block extinfo := lua.RunScript("core") for k, v := range extinfo { if j.Result[k] == nil { j.Result[k] = [](*ju.ExtField){} } if tmps, ok := v.([]map[string]interface{}); ok { for _, tmp := range tmps { j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]}) } } } if len(extinfo) > 0 { AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志 } } else { //全文正则 text := qu.ObjToString(doc[extfrom]) if in.Field != "" { extinfo := extRegCoreToResult(extfrom, text, j, in) if len(extinfo) > 0 { AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志 } } } } //lua脚本根据属性设置提取kv值 func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} { kvmap := map[string][]map[string]interface{}{} for _, vv := range in.LFields { field := qu.ObjToString(vv) tags := t[qu.ObjToString(vv)] //获取对应标签库 for _, bl := range j.Block { //冒号kv if bl.ColonKV != nil { kvs := bl.ColonKV.Kvs kvs2 := bl.ColonKV.Kvs_2 for _, tag := range tags { for _, kv := range kvs { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon1", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon1", "matchtype": "tag_regexp", }) } break } } } for _, kv := range kvs2 { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon2", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "colon2", "matchtype": "tag_regexp", }) } break } } } } } //空格kv if bl.SpaceKV != nil { kvs := bl.SpaceKV.Kvs for _, tag := range tags { for _, kv := range kvs { if tag.Type == "string" { if kv.Key == tag.Key { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "space", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(kv.Key) { text := ju.TrimLRSpace(kv.Value, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "space", "matchtype": "tag_regexp", }) } break } } } } } //表格kv if bl.TableKV != nil { kv := bl.TableKV.Kv for _, tag := range tags { for k, val := range kv { if tag.Type == "string" { if k == tag.Key { text := ju.TrimLRSpace(val, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "table", "matchtype": "tag_string", }) } break } } else if tag.Type == "regexp" { if tag.Reg.MatchString(k) { text := ju.TrimLRSpace(val, "") if text != "" { kvmap[field] = append(kvmap[field], map[string]interface{}{ "field": field, "code": in.Code, "ruletext": tag.Key, "extfrom": extfrom, "value": text, "type": "table", "matchtype": "tag_regexp", }) } break } } } } } } } return kvmap } //正则提取结果 func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} { extinfo := map[string][]map[string]interface{}{} if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线) apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1) if len(apos) > 0 { pos := apos[0] for k, p := range v.RegCore.ExtractPos { if len(pos) > p { if pos[p] == -1 || pos[p+1] == -1 { continue } val := text[pos[p]:pos[p+1]] tmps := []map[string]interface{}{} tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "extfrom": extfrom, "value": val, "type": "regexp", "matchtype": "regcontent", } tmps = append(tmps, tmp) extinfo[k] = tmps if val != "" { if j.Result[v.Field] == nil { j.Result[k] = [](*ju.ExtField){} } j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val}) } } } } } else { pos := v.RegCore.Reg.FindStringIndex(text) val := "" if len(pos) == 2 { text = text[pos[1]:] rs := regexp.MustCompile("[^\r\n\t]+") tmp := rs.FindAllString(text, -1) if len(tmp) > 0 { val = tmp[0] } } if val != "" { tmps := []map[string]interface{}{} tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "extfrom": extfrom, "value": val, "type": "regexp", "matchtype": "regcontent", } tmps = append(tmps, tmp) extinfo[v.Field] = tmps if j.Result[v.Field] == nil { j.Result[v.Field] = [](*ju.ExtField){} } j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val}) } } return extinfo } //后置过滤 func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) { if in.IsLua { result := GetResultMapForLua(j) lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText} if j != nil { lua.Block = j.Block } extinfo := lua.RunScript("back") for k, v := range extinfo { if tmps, ok := v.([]map[string]interface{}); ok { j.Result[k] = [](*ju.ExtField){} for _, tmp := range tmps { j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]}) } } } if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, result, extinfo, in, t) //抽取日志 } } else { extinfo := map[string]interface{}{} if in.Field != "" { if j.Result[in.Field] != nil { tmp := j.Result[in.Field] exts := []interface{}{} for k, v := range tmp { text := qu.ObjToString(v.Value) if text != "" { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } j.Result[in.Field][k].Value = text exts = append(exts, map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, "value": text, }) } extinfo[in.Field] = exts if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) //抽取日志 } } } else { for key, tmp := range j.Result { exts := []interface{}{} for k, v := range tmp { text := qu.ObjToString(v.Value) if text != "" { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } j.Result[key][k].Value = text exts = append(exts, map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, "value": text, }) } extinfo[key] = exts } if len(extinfo) > 0 { AddExtLog("clear", j.SourceMid, j.Result, extinfo, in, t) //抽取日志 } } } } //获取抽取结果map[string][]interface{},lua脚本使用 func GetResultMapForLua(j *ju.Job) map[string][]map[string]interface{} { result := map[string][]map[string]interface{}{} for key, val := range j.Result { if result[key] == nil { result[key] = []map[string]interface{}{} } for _, v := range val { tmp := map[string]interface{}{ "field": v.Field, "code": v.Code, "ruletext": v.RuleText, "value": v.Value, "type": v.Type, "matchtype": v.MatchType, "extfrom": v.ExtFrom, } result[key] = append(result[key], tmp) } } return result } //抽取日志 func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *RegLuaInfo, t *TaskInfo) { if !t.IsEtxLog { return } logdata := map[string]interface{}{ "code": v.Code, "name": v.Name, "type": ftype, "ruletext": v.RuleText, "islua": v.IsLua, "field": v.Field, "version": t.Version, "taskname": t.Name, "before": before, "extinfo": extinfo, "sid": sid, "comeintime": time.Now().Unix(), } lock.Lock() ExtLogs[t] = append(ExtLogs[t], logdata) lock.Unlock() } //保存抽取日志 func SaveExtLog() { tmpLogs := map[*TaskInfo][]map[string]interface{}{} lock.Lock() tmpLogs = ExtLogs ExtLogs = map[*TaskInfo][]map[string]interface{}{} lock.Unlock() for k, v := range tmpLogs { if len(v) < saveLimit { db.Mgo.SaveBulk(k.TrackColl, v...) } else { for { if len(v) > saveLimit { tmp := v[:saveLimit] db.Mgo.SaveBulk(k.TrackColl, tmp...) v = v[saveLimit:] } else { db.Mgo.SaveBulk(k.TrackColl, v...) break } } } } time.AfterFunc(10*time.Second, SaveExtLog) } type FieldValue struct { Value interface{} Count int } //分析抽取结果并保存 func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.ExtField, task *TaskInfo) { _id := qu.BsonIdToSId((*doc)["_id"]) //结果排序 values := map[string][]*ju.SortObject{} for key, val := range result { fieldValue := map[string][]interface{}{} for _, v := range val { if fieldValue[fmt.Sprint(v.Value)] == nil { fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value} } else { fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1 } } objects := []*ju.SortObject{} for k, v := range fieldValue { tmp := &ju.SortObject{ Key: k, Value: qu.IntAll(v[0]), Object: v[1], } objects = append(objects, tmp) } values[key] = ju.ExtSort(objects) } //从排序结果中取值 tmp := map[string]interface{}{} for key, val := range values { for _, v := range val { //取第一个 if v.Key != "" { tmp[key] = v.Object break } } } if task.TestColl == "" { if len(tmp) > 0 { //保存抽取结果 task.DB.Update(task.SaveColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false) } //保存抽取详情 tmp["result"] = result for k, v := range *doc { if tmp[k] == nil { //&& (k != "detail" || k != "contenthtml") { tmp[k] = v } } db.Mgo.Update("extract_result", `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false) } else { //测试结果 //保存抽取详情 tmp["result"] = result for k, v := range *doc { if tmp[k] == nil { //&& (k != "detail" || k != "contenthtml") { tmp[k] = v } } db.Mgo.Update(task.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false) } } //抽取城市、省份 func ExtractPC2(result map[string][]*ju.ExtField, province, city, title, addr, sourcemid string) (bres bool, c, p string) { var pjnarr, buyerarr []string var pb []interface{} for n, val := range result["projectname"] { pjnarr[n] = fmt.Sprint(val.Value) } for n, val := range result["buyer"] { buyerarr[n] = fmt.Sprint(val.Value) } pl := len(pjnarr) bl := len(buyerarr) max := 0 if pl > bl { max = pl } else { max = bl } //city, buyer, addr, projectname, title if max == 0 { //没有projectname和buyer结果集 tmp1 := []string{city, "", addr, "", title} pb = append(pb, tmp1) } else { //至少有一个结果集 if max == pl { for i := 0; i < max; i++ { p := pjnarr[i] b := "" if i < bl { b = buyerarr[i] } tmp2 := []string{city, b, addr, p, title} pb = append(pb, tmp2) } } else { for i := 0; i < max; i++ { b := buyerarr[i] p := "" if i < pl { p = pjnarr[i] } tmp3 := []string{city, b, addr, p, title} pb = append(pb, tmp3) } } } log.Println(pb) return } func ExtractPC(buyer, projectname, title, city, province, addr string, id interface{}) (bres bool, c, p string) { defer qu.Catch() bc := true //是否继续抽取 if city != "" { if CityBrief[city] == nil { //简称不存在 //log.Println("city err:", city, id) } else { //简称存在 if province != CityBrief[city].P.Brief { //省份不对 log.Println("province err:", city, province, id) } else { bc = false //原值正确,不用抽取 } } } //有省份 bp := false if ProvinceBrief[province] != nil { bp = true } else { //没有省份,先识别省份 for _, str := range []string{city, buyer, addr, projectname, title} { word := AreaProvinceGet.CheckSensitiveWord(str) //省全称 if word != "" { province = ProvinceMap[word] //省简称 bp = true break } } } //匹配城市 if bc { //城市简称不存在CityBrief[city]==nil,或城市简称存在但省份不对,继续抽取 //目前是全匹配模式,如果再加上精简匹配,加一层循环 for pos, GET := range []DFA{AreaGet, AreaSimGet} { ws := make([]string, 5) for n, str := range []string{city, buyer, addr, projectname, title} { if str != "" { word := GET.CheckSensitiveWord(str) if pos == 1 { //用简称 后辍为路、集团替换 str1 := strings.Replace(str, word+"路", "", 1) if str1 != str { word = GET.CheckSensitiveWord(str1) } } ws[n] = word if word != "" { res := AreaToCity[word] if len(res) == 1 { //判断省份 if !bp || province == res[0].P.Brief { //省份不存在或一致直接返回 bres = true c = res[0].Brief p = res[0].P.Brief break } else { //不一致时。。暂时不处理 } } else { //多个时 } } } } if !bres { mc := map[string]int{} for _, w := range ws { res := AreaToCity[w] for _, ct := range res { if ct == nil { continue } if bp { //有省份 if ct.P != nil && ct.P.Brief == province { mc[ct.Brief]++ } } else { //没有省份 mc[ct.Brief]++ } } } //计算mc中最大值且大于1 max := 1 v := "" for mk, mv := range mc { if mv > max { v = mk } } if v != "" { bres = true c = CityBrief[v].Brief p = CityBrief[v].P.Brief } else if len(mc) > 0 { //取级别更大的 v := "" for mk, _ := range mc { if CityBrief[mk].P.Cap == mk { bres = true c = CityBrief[mk].Brief p = CityBrief[mk].P.Brief break } else { v = mk } } if !bres { bres = true c = CityBrief[v].Brief p = CityBrief[v].P.Brief } } } if bres { break } } } else { return } if !bres { //取默认省会 if ProvinceBrief[province] != nil { bres = true c = ProvinceBrief[province].Cap p = province } } return }