extractInit.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. // extractInit
  2. package extract
  3. import (
  4. db "jy/mongodbutil"
  5. "log"
  6. qu "qfw/util"
  7. "regexp"
  8. "strings"
  9. )
  10. type RegLuaInfo struct { //正则或脚本信息
  11. Code, Name, Field string //
  12. RuleText string //
  13. IsLua, IsHasFields bool //IsHasFields正则配置有属性字段
  14. RegPreBac *ExtReg //
  15. RegCore *ExtReg //
  16. LFields []interface{} //lua抽取字段属性组
  17. }
// ExtReg is a compiled regex rule together with what to do with its matches.
type ExtReg struct {
	// Reg is the compiled pattern (the rule text before the "__" separator).
	Reg *regexp.Regexp
	// Replace is the replacement text of a pre/post-processing rule; empty
	// when the rule text has no "__" part.
	Replace string
	// Bextract is true for extraction rules whose text carries a position list.
	Bextract bool
	// ExtractPos maps a field name to the position number written in the rule
	// text, e.g. "2:projectname,4:area".
	// NOTE(review): how the position is interpreted by the matcher is not
	// visible in this file - confirm against the extraction code.
	ExtractPos map[string]int
}
  24. type RuleCore struct {
  25. Field string //逻辑字段
  26. LuaLogic string //进入逻辑
  27. ExtFrom string //从哪个字段抽取
  28. RulePres []*RegLuaInfo //抽取前置规则
  29. RuleBacks []*RegLuaInfo //抽取后置规则
  30. RuleCores []*RegLuaInfo //抽取规则
  31. }
  32. type TaskInfo struct {
  33. Name, Version, VersionId, TrackColl string //名称、版本、版本id、追踪记录表
  34. FromDbAddr, FromDB, FromColl string //抽取数据库地址、库名、表名
  35. SaveColl, TestColl, LastExtId string //抽取结果表、测试结果表、上次抽取信息id
  36. DB *db.Pool //数据库连接池
  37. IsEtxLog bool //是否开启抽取日志
  38. ProcessPool chan bool //任务进程池
  39. TestLua bool //检查测试用
  40. }
  41. type Tag struct {
  42. Type string //标签类型 string 字符串、regexp 正则
  43. Key string //
  44. Reg *regexp.Regexp //
  45. }
  46. type City struct {
  47. Name string
  48. Brief string
  49. P *Province
  50. }
  51. type Province struct {
  52. Name string
  53. Brief string
  54. Cap string
  55. Captial *City
  56. }
  57. type ExtractTask struct {
  58. Id string //任务id
  59. IsRun bool //是否启动
  60. Content string //信息内容
  61. TaskInfo *TaskInfo //任务信息
  62. RulePres []*RegLuaInfo //通用前置规则
  63. RuleBacks []*RegLuaInfo //通用后置规则
  64. RuleCores []*RuleCore //抽取规则
  65. Tag map[string][]*Tag //标签库
  66. ClearFn map[string][]string //清理函数
  67. }
// DFA is a keyword ("sensitive word") matcher stored as a trie of nested maps.
//
// Link is the root node. As built by AddWordAll, every node maps a one-byte
// string to a *map[string]interface{} child and additionally holds "YN"
// ("1" when a keyword ends at this node, "0" otherwise) and, optionally, "K"
// (the complete keyword ending at this node).
type DFA struct {
	Link map[string]interface{}
}
// init allocates the package-level TaskList map and starts SaveExtLog in its
// own goroutine when the package is loaded.
func init() {
	TaskList = make(map[string]*ExtractTask)
	go SaveExtLog()
}
  76. //加载任务信息
  77. func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
  78. task, _ := db.Mgo.FindById("task", e.Id, nil)
  79. if len(*task) > 1 {
  80. v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`)
  81. e.TaskInfo = &TaskInfo{
  82. Name: (*task)["s_taskname"].(string),
  83. Version: (*task)["s_version"].(string),
  84. VersionId: qu.BsonIdToSId((*v)["_id"]),
  85. TrackColl: trackcoll,
  86. FromDbAddr: (*task)["s_mgoaddr"].(string),
  87. FromDB: (*task)["s_mgodb"].(string),
  88. FromColl: (*task)["s_mgocoll"].(string),
  89. TestColl: resultcoll,
  90. IsEtxLog: true,
  91. ProcessPool: make(chan bool, 1),
  92. }
  93. } else {
  94. return
  95. }
  96. }
  97. //加载任务信息
  98. func (e *ExtractTask) InitTaskInfo() {
  99. task, _ := db.Mgo.FindById("task", e.Id, nil)
  100. log.Println("task", task)
  101. if len(*task) > 1 {
  102. v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`)
  103. e.TaskInfo = &TaskInfo{
  104. Name: (*task)["s_taskname"].(string),
  105. Version: (*task)["s_version"].(string),
  106. VersionId: qu.BsonIdToSId((*v)["_id"]),
  107. //TrackColl: (*task)["s_trackcoll"].(string),
  108. FromDbAddr: (*task)["s_mgoaddr"].(string),
  109. FromDB: (*task)["s_mgodb"].(string),
  110. FromColl: (*task)["s_mgocoll"].(string),
  111. SaveColl: (*task)["s_mgosavecoll"].(string),
  112. IsEtxLog: false, //qu.If(qu.IntAll((*task)["i_track"]) == 1, true, false).(bool),
  113. LastExtId: qu.ObjToString((*task)["s_extlastid"]),
  114. ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)),
  115. }
  116. log.Println(e.TaskInfo.Name, e.TaskInfo.ProcessPool)
  117. } else {
  118. return
  119. }
  120. }
  121. //加载通用前置规则
  122. func (e *ExtractTask) InitRulePres() {
  123. defer qu.Catch()
  124. list, _ := db.Mgo.Find("rule_pre", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  125. for _, v := range *list {
  126. rinfo := &RegLuaInfo{
  127. Code: v["s_code"].(string),
  128. Name: v["s_name"].(string),
  129. IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
  130. }
  131. if rinfo.IsLua {
  132. rinfo.RuleText = v["s_luascript"].(string)
  133. } else {
  134. qu.Try(func() {
  135. rinfo.RuleText = v["s_rule"].(string)
  136. tmp := strings.Split(rinfo.RuleText, "__")
  137. if len(tmp) == 2 {
  138. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
  139. } else {
  140. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
  141. }
  142. }, func(err interface{}) {
  143. log.Println(rinfo.Code, rinfo.Field, err)
  144. })
  145. }
  146. e.RulePres = append(e.RulePres, rinfo)
  147. }
  148. }
  149. //加载通用后置规则
  150. func (e *ExtractTask) InitRuleBacks() {
  151. defer qu.Catch()
  152. list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  153. for _, v := range *list {
  154. rinfo := &RegLuaInfo{
  155. Code: v["s_code"].(string),
  156. Name: v["s_name"].(string),
  157. IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
  158. }
  159. if rinfo.IsLua {
  160. rinfo.RuleText = v["s_luascript"].(string)
  161. } else {
  162. qu.Try(func() {
  163. rinfo.RuleText = v["s_rule"].(string)
  164. tmp := strings.Split(rinfo.RuleText, "__")
  165. if len(tmp) == 2 {
  166. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
  167. } else {
  168. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
  169. }
  170. }, func(err interface{}) {
  171. log.Println(rinfo.Code, rinfo.Field, err)
  172. })
  173. }
  174. e.RuleBacks = append(e.RuleBacks, rinfo)
  175. }
  176. }
  177. //加载抽取规则
  178. func (e *ExtractTask) InitRuleCore() {
  179. defer qu.Catch()
  180. vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
  181. for _, vinfo := range *vinfos {
  182. if b, _ := vinfo["isuse"].(bool); !b {
  183. continue
  184. }
  185. pid := qu.BsonIdToSId(vinfo["_id"])
  186. list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
  187. for _, vv := range *list {
  188. if b, _ := vv["isuse"].(bool); !b {
  189. continue
  190. }
  191. rcore := &RuleCore{}
  192. rcore.Field = vinfo["s_field"].(string)
  193. rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
  194. rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
  195. //前置规则
  196. rulePres := []*RegLuaInfo{}
  197. plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
  198. for _, v := range *plist {
  199. rinfo := &RegLuaInfo{
  200. Code: v["s_code"].(string),
  201. Name: v["s_name"].(string),
  202. IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
  203. }
  204. if rinfo.IsLua {
  205. rinfo.RuleText = v["s_luascript"].(string)
  206. } else {
  207. qu.Try(func() {
  208. rinfo.RuleText = v["s_rule"].(string)
  209. rinfo.Field = v["s_field"].(string)
  210. tmp := strings.Split(rinfo.RuleText, "__")
  211. if len(tmp) == 2 {
  212. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
  213. } else {
  214. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
  215. }
  216. }, func(err interface{}) {
  217. log.Println(rinfo.Code, rinfo.Field, err)
  218. })
  219. }
  220. rulePres = append(rulePres, rinfo)
  221. }
  222. rcore.RulePres = rulePres
  223. //后置规则
  224. ruleBacks := []*RegLuaInfo{}
  225. blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
  226. for _, v := range *blist {
  227. rinfo := &RegLuaInfo{
  228. Code: v["s_code"].(string),
  229. Name: v["s_name"].(string),
  230. IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
  231. }
  232. if rinfo.IsLua {
  233. rinfo.RuleText = v["s_luascript"].(string)
  234. } else {
  235. qu.Try(func() {
  236. rinfo.RuleText = v["s_rule"].(string)
  237. rinfo.Field = v["s_field"].(string)
  238. tmp := strings.Split(rinfo.RuleText, "__")
  239. if len(tmp) == 2 {
  240. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
  241. } else {
  242. rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
  243. }
  244. }, func(err interface{}) {
  245. log.Println(rinfo.Code, rinfo.Field, err)
  246. })
  247. }
  248. ruleBacks = append(ruleBacks, rinfo)
  249. }
  250. rcore.RuleBacks = ruleBacks
  251. //抽取规则
  252. ruleCores := []*RegLuaInfo{}
  253. clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
  254. for _, v := range *clist {
  255. if b, _ := v["isuse"].(bool); !b {
  256. continue
  257. }
  258. rinfo := &RegLuaInfo{
  259. Code: v["s_code"].(string),
  260. Name: v["s_name"].(string),
  261. IsLua: qu.If(v["s_type"].(string) == "1", true, false).(bool),
  262. }
  263. if rinfo.IsLua {
  264. rinfo.RuleText = v["s_luascript"].(string)
  265. //暂时提取全部属性
  266. rinfo.LFields = getALLFields()
  267. rinfo.IsHasFields = true
  268. /*rinfo.LFields, _ = v["s_fields"].([]interface{})
  269. if len(rinfo.LFields) > 0 {
  270. rinfo.IsHasFields = true
  271. }*/
  272. } else {
  273. qu.Try(func() {
  274. rinfo.RuleText = v["s_rule"].(string)
  275. rinfo.Field = v["s_field"].(string)
  276. tmp := strings.Split(rinfo.RuleText, "__")
  277. if len(tmp) == 2 {
  278. epos := strings.Split(tmp[1], ",")
  279. posm := map[string]int{}
  280. for _, v := range epos {
  281. ks := strings.Split(v, ":")
  282. if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
  283. posm[ks[1]] = qu.IntAll(ks[0])
  284. } else { //(.*)招标公告__2
  285. posm[rinfo.Field] = qu.IntAll(ks[0])
  286. }
  287. }
  288. rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: true, ExtractPos: posm}
  289. } else {
  290. rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: false}
  291. }
  292. }, func(err interface{}) {
  293. log.Println(rinfo.Code, rinfo.Field, err)
  294. })
  295. }
  296. ruleCores = append(ruleCores, rinfo)
  297. }
  298. rcore.RuleCores = ruleCores
  299. //
  300. e.RuleCores = append(e.RuleCores, rcore)
  301. }
  302. }
  303. }
  304. //加载标签库
  305. func (e *ExtractTask) InitTag() {
  306. defer qu.Catch()
  307. e.Tag = map[string][]*Tag{}
  308. //字符串标签库
  309. list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"string","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  310. for _, v := range *list {
  311. field := qu.ObjToString(v["s_field"])
  312. if tmp, ok := v["content"].([]interface{}); ok {
  313. for _, key := range tmp {
  314. tag := &Tag{Type: "string", Key: key.(string)}
  315. e.Tag[field] = append(e.Tag[field], tag)
  316. }
  317. }
  318. }
  319. //正则标签库
  320. list, _ = db.Mgo.Find("tagdetailinfo", `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  321. for _, v := range *list {
  322. field := qu.ObjToString(v["s_field"])
  323. if tmp, ok := v["content"].([]interface{}); ok {
  324. for _, key := range tmp {
  325. tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
  326. e.Tag[field] = append(e.Tag[field], tag)
  327. }
  328. }
  329. }
  330. }
  331. //获取fields
  332. func getALLFields() []interface{} {
  333. fields := []interface{}{}
  334. list, _ := db.Mgo.Find("fields", `{}`, nil, `{"s_field":1}`, false, -1, -1)
  335. for _, v := range *list {
  336. fields = append(fields, v["s_field"])
  337. }
  338. return fields
  339. }
  340. //加载clear函数
  341. func (e *ExtractTask) InitClearFn() {
  342. list, _ := db.Mgo.Find("cleanup", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  343. fn := map[string][]string{}
  344. for _, tmp := range *list {
  345. field := tmp["s_field"].(string)
  346. fns := tmp["clear"].([]interface{})
  347. if fn[field] == nil {
  348. fn[field] = []string{}
  349. }
  350. for _, v := range fns {
  351. fn[field] = append(fn[field], v.(string))
  352. }
  353. }
  354. e.ClearFn = fn
  355. }
  356. //加载省份
  357. func (e *ExtractTask) InitProvince() {
  358. defer qu.Catch()
  359. fn := map[string]interface{}{}
  360. list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  361. for _, v := range *list {
  362. name := qu.ObjToString(v["s_name"])
  363. content := v["content"]
  364. switch content.(type) {
  365. case string:
  366. fn[name] = []interface{}{content.(string)}
  367. case []interface{}:
  368. fn[name] = content
  369. }
  370. }
  371. ProviceConfig = fn
  372. }
  373. //加载城市简称
  374. func (e *ExtractTask) InitCitySim() {
  375. defer qu.Catch()
  376. list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  377. fn := map[string]map[string]interface{}{}
  378. for _, v := range *list {
  379. name := qu.ObjToString(v["s_name"])
  380. tmp := v["content"].(map[string]interface{})
  381. fn[name] = tmp
  382. }
  383. CitySimConfig = fn
  384. }
  385. //加载城市全称
  386. func (e *ExtractTask) InitCityAll() {
  387. defer qu.Catch()
  388. list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
  389. // if len(*list) != 34 {
  390. // fmt.Println("加载城市配置文件出错", len(*list))
  391. // }
  392. fn := map[string]map[string]interface{}{}
  393. for _, v := range *list {
  394. name := qu.ObjToString(v["s_name"])
  395. tmp := v["content"].(map[string]interface{})
  396. fn[name] = tmp
  397. }
  398. CityAllConfig = fn
  399. }
  400. //初始化城市省份敏感词
  401. func InitDFA() {
  402. AreaGet = DFA{}
  403. AreaProvinceGet = DFA{}
  404. for k, v := range ProviceConfig {
  405. log.Println(k, "----------", v)
  406. for _, p := range v.([]interface{}) {
  407. log.Println("ppppp", p)
  408. p1, _ := p.(string)
  409. AreaProvinceGet.AddWord(p1)
  410. ProvinceMap[p1] = k
  411. }
  412. }
  413. log.Println("ProvinceMap11----", ProvinceMap)
  414. for k, v := range CityAllConfig {
  415. AreaProvinceGet.AddWord(k) //省全称
  416. p := &Province{}
  417. p.Name = k
  418. p.Brief = v["brief"].(string)
  419. ProvinceMap[k] = p.Brief
  420. log.Println("ProvinceMap22----", ProvinceMap)
  421. ProvinceBrief[p.Brief] = p
  422. p.Cap = v["captial"].(string)
  423. log.Println("ProvinceBrief11====", p.Brief, ProvinceBrief[p.Brief].Name, ProvinceBrief[p.Brief].Brief, "==", ProvinceBrief[p.Brief].Cap)
  424. city, _ := v["city"].(map[string]interface{})
  425. log.Println("======================================================")
  426. for k1, v1 := range city {
  427. v1m, _ := v1.(map[string]interface{})
  428. c := &City{}
  429. c.Name = k1
  430. if v1m["brief"] == nil {
  431. log.Println(k, k1)
  432. }
  433. c.Brief = v1m["brief"].(string)
  434. //cityAll[k1] = c
  435. CityBrief[c.Brief] = c
  436. c.P = p
  437. if c.Brief == p.Cap {
  438. p.Captial = c
  439. }
  440. log.Println("CityBrief11+++", k1, "---", CityBrief[c.Brief].Name, CityBrief[c.Brief].Brief, "===", CityBrief[c.Brief].P.Captial, "===", CityBrief[c.Brief].P.Name)
  441. //加入到城市map中
  442. cs := AreaToCity[k1]
  443. AreaGet.AddWord(k1) //市全称
  444. if cs != nil {
  445. cs = append(cs, c)
  446. } else {
  447. cs = []*City{c}
  448. }
  449. AreaToCity[k1] = cs
  450. log.Println("市---", k1, AreaToCity[k1][0].Brief, AreaToCity[k1][0].Name, AreaToCity[k1][0].P.Name)
  451. /*
  452. AreaToCity["衢州市"] = []interface{}{
  453. &City{
  454. c.Name = 衢州市,
  455. c.Brief = 衢州,
  456. c.P = xxx
  457. },
  458. }
  459. */
  460. arr := v1m["area"].([]interface{})
  461. for _, k2 := range arr {
  462. s := k2.(string)
  463. cs := AreaToCity[s]
  464. AreaGet.AddWord(s) //街道全称
  465. if cs != nil {
  466. cs = append(cs, c)
  467. } else {
  468. cs = []*City{c}
  469. }
  470. AreaToCity[s] = cs
  471. log.Println("街道===", k2, AreaToCity)
  472. }
  473. }
  474. }
  475. log.Println("======================================================")
  476. //加载简称
  477. AreaSimGet = DFA{}
  478. //util.ReadConfig("./city_sim.json", &CitySimConfig)
  479. // if len(CitySimConfig) != 34 {
  480. // log.Println("加载简称配置文件出错", len(CitySimConfig))
  481. // }
  482. for k, v := range CitySimConfig {
  483. pb := v["brief"].(string)
  484. p := ProvinceBrief[pb]
  485. log.Println("++++++++++++++++++", p)
  486. //加载
  487. for _, ss := range []string{k, pb} {
  488. cs := AreaToCity[ss]
  489. if cs != nil {
  490. cs = append(cs, p.Captial)
  491. } else {
  492. cs = []*City{p.Captial}
  493. }
  494. AreaToCity[ss] = cs
  495. log.Println("+++", ss, AreaToCity)
  496. AreaSimGet.AddWord(ss) //省全称和省简称
  497. }
  498. city, _ := v["city"].(map[string]interface{})
  499. for k1, v1 := range city {
  500. v1m, _ := v1.(map[string]interface{})
  501. if v1m["brief"] == nil {
  502. log.Println(k, k1)
  503. }
  504. cb := v1m["brief"].(string)
  505. c := AreaToCity[k1][0]
  506. //加入到城市map中
  507. for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州 浙江杭州
  508. AreaSimGet.AddWord(ss)
  509. cs := AreaToCity[ss]
  510. if cs != nil {
  511. cs = append(cs, c)
  512. } else {
  513. cs = []*City{c}
  514. }
  515. AreaToCity[ss] = cs
  516. log.Println("+-+-", ss, AreaToCity)
  517. }
  518. arr := v1m["area"].([]interface{})
  519. for _, k2 := range arr {
  520. s := k2.(string)
  521. for _, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
  522. cs := AreaToCity[ss]
  523. AreaSimGet.AddWord(ss)
  524. if cs != nil {
  525. cs = append(cs, c)
  526. } else {
  527. cs = []*City{c}
  528. }
  529. AreaToCity[ss] = cs
  530. log.Println("-+-+", ss, AreaToCity)
  531. }
  532. }
  533. }
  534. }
  535. log.Println(AreaToCity)
  536. }
// AddWord inserts every key into the trie. It is shorthand for
// AddWordAll(true, keys...), so the node where a key ends also stores the
// complete key under "K".
func (d *DFA) AddWord(keys ...string) {
	d.AddWordAll(true, keys...)
}
  540. func (d *DFA) AddWordAll(haskey bool, keys ...string) {
  541. if d.Link == nil {
  542. d.Link = make(map[string]interface{})
  543. }
  544. for _, key := range keys {
  545. nowMap := &d.Link
  546. for i := 0; i < len(key); i++ {
  547. kc := key[i : i+1]
  548. if v, ok := (*nowMap)[kc]; ok {
  549. nowMap, _ = v.(*map[string]interface{})
  550. } else {
  551. newMap := map[string]interface{}{}
  552. newMap["YN"] = "0"
  553. (*nowMap)[kc] = &newMap
  554. nowMap = &newMap
  555. }
  556. if i == len(key)-1 {
  557. (*nowMap)["YN"] = "1"
  558. if haskey {
  559. (*nowMap)["K"] = key
  560. }
  561. }
  562. }
  563. }
  564. }
  565. func (d *DFA) CheckSensitiveWord(src string) string {
  566. pos := 0
  567. nowMap := &d.Link
  568. res := ""
  569. for i := 0; i < len(src); i++ {
  570. word := src[i : i+1]
  571. nowMap, _ = (*nowMap)[word].(*map[string]interface{})
  572. if nowMap != nil { // 存在,则判断是否为最后一个
  573. if pos == 0 {
  574. pos = i
  575. }
  576. if "1" == qu.ObjToString((*nowMap)["YN"]) { // 如果为最后一个匹配规则,结束循环,返回匹配标识数
  577. res = qu.ObjToString((*nowMap)["K"])
  578. //pos = 0
  579. //break
  580. }
  581. } else {
  582. if res != "" {
  583. break
  584. } else {
  585. nowMap = &d.Link
  586. if pos > 0 {
  587. i = pos
  588. pos = 0
  589. }
  590. }
  591. }
  592. }
  593. return res
  594. }