// Source file: extractcity.go

  1. package extract
import (
	"sort"
	"strings"

	. "jy/pretreated"
	ju "jy/util"
	qu "qfw/util"
)
  8. //省
  9. type Province struct {
  10. Name string
  11. Brief string
  12. Cap string
  13. Captial *City
  14. }
  15. //市
  16. type City struct {
  17. Name string
  18. Brief string
  19. P *Province
  20. }
  21. //区或县
  22. type District struct {
  23. Name string
  24. C *City
  25. }
  26. //街道
  27. type Street struct {
  28. Name string
  29. D *District
  30. }
  31. //村、社区、居委会
  32. type Community struct {
  33. Name string
  34. S *Street
  35. }
  36. //区或县简称对应的全称和市信息
  37. type DistrictSimFull struct {
  38. SimName string
  39. FullName string
  40. C *City
  41. }
  42. //邮编
  43. type PostCode struct {
  44. Code string
  45. P string
  46. C string
  47. D []string
  48. }
  49. //区号
  50. type AreaCode struct {
  51. Code string
  52. P string
  53. C []string
  54. }
  55. //抽取city
  56. func (e *ExtractTask) ExtractCity(j *ju.Job, resulttmp map[string]interface{}, id string) {
  57. /*
  58. 高准确率:
  59. 1.爬虫数据jsondata
  60. 2.采购单位库
  61. 3.邮编
  62. 4.固话
  63. 5.site(todo)
  64. 低准确率:(全称库匹配到不走简称库)
  65. 1.city全称库(buyeraddr;title,projectname)
  66. 2.city简称库(buyeraddr;title,projectname)
  67. */
  68. defer qu.Catch()
  69. //初始化
  70. if j.FullAreaScore == nil {
  71. j.FullAreaScore = make(map[string]float64)
  72. }
  73. if j.FullCityScore == nil {
  74. j.FullCityScore = make(map[string]float64)
  75. }
  76. if j.FullDistrictScore == nil {
  77. j.FullDistrictScore = make(map[string]float64)
  78. }
  79. sm := NewSortMap()
  80. //高精度抽取city
  81. //存储每个流程的抽取结果
  82. area1 := make([]map[string]string, 4)
  83. city1 := make([]map[string]string, 4)
  84. district1 := make([]map[string]string, 4)
  85. //jsondata
  86. p0, c0, d0, p, c, d := e.GetCityByJsonData(j)
  87. area1 = append(area1, map[string]string{"a_c_d": p})
  88. city1 = append(city1, map[string]string{"a_c_d": c})
  89. district1 = append(district1, map[string]string{"a_c_d": d})
  90. area1[0] = map[string]string{"jsondata": p0}
  91. city1[0] = map[string]string{"jsondata": c0}
  92. district1[0] = map[string]string{"jsondata": d0}
  93. //qu.Debug("=====jsondata打分---", j.AreaScore, j.CityScore, j.DistrictScore)
  94. //采购单位库
  95. buyer, _ := resulttmp["buyer"].(string)
  96. p1, c1, d1 := e.GetCityByBuyer(j, buyer)
  97. //qu.Debug("buyer p--", p1, "c--", c1, "d--", d1)
  98. area1[1] = map[string]string{"buyer": p1}
  99. city1[1] = map[string]string{"buyer": c1}
  100. district1[1] = map[string]string{"buyer": d1}
  101. //qu.Debug("=====采购单位库打分---", j.AreaScore, j.CityScore, j.DistrictScore)
  102. //postcode邮编
  103. buyerzipcode, _ := resulttmp["buyerzipcode"].(string)
  104. p2, c2, d2 := e.GetCityByPostCode(j, buyerzipcode)
  105. //qu.Debug("postcode p--", p2, "c--", c2, "d--", d2)
  106. area1[2] = map[string]string{"postcode": p2}
  107. city1[2] = map[string]string{"postcode": c2}
  108. district1[2] = map[string]string{"postcode": d2}
  109. //qu.Debug("=====postcode邮编打分---", j.AreaScore, j.CityScore, j.DistrictScore)
  110. //areacode固话区号
  111. buyertel, _ := resulttmp["buyertel"].(string)
  112. p3, c3, d3 := e.GetCityByAreaCode(j, buyertel)
  113. //qu.Debug("areacode p--", p3, "c--", c3, "d--", d3, buyertel)
  114. area1[3] = map[string]string{"areacode": p3}
  115. city1[3] = map[string]string{"areacode": c3}
  116. district1[3] = map[string]string{"areacode": d3}
  117. //qu.Debug("=====areacode固话区号打分---", j.AreaScore, j.CityScore, j.DistrictScore)
  118. HighPreCity := make(map[string]interface{})
  119. HighPreCity["area"] = area1
  120. HighPreCity["city"] = city1
  121. HighPreCity["district"] = district1
  122. //低精度抽取city
  123. //buyeraddr,title,projectname
  124. buyeraddr, _ := resulttmp["buyeraddr"].(string)
  125. title, _ := resulttmp["title"].(string)
  126. projectname, _ := resulttmp["projectname"].(string)
  127. //qu.Debug(buyeraddr, "--", buyer, "--", title, "--", projectname)
  128. sm.AddKey("buyeraddr", buyeraddr)
  129. sm.AddKey("buyer", buyer)
  130. sm.AddKey("title", title)
  131. sm.AddKey("projectname", projectname)
  132. area2, city2, district2 := e.GetCityByOthers(j, sm)
  133. LowPreCity := make(map[string]interface{})
  134. LowPreCity["area"] = area2
  135. LowPreCity["city"] = city2
  136. LowPreCity["district"] = district2
  137. // resulttmp["highprecity"] = HighPreCity
  138. // resulttmp["lowprecity"] = LowPreCity
  139. //qu.Debug("最终打分---", j.AreaScore, j.CityScore, j.DistrictScore)
  140. //最终抽取结果
  141. finishP := HighestScoreArr(j.FullAreaScore)
  142. finishC := HighestScoreArr(j.FullCityScore)
  143. finishD := HighestScoreArr(j.FullDistrictScore)
  144. // area, _ := resulttmp["area"].(string)
  145. // city, _ := resulttmp["city"].(string)
  146. // district, _ := resulttmp["district"].(string)
  147. // qu.Debug("之前结果结果===", area, city, district)
  148. arearesult := ""
  149. cityresult := ""
  150. districtresult := ""
  151. if len(finishP) == 1 { //最高分一个
  152. arearesult = finishP[0] //抽取结果直接赋值
  153. cityresult = GetCity(arearesult, cityresult, e, finishC)
  154. cityresult, districtresult = GetDistrict(arearesult, cityresult, districtresult, e, finishD)
  155. } else if len(finishP) > 1 { //province最高分多个
  156. if len(finishC) == 1 {
  157. cityresult = finishC[0]
  158. if cfMap := e.CityFullMap[cityresult]; cfMap != nil {
  159. arearesult = cfMap.P.Brief
  160. cityresult, districtresult = GetDistrict(arearesult, cityresult, districtresult, e, finishD)
  161. }
  162. } else { //对应的city有多个(多个province和city)
  163. arearesult = finishP[0] //抽取结果直接赋值
  164. cityresult = GetCity(arearesult, cityresult, e, finishC)
  165. cityresult, districtresult = GetDistrict(arearesult, cityresult, districtresult, e, finishD)
  166. }
  167. }
  168. //qu.Debug("结果===", arearesult, "--", cityresult, "--", districtresult)
  169. if arearesult == "" {
  170. arearesult = "全国"
  171. } else if cityresult == "" {
  172. if pbMap := e.ProvinceBriefMap[arearesult]; pbMap != nil {
  173. cityresult = pbMap.Cap
  174. resulttmp["defaultpcap"] = true
  175. }
  176. }
  177. //qu.Debug("结果===", arearesult, "--", cityresult, "--", districtresult)
  178. resulttmp["area"] = arearesult
  179. resulttmp["city"] = cityresult
  180. resulttmp["district"] = districtresult
  181. }
  182. func (e *ExtractTask) GetCityByJsonData(j *ju.Job) (province, city, district, p, c, d string) {
  183. defer qu.Catch()
  184. jsondata := *j.Jsondata
  185. if jsondata != nil { //jsondata中获取province和city
  186. if acd, ok := jsondata["area_city_district"].(string); ok && acd != "" {
  187. flag := false
  188. p, flag = GetPCDByAreaDFA(p, acd, e, j, flag)
  189. if !flag {
  190. p, c, flag = GetPCDByCityDFA(p, c, acd, e, j, flag)
  191. }
  192. if !flag {
  193. p, city, c = GetPCDByDistrictDFA(p, c, d, acd, e, j)
  194. }
  195. }
  196. city, _ = jsondata["city"].(string) //city全称或者简称
  197. province, _ = jsondata["area"].(string) //province简称
  198. district, _ = jsondata["district"].(string) //district全称
  199. }
  200. PCDScore(j, "district", district, 5, true) //district打分
  201. bp := false
  202. if province != "" {
  203. if e.ProvinceBriefMap[province] != nil { //判断爬虫的省份是否正确 (全国)
  204. bp = true //省份正确
  205. }
  206. }
  207. pbrief := ""
  208. if city != "" {
  209. cityfullmap := e.CityFullMap[city] //判断city全称是否正确
  210. if cityfullmap != nil {
  211. pbrief = cityfullmap.P.Brief //province简称
  212. } else {
  213. citybriefmap := e.CityBriefMap[city] //判断city简称是否正确
  214. if citybriefmap != nil {
  215. city = citybriefmap.Name //city简称替换为全称
  216. pbrief = citybriefmap.P.Brief
  217. }
  218. }
  219. }
  220. if bp {
  221. if pbrief == province { //爬虫的province和city匹配
  222. PCDScore(j, "city", city, 5, true)
  223. } else { //pbrief不匹配province(此时city为空或者错误)
  224. city = ""
  225. }
  226. PCDScore(j, "province", province, 5, true)
  227. } else { //省份错误或为空,取city的对应的pbrief为province
  228. if pbrief != "" {
  229. province = pbrief
  230. PCDScore(j, "province", province, 5, true)
  231. PCDScore(j, "city", city, 5, true)
  232. } else {
  233. province = ""
  234. city = ""
  235. }
  236. }
  237. return
  238. }
  239. func (e *ExtractTask) GetCityByBuyer(j *ju.Job, buyer string) (province, city, district string) {
  240. defer qu.Catch()
  241. return
  242. }
  243. func (e *ExtractTask) GetCityByPostCode(j *ju.Job, postcode string) (province, city, district string) {
  244. defer qu.Catch()
  245. pc := e.PostCodeMap[postcode]
  246. if pc != nil {
  247. province = pc.P
  248. city = pc.C
  249. districtTmp := pc.D
  250. if len(districtTmp) == 1 { //对应多个district舍去
  251. district = districtTmp[0]
  252. PCDScore(j, "district", district, 5, true)
  253. }
  254. PCDScore(j, "province", province, 5, true)
  255. PCDScore(j, "city", city, 5, true)
  256. }
  257. return
  258. }
  259. func (e *ExtractTask) GetCityByAreaCode(j *ju.Job, buyertel string) (province, city, district string) {
  260. defer qu.Catch()
  261. if len(buyertel) >= 11 {
  262. if strings.HasPrefix(buyertel, "0") { //区号除了澳门853其他都是以0开头
  263. n := 4
  264. L:
  265. areacode := buyertel[:n]
  266. ac := e.AreaCodeMap[areacode]
  267. if ac != nil {
  268. province = ac.P
  269. citytmp := ac.C
  270. if len(citytmp) == 1 { //对应多个city舍去
  271. city = citytmp[0]
  272. PCDScore(j, "city", city, 5, true)
  273. }
  274. PCDScore(j, "province", province, 5, true)
  275. } else {
  276. n = n - 1
  277. if n >= 3 {
  278. goto L
  279. }
  280. }
  281. } else if buyertel[:3] == "853" { //澳门
  282. province = "澳门"
  283. city = "澳门"
  284. PCDScore(j, "province", province, 5, true)
  285. PCDScore(j, "city", city, 5, true)
  286. }
  287. }
  288. return
  289. }
  290. func (e *ExtractTask) GetCityByOthers(j *ju.Job, sm *SortMap) ([]map[string]string, []map[string]string, []map[string]string) {
  291. //存储每个流程的抽取结果
  292. area2 := []map[string]string{}
  293. city2 := []map[string]string{}
  294. district2 := []map[string]string{}
  295. isExtPC := false
  296. for _, from := range sm.Keys { //buyeraddr;title;projectname
  297. str, _ := sm.Map[from].(string)
  298. //分别记录buyeraddr;title;projectname全称匹配的打分情况
  299. pscore1 := make(map[string]int)
  300. cscore1 := make(map[string]int)
  301. dscore1 := make(map[string]int)
  302. //优先province,city,district,street全称匹配
  303. for pos, GET := range []*ju.DFA{e.ProvinceAllGet, e.CityAllGet, e.DistrictAllGet, e.StreetGet} {
  304. word := GET.CheckSensitiveWord(str)
  305. if word != "" {
  306. if pos == 0 { //province
  307. pbrief := e.ProvinceMap[word] //取province简称
  308. OtherScore("p", []string{pbrief}, &pscore1, &cscore1, &dscore1)
  309. } else if pos == 1 { //city
  310. p := ""
  311. cityfullmap := e.CityFullMap[word]
  312. if cityfullmap != nil {
  313. p = cityfullmap.P.Brief //取province简称
  314. }
  315. OtherScore("c", []string{p, word}, &pscore1, &cscore1, &dscore1)
  316. } else if pos == 2 { //district
  317. p, c := "", ""
  318. dcitymap := e.DistrictCityMap[word] //区对应的city
  319. if dcitymap != nil {
  320. c = dcitymap.Name //city全称
  321. p = dcitymap.P.Brief //province简称
  322. }
  323. tmpArr := []string{p, c, word}
  324. if word == c { //河南济源市
  325. tmpArr = []string{p, c}
  326. }
  327. OtherScore("d", tmpArr, &pscore1, &cscore1, &dscore1)
  328. } else if pos == 3 { //street
  329. p, c, d := "", "", ""
  330. sdmap := e.StreetDistrictMap[word] //对应的区
  331. if sdmap != nil {
  332. d = sdmap.Name
  333. c = sdmap.C.Name
  334. p = sdmap.C.P.Brief
  335. }
  336. tmpArr := []string{p, c, d}
  337. if c == d { //河南济源市
  338. tmpArr = []string{p, c}
  339. }
  340. OtherScore("d", tmpArr, &pscore1, &cscore1, &dscore1)
  341. }
  342. }
  343. }
  344. //取最高分的province,city,district
  345. ph1 := HighestScore(pscore1)
  346. ch1 := HighestScore(cscore1)
  347. dh1 := HighestScore(dscore1)
  348. isMatch := IsMatch(ph1, ch1, e) //最高分p和最高分c可能不对应
  349. if ch1 != "" && ph1 != "" && isMatch {
  350. isExtPC = true
  351. }
  352. //是否相互匹配
  353. area2 = append(area2, map[string]string{from + "_all": ph1})
  354. city2 = append(city2, map[string]string{from + "_all": ch1})
  355. district2 = append(district2, map[string]string{from + "_all": dh1})
  356. //buyeraddr,title,projectname匹配对应的结果加入最终得分
  357. if isMatch {
  358. if from == "buyeraddr" || from == "buyer" { //全称匹配,buyeraddr和buyer3分,title和projectname2分
  359. PCDScore(j, "province", ph1, 3, true)
  360. PCDScore(j, "city", ch1, 3, true)
  361. PCDScore(j, "district", dh1, 3, true)
  362. } else {
  363. PCDScore(j, "province", ph1, 2, true)
  364. PCDScore(j, "city", ch1, 2, true)
  365. PCDScore(j, "district", dh1, 2, true)
  366. }
  367. }
  368. }
  369. //判断全称是否抽出了province和city,一个未抽出走简称抽取
  370. if !isExtPC {
  371. for _, from := range sm.Keys { //buyeraddr;title;projectname
  372. str, _ := sm.Map[from].(string)
  373. pscore2 := make(map[string]int)
  374. cscore2 := make(map[string]int)
  375. dscore2 := make(map[string]int)
  376. for pos, GET := range []*ju.DFA{e.ProvinceSimGet, e.CitySimGet, e.DistrictSimGet} {
  377. word := GET.CheckSensitiveWord(str)
  378. if word != "" {
  379. if pos == 0 { //province
  380. OtherScore("p", []string{word}, &pscore2, &cscore2, &dscore2)
  381. } else if pos == 1 { //city
  382. p, c := "", ""
  383. citybriefmap := e.CityBriefMap[word]
  384. if citybriefmap != nil {
  385. p = citybriefmap.P.Brief
  386. c = citybriefmap.Name
  387. }
  388. OtherScore("c", []string{p, c}, &pscore2, &cscore2, &dscore2)
  389. } else if pos == 2 { //district
  390. p, c := "", ""
  391. d := e.DistrictSimAndAll[word]
  392. dcitymap := e.DistrictCityMap[word]
  393. if dcitymap != nil {
  394. c = dcitymap.Name
  395. p = dcitymap.P.Brief
  396. }
  397. OtherScore("d", []string{p, c, d}, &pscore2, &cscore2, &dscore2)
  398. }
  399. }
  400. }
  401. //取最高分的province,city,district
  402. ph2 := HighestScore(pscore2)
  403. ch2 := HighestScore(cscore2)
  404. dh2 := HighestScore(dscore2)
  405. area2 = append(area2, map[string]string{from + "_sim": ph2})
  406. city2 = append(city2, map[string]string{from + "_sim": ch2})
  407. district2 = append(district2, map[string]string{from + "_sim": dh2})
  408. //buyeraddr,title,projectname匹配对应的结果加入最终得分
  409. if from == "buyeraddr" {
  410. PCDScore(j, "province", ph2, 2, true)
  411. PCDScore(j, "city", ch2, 2, true)
  412. PCDScore(j, "district", dh2, 2, true)
  413. } else {
  414. PCDScore(j, "province", ph2, 1, true)
  415. PCDScore(j, "city", ch2, 1, true)
  416. PCDScore(j, "district", dh2, 1, true)
  417. }
  418. }
  419. }
  420. return area2, city2, district2
  421. }
  422. func IsMatch(p, c string, e *ExtractTask) bool {
  423. ism := false
  424. if p != "" && c == "" {
  425. return true
  426. }
  427. if cfMap := e.CityFullMap[c]; cfMap != nil {
  428. if cfMap.P.Brief == p {
  429. ism = true
  430. }
  431. }
  432. return ism
  433. }
  434. //计算province,city,district得分
  435. func PCDScore(j *ju.Job, stype, text string, score float64, isfull bool) {
  436. defer qu.Catch()
  437. if text != "" {
  438. if stype == "district" {
  439. tmpdistrict := make(map[string]float64)
  440. if isfull {
  441. tmpdistrict = j.FullDistrictScore
  442. } else {
  443. tmpdistrict = j.SimDistrictScore
  444. }
  445. scoretmp := tmpdistrict[text]
  446. tmpdistrict[text] = scoretmp + score
  447. } else if stype == "city" {
  448. tmpcity := make(map[string]float64)
  449. if isfull {
  450. tmpcity = j.FullCityScore
  451. } else {
  452. tmpcity = j.SimCityScore
  453. }
  454. scoretmp := tmpcity[text]
  455. tmpcity[text] = scoretmp + score
  456. } else if stype == "province" {
  457. tmpprovince := make(map[string]float64)
  458. if isfull {
  459. tmpprovince = j.FullAreaScore
  460. } else {
  461. tmpprovince = j.SimAreaScore
  462. }
  463. scoretmp := tmpprovince[text]
  464. tmpprovince[text] = scoretmp + score
  465. }
  466. }
  467. }
  468. func OtherScore(stype string, text []string, ps, cs, ds *map[string]int) {
  469. defer qu.Catch()
  470. for i, t := range text {
  471. if t != "" {
  472. if i == 0 { //p
  473. tmpscore := (*ps)[t]
  474. (*ps)[t] = tmpscore + 1
  475. } else if i == 1 { //c
  476. tmpscore := (*cs)[t]
  477. (*cs)[t] = tmpscore + 1
  478. } else if i == 2 { //d
  479. tmpscore := (*ds)[t]
  480. (*ds)[t] = tmpscore + 1
  481. }
  482. }
  483. }
  484. }
  485. func HighestScore(m map[string]int) string {
  486. result := ""
  487. tmpscore := 0
  488. for str, score := range m {
  489. if str != "" && tmpscore < score {
  490. result = str
  491. tmpscore = score
  492. }
  493. }
  494. return result
  495. }
  496. func HighestScoreArr(m map[string]float64) []string {
  497. result := make(map[float64][]string)
  498. tmpscore := 0.0
  499. for str, score := range m {
  500. if str != "" && tmpscore <= score {
  501. if result[tmpscore] != nil && tmpscore != score {
  502. delete(result, tmpscore)
  503. }
  504. if r := result[score]; r != nil {
  505. r = append(r, str)
  506. result[score] = r
  507. } else {
  508. result[score] = []string{str}
  509. }
  510. tmpscore = score
  511. }
  512. }
  513. return result[tmpscore]
  514. }
  515. func GetCity(area, city string, e *ExtractTask, finishC []string) string {
  516. for _, c := range finishC { //取最高分与province匹配的city
  517. if cfMap := e.CityFullMap[c]; cfMap != nil {
  518. if cfMap.P.Brief == area {
  519. city = c
  520. break
  521. }
  522. }
  523. }
  524. return city
  525. }
  526. func GetDistrict(area, city, district string, e *ExtractTask, finishD []string) (string, string) {
  527. for _, d := range finishD { //取最高分与province匹配的district
  528. if dcMap := e.DistrictCityMap[d]; dcMap != nil {
  529. if dcMap.P.Brief == area {
  530. district = d
  531. tmpcity := dcMap.Name
  532. if city != tmpcity {
  533. if cfMap := e.CityFullMap[tmpcity]; cfMap != nil {
  534. if cfMap.P.Brief == area {
  535. city = tmpcity
  536. break
  537. }
  538. }
  539. }
  540. }
  541. }
  542. }
  543. return city, district
  544. }
  545. func GetPCDByAreaDFA(province, acd string, e *ExtractTask, j *ju.Job, flag bool) (string, bool) {
  546. if word := e.ProvinceSimGet.CheckSensitiveWord(acd); word != "" { //取省
  547. if pbMap := e.ProvinceBriefMap[word]; pbMap != nil {
  548. province = pbMap.Brief
  549. if province == acd || pbMap.Name == acd { //用于判断area_city_district是否只有省份信息,flag为true就不在匹配area_city_district中的city和district
  550. flag = true
  551. }
  552. PCDScore(j, "province", province, 5, true)
  553. }
  554. }
  555. return province, flag
  556. }
  557. func GetPCDByCityDFA(province, city, acd string, e *ExtractTask, j *ju.Job, flag bool) (string, string, bool) {
  558. for pos, GET := range []*ju.DFA{e.CityAllGet, e.CitySimGet} { //取市
  559. if word := GET.CheckSensitiveWord(acd); word != "" {
  560. if pos == 0 { //全称
  561. if cfMap := e.CityFullMap[word]; cfMap != nil {
  562. if province != "" && cfMap.P.Brief == province { //acd有province信息
  563. city = cfMap.Name
  564. if acd == province+city || acd == cfMap.P.Name+city {
  565. flag = true
  566. }
  567. } else if province == "" { //acd有city;city和district信息
  568. city = cfMap.Name
  569. province = cfMap.P.Brief
  570. PCDScore(j, "province", province, 5, true)
  571. if acd == city {
  572. flag = true
  573. }
  574. }
  575. PCDScore(j, "city", city, 5, true)
  576. break
  577. }
  578. } else { //简称
  579. if cbMap := e.CityBriefMap[word]; cbMap != nil {
  580. if province != "" && cbMap.P.Brief == province {
  581. city = cbMap.Name
  582. if acd == province+city || acd == cbMap.P.Name+city {
  583. flag = true
  584. }
  585. } else if province == "" {
  586. city = cbMap.Name
  587. province = cbMap.P.Brief
  588. PCDScore(j, "province", province, 5, true)
  589. if acd == city {
  590. flag = true
  591. }
  592. }
  593. PCDScore(j, "city", city, 5, true)
  594. break
  595. }
  596. }
  597. }
  598. }
  599. return province, city, flag
  600. }
  601. func GetPCDByDistrictDFA(province, city, district, acd string, e *ExtractTask, j *ju.Job) (string, string, string) {
  602. //area_city_district字段不会单独存区信息(省市,省,市,省区,省市区)
  603. for pos, GET := range []*ju.DFA{e.DistrictAllGet, e.DistrictSimGet} { //取区
  604. if word := GET.CheckSensitiveWord(acd); word != "" {
  605. if dcMap := e.DistrictCityMap[word]; dcMap != nil {
  606. district = word
  607. if pos == 1 { //简称换为全称
  608. district = e.DistrictSimAndAll[district]
  609. }
  610. if city == "" && dcMap.P.Brief == province { //只有province和district(are_city_district:河南省二七区)
  611. city = dcMap.Name
  612. PCDScore(j, "city", city, 5, true)
  613. } else if province == "" { //province和city都没有(are_city_district:二七区)
  614. city = dcMap.Name
  615. province = dcMap.P.Brief
  616. PCDScore(j, "city", city, 5, true)
  617. PCDScore(j, "province", province, 5, true)
  618. }
  619. PCDScore(j, "district", district, 5, true)
  620. break
  621. }
  622. }
  623. }
  624. return province, city, district
  625. }