project_other.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "regexp"
  9. "sort"
  10. "strings"
  11. "unicode"
  12. )
  13. var (
  14. FilterReg_3 = regexp.MustCompile("(项目|公告|公示)$")
  15. FilterReg_2 = regexp.MustCompile("^[)\\)>》】\\]}}〕,,;;::'\"“”。.\\??、/+=\\_—*&……\\^%$¥@!!`~·(\\(<《【\\[{{〔]+$")
  16. FilterReg_1 = regexp.MustCompile("^([0-9]{1,3}|[零一二三四五六七八九十]{1,2}|联系人?|电话|地址|编号|采购|政府采购|成交|更正|招标|中标|变更|结果)$")
  17. FilterReg = regexp.MustCompile("^[的人号时元万公告项目地址电话邮编日期联系招标中结果成交项目项目采购采购项目政府采购公告更正公告]+$")
  18. )
  19. // getSearch 模拟网站所有,只提供精准和分词;不再打分;只过滤省市,只匹配标题、项目名称;根据参数决定是否匹配详情;
  20. func getSearch(client *elastic.Client, projectName, areacode string, isDetail int) ([]map[string]interface{}, error) {
  21. var results []map[string]interface{}
  22. seenIDs := make(map[string]*elastic.SearchHit)
  23. province, city := "", ""
  24. if areacode != "" {
  25. code := areacode[:6]
  26. where := map[string]interface{}{
  27. "code": code,
  28. }
  29. res, _ := MgoQY.FindOne("address_new_2020", where)
  30. province = util.ObjToString((*res)["province"])
  31. city = util.ObjToString((*res)["city"])
  32. }
  33. //fmt.Println(province, city)
  34. projectName = RemoveInvisibleChars(projectName)
  35. projectName = FilterGeneric(projectName)
  36. // 1. 精准查询
  37. preciseHits, err := searchPreciseOther(client, projectName, province, city, isDetail)
  38. if err != nil {
  39. return nil, err
  40. }
  41. for _, hit := range preciseHits {
  42. if _, exists := seenIDs[hit.Id]; !exists {
  43. seenIDs[hit.Id] = hit
  44. }
  45. }
  46. // 2. 分词查询(
  47. tokenHits, err := searchByTokenOther(client, projectName, province, city, isDetail)
  48. if err != nil {
  49. return nil, err
  50. }
  51. for _, hit := range tokenHits {
  52. if _, exists := seenIDs[hit.Id]; !exists {
  53. seenIDs[hit.Id] = hit
  54. }
  55. }
  56. for id, hit := range seenIDs {
  57. var doc map[string]interface{}
  58. if err = json.Unmarshal(hit.Source, &doc); err != nil {
  59. continue
  60. }
  61. // 从 Mongo 读取 detail 字段用于后续 buyer 过滤
  62. bidd, _ := MgoB.FindById("bidding", id, nil)
  63. detail := util.ObjToString((*bidd)["detail"])
  64. if detail != "" {
  65. doc["detail"] = detail
  66. }
  67. results = append(results, doc)
  68. }
  69. sort.SliceStable(results, func(i, j int) bool {
  70. return util.Int64All(results[i]["publishtime"]) > util.Int64All(results[j]["publishtime"])
  71. })
  72. return results, nil
  73. }
  74. func getSearchNew(client *elastic.Client, projectName, areacode string, isDetail int) ([]map[string]interface{}, error) {
  75. var results []map[string]interface{}
  76. seenIDs := make(map[string]*elastic.SearchHit)
  77. province, city := "", ""
  78. if areacode != "" {
  79. code := areacode[:6]
  80. where := map[string]interface{}{
  81. "code": code,
  82. }
  83. res, _ := MgoQY.FindOne("address_new_2020", where)
  84. province = util.ObjToString((*res)["province"])
  85. city = util.ObjToString((*res)["city"])
  86. }
  87. //fmt.Println(province, city)
  88. projectName = RemoveInvisibleChars(projectName)
  89. projectName = FilterGeneric(projectName)
  90. // 1. 精准查询
  91. preciseHits, err := searchPreciseOther(client, projectName, province, city, isDetail)
  92. if err != nil {
  93. return nil, err
  94. }
  95. for _, hit := range preciseHits {
  96. if _, exists := seenIDs[hit.Id]; !exists {
  97. seenIDs[hit.Id] = hit
  98. }
  99. }
  100. // 2. 分词查询(
  101. tokenHits, err := searchByTokenOtherNew(client, projectName, province, city, isDetail)
  102. if err != nil {
  103. return nil, err
  104. }
  105. for _, hit := range tokenHits {
  106. if _, exists := seenIDs[hit.Id]; !exists {
  107. seenIDs[hit.Id] = hit
  108. }
  109. }
  110. for id, hit := range seenIDs {
  111. var doc map[string]interface{}
  112. if err = json.Unmarshal(hit.Source, &doc); err != nil {
  113. continue
  114. }
  115. // 从 Mongo 读取 detail 字段用于后续 buyer 过滤
  116. bidd, _ := MgoB.FindById("bidding", id, nil)
  117. detail := util.ObjToString((*bidd)["detail"])
  118. //has := true
  119. //for _, v := range tokens {
  120. // if !strings.Contains(detail, v) {
  121. // has = false
  122. // break
  123. // }
  124. //}
  125. //if !has {
  126. // continue
  127. //}
  128. if detail != "" {
  129. doc["detail"] = detail
  130. }
  131. results = append(results, doc)
  132. }
  133. sort.SliceStable(results, func(i, j int) bool {
  134. return util.Int64All(results[i]["publishtime"]) > util.Int64All(results[j]["publishtime"])
  135. })
  136. return results, nil
  137. }
  138. // searchPreciseOther 精准搜索;m默认项目名称+标题;详情可选参数
  139. func searchPreciseOther(client *elastic.Client, projectName, area, city string, isDetail int) ([]*elastic.SearchHit, error) {
  140. fieldsToTry := []string{"projectname.pname", "title"}
  141. //if isDetail > 0 {
  142. // fieldsToTry = append(fieldsToTry, "detail")
  143. //}
  144. filtersToTry := [][]elastic.Query{
  145. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  146. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  147. }
  148. runQuery := func(withArea bool) ([]*elastic.SearchHit, error) {
  149. var allResults []*elastic.SearchHit
  150. seenIDs := make(map[string]bool)
  151. for _, field := range fieldsToTry {
  152. for _, filters := range filtersToTry {
  153. var queries []*elastic.BoolQuery
  154. queryBase := elastic.NewBoolQuery().
  155. Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
  156. Filter(filters...)
  157. if withArea {
  158. if area != "" {
  159. queryBase = queryBase.Must(elastic.NewTermQuery("area", area))
  160. }
  161. if city != "" {
  162. queryBase = queryBase.Must(elastic.NewTermQuery("city", city))
  163. }
  164. }
  165. queries = append(queries, queryBase)
  166. for _, query := range queries {
  167. fetchFields := elastic.NewFetchSourceContext(true).Include(
  168. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  169. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  170. "s_winner", "winnertel", "agency", "publishtime")
  171. searchResult, err := client.Search().
  172. Index("bidding").
  173. Query(query).
  174. FetchSourceContext(fetchFields).
  175. Do(context.Background())
  176. if err != nil {
  177. return nil, err
  178. }
  179. for _, hit := range searchResult.Hits.Hits {
  180. if !seenIDs[hit.Id] {
  181. seenIDs[hit.Id] = true
  182. allResults = append(allResults, hit)
  183. }
  184. }
  185. }
  186. }
  187. }
  188. return allResults, nil
  189. }
  190. // 第一次查询:包含省份和城市过滤(如果有)
  191. results, err := runQuery(true)
  192. if err != nil {
  193. return nil, err
  194. }
  195. if len(results) == 0 && area != "" {
  196. // 如果查不到,并且存在省份条件,再执行一次去掉 area 的查询
  197. return runQuery(false)
  198. }
  199. return results, nil
  200. }
  201. // searchByTokenOther 分词查询;
  202. func searchByTokenOther2(client *elastic.Client, projectName, province, city string, isDetail int) ([]*elastic.SearchHit, error) {
  203. fieldsToTry := []string{"projectname.pname", "title"}
  204. if isDetail > 0 {
  205. fieldsToTry = append(fieldsToTry, "detail")
  206. }
  207. filtersToTry := [][]elastic.Query{
  208. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  209. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  210. }
  211. // 分词处理
  212. analyzeResp, err := client.IndexAnalyze().
  213. Index("bidding").
  214. Analyzer("ik_smart").
  215. Text(projectName).
  216. Do(context.Background())
  217. if err != nil {
  218. return nil, err
  219. }
  220. var tokens []string
  221. for _, token := range analyzeResp.Tokens {
  222. tokens = append(tokens, token.Token)
  223. }
  224. if len(tokens) == 0 {
  225. return nil, fmt.Errorf("no tokens found from ik_smart")
  226. }
  227. queryText := strings.Join(tokens, " ")
  228. // 指定返回字段
  229. fetchFields := elastic.NewFetchSourceContext(true).Include(
  230. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  231. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  232. "s_winner", "winnertel", "agency", "publishtime")
  233. var allHits []*elastic.SearchHit
  234. seen := make(map[string]bool)
  235. for _, field := range fieldsToTry {
  236. for _, filters := range filtersToTry {
  237. var queries []*elastic.BoolQuery
  238. if field == "detail" {
  239. query := elastic.NewBoolQuery()
  240. if province != "" {
  241. query = query.Must(elastic.NewTermQuery("area", province))
  242. }
  243. query = query.Must(elastic.NewMatchQuery(field, queryText)).
  244. Filter(filters...)
  245. queries = append(queries, query)
  246. } else {
  247. // 省+市
  248. if province != "" && city != "" {
  249. q := elastic.NewBoolQuery().
  250. Must(elastic.NewMatchQuery(field, queryText)).
  251. Must(elastic.NewTermQuery("area", province)).
  252. Must(elastic.NewTermQuery("city", city)).
  253. Filter(filters...)
  254. queries = append(queries, q)
  255. }
  256. // 仅省
  257. if province != "" {
  258. q := elastic.NewBoolQuery().
  259. Must(elastic.NewMatchQuery(field, queryText)).
  260. Must(elastic.NewTermQuery("area", province)).
  261. Filter(filters...)
  262. queries = append(queries, q)
  263. }
  264. }
  265. for _, query := range queries {
  266. searchResult, err := client.Search().
  267. Index("bidding").
  268. Query(query).
  269. FetchSourceContext(fetchFields).
  270. Do(context.Background())
  271. if err != nil {
  272. continue
  273. }
  274. for _, hit := range searchResult.Hits.Hits {
  275. if !seen[hit.Id] {
  276. seen[hit.Id] = true
  277. allHits = append(allHits, hit)
  278. }
  279. }
  280. }
  281. }
  282. }
  283. return allHits, nil
  284. }
  285. func searchByTokenOther(client *elastic.Client, projectName, province, city string, isDetail int) ([]*elastic.SearchHit, error) {
  286. fieldsToTry := []string{"projectname.pname", "title"}
  287. if isDetail > 0 {
  288. fieldsToTry = append(fieldsToTry, "detail")
  289. }
  290. filtersToTry := [][]elastic.Query{
  291. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  292. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  293. }
  294. // 分词处理
  295. analyzeResp, err := client.IndexAnalyze().
  296. Index("bidding").
  297. Analyzer("ik_smart").
  298. Text(projectName).
  299. Do(context.Background())
  300. if err != nil {
  301. return nil, err
  302. }
  303. var tokens []string
  304. for _, token := range analyzeResp.Tokens {
  305. tokens = append(tokens, token.Token)
  306. }
  307. if len(tokens) == 0 {
  308. return nil, fmt.Errorf("no tokens found from ik_smart")
  309. }
  310. queryText := strings.Join(tokens, " ")
  311. // 指定返回字段
  312. fetchFields := elastic.NewFetchSourceContext(true).Include(
  313. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  314. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  315. "s_winner", "winnertel", "agency", "publishtime")
  316. // 抽象出内部查询逻辑,参数控制是否使用省份过滤
  317. runQuery := func(withProvince bool) ([]*elastic.SearchHit, error) {
  318. var allHits []*elastic.SearchHit
  319. seen := make(map[string]bool)
  320. for _, field := range fieldsToTry {
  321. for _, filters := range filtersToTry {
  322. //query := elastic.NewBoolQuery().
  323. // Must(elastic.NewMatchQuery(field, queryText)).
  324. // Filter(filters...)
  325. query := elastic.NewBoolQuery().
  326. Must(
  327. elastic.NewMultiMatchQuery(queryText, field).
  328. MinimumShouldMatch("100%"),
  329. ).
  330. Filter(filters...)
  331. // 动态加上 area/city 条件
  332. if withProvince && province != "" {
  333. query = query.Must(elastic.NewTermQuery("area", province))
  334. }
  335. if city != "" {
  336. query = query.Must(elastic.NewTermQuery("city", city))
  337. }
  338. searchResult, err := client.Search().
  339. Index("bidding").
  340. Query(query).
  341. FetchSourceContext(fetchFields).
  342. Do(context.Background())
  343. if err != nil {
  344. continue
  345. }
  346. for _, hit := range searchResult.Hits.Hits {
  347. if !seen[hit.Id] {
  348. seen[hit.Id] = true
  349. allHits = append(allHits, hit)
  350. }
  351. }
  352. }
  353. }
  354. return allHits, nil
  355. }
  356. // 第一次尝试带上 province
  357. results, err := runQuery(true)
  358. if err != nil {
  359. return nil, err
  360. }
  361. // 如果查不到,并且设置了省份,则再试一次去掉 province
  362. if len(results) == 0 && province != "" {
  363. return runQuery(false)
  364. }
  365. return results, nil
  366. }
  367. // searchByTokenOtherNew searchByTokenOtherNew
  368. func searchByTokenOtherNew(client *elastic.Client, projectName, province, city string, isDetail int) ([]*elastic.SearchHit, error) {
  369. var tokens []string
  370. fieldsToTry := []string{"projectname.pname", "title"}
  371. if isDetail > 0 {
  372. fieldsToTry = append(fieldsToTry, "detail")
  373. }
  374. filtersToTry := [][]elastic.Query{
  375. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  376. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  377. }
  378. // 分词处理
  379. analyzeResp, err := client.IndexAnalyze().
  380. Index("bidding").
  381. Analyzer("ik_smart").
  382. Text(projectName).
  383. Do(context.Background())
  384. if err != nil {
  385. return nil, err
  386. }
  387. for _, token := range analyzeResp.Tokens {
  388. tokens = append(tokens, token.Token)
  389. }
  390. if len(tokens) == 0 {
  391. return nil, fmt.Errorf("no tokens found from ik_smart")
  392. }
  393. // 指定返回字段
  394. fetchFields := elastic.NewFetchSourceContext(true).Include(
  395. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  396. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  397. "s_winner", "winnertel", "agency", "publishtime")
  398. // 抽象出内部查询逻辑,参数控制是否使用省份过滤
  399. runQuery := func(withProvince bool) ([]*elastic.SearchHit, error) {
  400. var allHits []*elastic.SearchHit
  401. seen := make(map[string]bool)
  402. for _, field := range fieldsToTry {
  403. boolQ := elastic.NewBoolQuery()
  404. for _, token := range tokens {
  405. boolQ = boolQ.Must(
  406. elastic.NewTermQuery(field, token), // 精确匹配分词结果
  407. )
  408. }
  409. for _, filters := range filtersToTry {
  410. query := elastic.NewBoolQuery().
  411. Must(
  412. boolQ,
  413. ).
  414. Filter(filters...)
  415. // 动态加上 area/city 条件
  416. if withProvince && province != "" {
  417. query = query.Must(elastic.NewTermQuery("area", province))
  418. }
  419. if withProvince && city != "" {
  420. query = query.Must(elastic.NewTermQuery("city", city))
  421. }
  422. searchResult, err := client.Search().
  423. Index("bidding").
  424. Query(query).
  425. FetchSourceContext(fetchFields).
  426. Do(context.Background())
  427. if err != nil {
  428. continue
  429. }
  430. for _, hit := range searchResult.Hits.Hits {
  431. if !seen[hit.Id] {
  432. seen[hit.Id] = true
  433. allHits = append(allHits, hit)
  434. }
  435. }
  436. }
  437. }
  438. return allHits, nil
  439. }
  440. // 第一次尝试带上 province
  441. results, err := runQuery(true)
  442. if err != nil {
  443. return nil, err
  444. }
  445. // 如果查不到,并且设置了省份,则再试一次去掉 province
  446. if len(results) == 0 && province != "" {
  447. return runQuery(false)
  448. }
  449. return results, nil
  450. }
  451. // RemoveInvisibleChars 移除控制字符和不可见字符
  452. func RemoveInvisibleChars(s string) string {
  453. return strings.Map(func(r rune) rune {
  454. // 保留普通字符、中文、标点等可见字符
  455. if unicode.IsGraphic(r) && !unicode.IsControl(r) {
  456. return r
  457. }
  458. return -1
  459. }, s)
  460. }
  461. // FilterGeneric 通用词处理
  462. func FilterGeneric(keyWords string) string {
  463. keyWords = FilterReg_3.ReplaceAllString(keyWords, "")
  464. keyWords = FilterReg_2.ReplaceAllString(keyWords, "")
  465. keyWords = FilterReg_1.ReplaceAllString(keyWords, "")
  466. keyWords = FilterReg.ReplaceAllString(keyWords, "")
  467. return keyWords
  468. }