project_other.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "regexp"
  9. "sort"
  10. "strings"
  11. "unicode"
  12. )
  13. var (
  14. FilterReg_3 = regexp.MustCompile("(项目|公告|公示)$")
  15. FilterReg_2 = regexp.MustCompile("^[)\\)>》】\\]}}〕,,;;::'\"“”。.\\??、/+=\\_—*&……\\^%$¥@!!`~·(\\(<《【\\[{{〔]+$")
  16. FilterReg_1 = regexp.MustCompile("^([0-9]{1,3}|[零一二三四五六七八九十]{1,2}|联系人?|电话|地址|编号|采购|政府采购|成交|更正|招标|中标|变更|结果)$")
  17. FilterReg = regexp.MustCompile("^[的人号时元万公告项目地址电话邮编日期联系招标中结果成交项目项目采购采购项目政府采购公告更正公告]+$")
  18. )
  19. // getSearch 模拟网站所有,只提供精准和分词;不再打分;只过滤省市,只匹配标题、项目名称;根据参数决定是否匹配详情;
  20. func getSearch(client *elastic.Client, projectName, areacode string, isDetail int) ([]map[string]interface{}, error) {
  21. var results []map[string]interface{}
  22. seenIDs := make(map[string]*elastic.SearchHit)
  23. province, city := "", ""
  24. if areacode != "" {
  25. code := areacode[:6]
  26. where := map[string]interface{}{
  27. "code": code,
  28. }
  29. res, _ := MgoQY.FindOne("address_new_2020", where)
  30. province = util.ObjToString((*res)["province"])
  31. city = util.ObjToString((*res)["city"])
  32. }
  33. //fmt.Println(province, city)
  34. projectName = RemoveInvisibleChars(projectName)
  35. projectName = FilterGeneric(projectName)
  36. // 1. 精准查询
  37. preciseHits, err := searchPreciseOther(client, projectName, province, city, isDetail)
  38. if err != nil {
  39. return nil, err
  40. }
  41. for _, hit := range preciseHits {
  42. if _, exists := seenIDs[hit.Id]; !exists {
  43. seenIDs[hit.Id] = hit
  44. }
  45. }
  46. // 2. 分词查询(
  47. tokenHits, err := searchByTokenOther(client, projectName, province, city, isDetail)
  48. if err != nil {
  49. return nil, err
  50. }
  51. for _, hit := range tokenHits {
  52. if _, exists := seenIDs[hit.Id]; !exists {
  53. seenIDs[hit.Id] = hit
  54. }
  55. }
  56. for id, hit := range seenIDs {
  57. var doc map[string]interface{}
  58. if err = json.Unmarshal(hit.Source, &doc); err != nil {
  59. continue
  60. }
  61. // 从 Mongo 读取 detail 字段用于后续 buyer 过滤
  62. bidd, _ := MgoB.FindById("bidding", id, nil)
  63. detail := util.ObjToString((*bidd)["detail"])
  64. if detail != "" {
  65. doc["detail"] = detail
  66. }
  67. results = append(results, doc)
  68. }
  69. sort.SliceStable(results, func(i, j int) bool {
  70. return util.Int64All(results[i]["publishtime"]) > util.Int64All(results[j]["publishtime"])
  71. })
  72. return results, nil
  73. }
  74. // searchPreciseOther 精准搜索;m默认项目名称+标题;详情可选参数
  75. func searchPreciseOther(client *elastic.Client, projectName, area, city string, isDetail int) ([]*elastic.SearchHit, error) {
  76. fieldsToTry := []string{"projectname.pname", "title"}
  77. if isDetail > 0 {
  78. fieldsToTry = append(fieldsToTry, "detail")
  79. }
  80. filtersToTry := [][]elastic.Query{
  81. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  82. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  83. }
  84. runQuery := func(withArea bool) ([]*elastic.SearchHit, error) {
  85. var allResults []*elastic.SearchHit
  86. seenIDs := make(map[string]bool)
  87. for _, field := range fieldsToTry {
  88. for _, filters := range filtersToTry {
  89. var queries []*elastic.BoolQuery
  90. queryBase := elastic.NewBoolQuery().
  91. Must(elastic.NewMultiMatchQuery(projectName, field).Type("phrase")).
  92. Filter(filters...)
  93. if withArea {
  94. if area != "" {
  95. queryBase = queryBase.Must(elastic.NewTermQuery("area", area))
  96. }
  97. if city != "" {
  98. queryBase = queryBase.Must(elastic.NewTermQuery("city", city))
  99. }
  100. }
  101. queries = append(queries, queryBase)
  102. for _, query := range queries {
  103. fetchFields := elastic.NewFetchSourceContext(true).Include(
  104. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  105. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  106. "s_winner", "winnertel", "agency", "publishtime")
  107. searchResult, err := client.Search().
  108. Index("bidding").
  109. Query(query).
  110. FetchSourceContext(fetchFields).
  111. Do(context.Background())
  112. if err != nil {
  113. return nil, err
  114. }
  115. for _, hit := range searchResult.Hits.Hits {
  116. if !seenIDs[hit.Id] {
  117. seenIDs[hit.Id] = true
  118. allResults = append(allResults, hit)
  119. }
  120. }
  121. }
  122. }
  123. }
  124. return allResults, nil
  125. }
  126. // 第一次查询:包含省份和城市过滤(如果有)
  127. results, err := runQuery(true)
  128. if err != nil {
  129. return nil, err
  130. }
  131. if len(results) == 0 && area != "" {
  132. // 如果查不到,并且存在省份条件,再执行一次去掉 area 的查询
  133. return runQuery(false)
  134. }
  135. return results, nil
  136. }
  137. // searchByTokenOther 分词查询;
  138. func searchByTokenOther2(client *elastic.Client, projectName, province, city string, isDetail int) ([]*elastic.SearchHit, error) {
  139. fieldsToTry := []string{"projectname.pname", "title"}
  140. if isDetail > 0 {
  141. fieldsToTry = append(fieldsToTry, "detail")
  142. }
  143. filtersToTry := [][]elastic.Query{
  144. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  145. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  146. }
  147. // 分词处理
  148. analyzeResp, err := client.IndexAnalyze().
  149. Index("bidding").
  150. Analyzer("ik_smart").
  151. Text(projectName).
  152. Do(context.Background())
  153. if err != nil {
  154. return nil, err
  155. }
  156. var tokens []string
  157. for _, token := range analyzeResp.Tokens {
  158. tokens = append(tokens, token.Token)
  159. }
  160. if len(tokens) == 0 {
  161. return nil, fmt.Errorf("no tokens found from ik_smart")
  162. }
  163. queryText := strings.Join(tokens, " ")
  164. // 指定返回字段
  165. fetchFields := elastic.NewFetchSourceContext(true).Include(
  166. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  167. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  168. "s_winner", "winnertel", "agency", "publishtime")
  169. var allHits []*elastic.SearchHit
  170. seen := make(map[string]bool)
  171. for _, field := range fieldsToTry {
  172. for _, filters := range filtersToTry {
  173. var queries []*elastic.BoolQuery
  174. if field == "detail" {
  175. query := elastic.NewBoolQuery()
  176. if province != "" {
  177. query = query.Must(elastic.NewTermQuery("area", province))
  178. }
  179. query = query.Must(elastic.NewMatchQuery(field, queryText)).
  180. Filter(filters...)
  181. queries = append(queries, query)
  182. } else {
  183. // 省+市
  184. if province != "" && city != "" {
  185. q := elastic.NewBoolQuery().
  186. Must(elastic.NewMatchQuery(field, queryText)).
  187. Must(elastic.NewTermQuery("area", province)).
  188. Must(elastic.NewTermQuery("city", city)).
  189. Filter(filters...)
  190. queries = append(queries, q)
  191. }
  192. // 仅省
  193. if province != "" {
  194. q := elastic.NewBoolQuery().
  195. Must(elastic.NewMatchQuery(field, queryText)).
  196. Must(elastic.NewTermQuery("area", province)).
  197. Filter(filters...)
  198. queries = append(queries, q)
  199. }
  200. }
  201. for _, query := range queries {
  202. searchResult, err := client.Search().
  203. Index("bidding").
  204. Query(query).
  205. FetchSourceContext(fetchFields).
  206. Do(context.Background())
  207. if err != nil {
  208. continue
  209. }
  210. for _, hit := range searchResult.Hits.Hits {
  211. if !seen[hit.Id] {
  212. seen[hit.Id] = true
  213. allHits = append(allHits, hit)
  214. }
  215. }
  216. }
  217. }
  218. }
  219. return allHits, nil
  220. }
  221. func searchByTokenOther(client *elastic.Client, projectName, province, city string, isDetail int) ([]*elastic.SearchHit, error) {
  222. fieldsToTry := []string{"projectname.pname", "title"}
  223. if isDetail > 0 {
  224. fieldsToTry = append(fieldsToTry, "detail")
  225. }
  226. filtersToTry := [][]elastic.Query{
  227. {elastic.NewTermsQuery("subtype", "中标", "成交", "合同", "单一")},
  228. {elastic.NewTermsQuery("toptype", "招标", "预告", "采购意向", "拟建")},
  229. }
  230. // 分词处理
  231. analyzeResp, err := client.IndexAnalyze().
  232. Index("bidding").
  233. Analyzer("ik_smart").
  234. Text(projectName).
  235. Do(context.Background())
  236. if err != nil {
  237. return nil, err
  238. }
  239. var tokens []string
  240. for _, token := range analyzeResp.Tokens {
  241. tokens = append(tokens, token.Token)
  242. }
  243. if len(tokens) == 0 {
  244. return nil, fmt.Errorf("no tokens found from ik_smart")
  245. }
  246. queryText := strings.Join(tokens, " ")
  247. // 指定返回字段
  248. fetchFields := elastic.NewFetchSourceContext(true).Include(
  249. "id", "title", "projectname", "projectcode", "bidamount", "area", "city",
  250. "toptype", "subtype", "buyer", "budget", "buyerperson", "buyertel",
  251. "s_winner", "winnertel", "agency", "publishtime")
  252. // 抽象出内部查询逻辑,参数控制是否使用省份过滤
  253. runQuery := func(withProvince bool) ([]*elastic.SearchHit, error) {
  254. var allHits []*elastic.SearchHit
  255. seen := make(map[string]bool)
  256. for _, field := range fieldsToTry {
  257. for _, filters := range filtersToTry {
  258. //query := elastic.NewBoolQuery().
  259. // Must(elastic.NewMatchQuery(field, queryText)).
  260. // Filter(filters...)
  261. query := elastic.NewBoolQuery().
  262. Must(
  263. elastic.NewMultiMatchQuery(queryText, field).
  264. MinimumShouldMatch("100%"),
  265. ).
  266. Filter(filters...)
  267. // 动态加上 area/city 条件
  268. if withProvince && province != "" {
  269. query = query.Must(elastic.NewTermQuery("area", province))
  270. }
  271. if city != "" {
  272. query = query.Must(elastic.NewTermQuery("city", city))
  273. }
  274. searchResult, err := client.Search().
  275. Index("bidding").
  276. Query(query).
  277. FetchSourceContext(fetchFields).
  278. Do(context.Background())
  279. if err != nil {
  280. continue
  281. }
  282. for _, hit := range searchResult.Hits.Hits {
  283. if !seen[hit.Id] {
  284. seen[hit.Id] = true
  285. allHits = append(allHits, hit)
  286. }
  287. }
  288. }
  289. }
  290. return allHits, nil
  291. }
  292. // 第一次尝试带上 province
  293. results, err := runQuery(true)
  294. if err != nil {
  295. return nil, err
  296. }
  297. // 如果查不到,并且设置了省份,则再试一次去掉 province
  298. if len(results) == 0 && province != "" {
  299. return runQuery(false)
  300. }
  301. return results, nil
  302. }
  303. // RemoveInvisibleChars 移除控制字符和不可见字符
  304. func RemoveInvisibleChars(s string) string {
  305. return strings.Map(func(r rune) rune {
  306. // 保留普通字符、中文、标点等可见字符
  307. if unicode.IsGraphic(r) && !unicode.IsControl(r) {
  308. return r
  309. }
  310. return -1
  311. }, s)
  312. }
  313. // FilterGeneric 通用词处理
  314. func FilterGeneric(keyWords string) string {
  315. keyWords = FilterReg_3.ReplaceAllString(keyWords, "")
  316. keyWords = FilterReg_2.ReplaceAllString(keyWords, "")
  317. keyWords = FilterReg_1.ReplaceAllString(keyWords, "")
  318. keyWords = FilterReg.ReplaceAllString(keyWords, "")
  319. return keyWords
  320. }