search.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. package util
  2. import (
  3. MC "app.yhyue.com/moapp/jybase/common"
  4. elastic "app.yhyue.com/moapp/jybase/esv1"
  5. "crypto/rand"
  6. "encoding/json"
  7. "fmt"
  8. "io/ioutil"
  9. "log"
  10. "math/big"
  11. "net/http"
  12. "net/url"
  13. "regexp"
  14. "strconv"
  15. "strings"
  16. "time"
  17. "unicode"
  18. )
  19. var MatchSpace = regexp.MustCompile("\\s+")
  20. var filterReg_3 = regexp.MustCompile("(项目|公告|公示)$")
  21. var filterReg_2 = regexp.MustCompile("^[)\\)>》】\\]}}〕,,;;::'\"“”。.\\??、/+=\\_—*&……\\^%$¥@!!`~·(\\(<《【\\[{{〔]+$")
  22. var filterReg_1 = regexp.MustCompile("^([0-9]{1,3}|[零一二三四五六七八九十]{1,2}|联系人?|电话|地址|编号|采购|政府采购|成交|更正|招标|中标|变更|结果)$")
  23. var filterReg = regexp.MustCompile("^[的人号时元万公告项目地址电话邮编日期联系招标中结果成交项目项目采购采购项目政府采购公告更正公告]+$")
  24. var PhoneReg = regexp.MustCompile("^[1][3-9][0-9]{9}$")
  25. var EmailPattern = regexp.MustCompile("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$")
  26. func SearchHistory(history, searchvalue string) []string {
  27. arrs := strings.Split(history, ",")
  28. //新增历史记录
  29. if history == "" {
  30. arrs = make([]string, 0)
  31. }
  32. for k, v := range arrs {
  33. if v == strings.TrimSpace(searchvalue) {
  34. arrs = append(arrs[:k], arrs[k+1:]...)
  35. break
  36. }
  37. }
  38. arrs = append(arrs, searchvalue)
  39. if len(arrs) > 10 {
  40. arrs = arrs[1:11]
  41. }
  42. return arrs
  43. }
  44. func FilteKey(k string) string {
  45. k = strings.TrimSpace(k)
  46. k = filterReg_3.ReplaceAllString(k, "")
  47. k = filterReg_2.ReplaceAllString(k, "")
  48. k = filterReg_1.ReplaceAllString(k, "")
  49. k = filterReg.ReplaceAllString(k, "")
  50. return k
  51. }
  52. //超过20个字,截断
  53. //返回截取后的字符串和截取掉中的前3个字
  54. func InterceptSearchKW(word string, isIntercept, isFilter bool) (b_word, a_word, s_word string) {
  55. if isFilter {
  56. word = FilteKey(word)
  57. }
  58. word = MatchSpace.ReplaceAllString(strings.TrimSpace(word), " ")
  59. words := []rune(word)
  60. if len(words) > 20 && isIntercept {
  61. b_word = string(words[:20])
  62. b_word = strings.TrimSpace(b_word)
  63. if len(words) > 23 {
  64. a_word = string(words[20:23])
  65. } else {
  66. a_word = string(words[20:])
  67. }
  68. } else {
  69. b_word = word
  70. }
  71. a_word = strings.TrimSpace(a_word)
  72. s_word = MatchSpace.ReplaceAllString(b_word, "+")
  73. return
  74. }
  75. func HttpEs(ques, analyzer, esAddress string) (res string) {
  76. var addrs []string
  77. surl := ""
  78. for _, s := range strings.Split(esAddress, ",") {
  79. addrs = append(addrs, s)
  80. }
  81. i, _ := rand.Int(rand.Reader, big.NewInt(int64(len(addrs)))) //随机
  82. surl = addrs[int(i.Int64())] + "/bidding/_analyze"
  83. URL, _ := url.Parse(surl)
  84. Q := URL.Query()
  85. Q.Add("text", ques)
  86. Q.Add("analyzer", analyzer)
  87. URL.RawQuery = Q.Encode()
  88. resp, err := http.Get(URL.String())
  89. if err != nil {
  90. log.Println("es连接失败 err1:", err)
  91. resp, err = getesResp(ques, analyzer, addrs)
  92. if err != nil {
  93. return
  94. }
  95. }
  96. result, err := ioutil.ReadAll(resp.Body)
  97. if err == nil {
  98. defer resp.Body.Close()
  99. var resmap map[string]interface{}
  100. json.Unmarshal(result, &resmap)
  101. if resmap != nil && resmap["tokens"] != nil {
  102. tokens := MC.ObjArrToMapArr(resmap["tokens"].([]interface{}))
  103. for _, v := range tokens {
  104. token := MC.ObjToString(v["token"])
  105. if len([]rune(token)) == 1 && unicode.IsLetter([]rune(token)[0]) {
  106. continue
  107. }
  108. if res != "" {
  109. res += "+"
  110. }
  111. res += token
  112. }
  113. }
  114. }
  115. return
  116. }
  117. //
  118. func getesResp(ques, analyzer string, addrs []string) (resp *http.Response, err error) {
  119. for _, v := range addrs {
  120. surl := v + "/bidding/_analyze"
  121. URL, _ := url.Parse(surl)
  122. Q := URL.Query()
  123. Q.Add("text", ques)
  124. Q.Add("analyzer", analyzer)
  125. URL.RawQuery = Q.Encode()
  126. resp, err = http.Get(URL.String())
  127. if err == nil {
  128. break
  129. }
  130. }
  131. return resp, err
  132. }
  133. //pc、微信、app 招标信息搜索
  134. const (
  135. INDEX = "bidding"
  136. TYPE = "bidding"
  137. bidSearch_sort = `{"publishtime":-1}`
  138. //招标搜索分页--每页显示数量
  139. SearchPageSize_APP = 50
  140. SearchPageSize_WX = 50
  141. SearchPageSize_PC = 50
  142. //招标搜索分页--最大页数
  143. SearchMaxPageNum_APP = 10
  144. SearchMaxPageNum_WX = 10
  145. SearchMaxPageNum_PC = 10 //免费用户500条记录
  146. SearchMaxPageNum_PAYED = 100 //付费用户5000条记录
  147. bidSearch_field_1 = `"_id","title","publishtime","toptype","subtype","type","area","city","s_subscopeclass","bidamount","budget","buyerclass"`
  148. bidSearch_field = bidSearch_field_1 + `,"bidopentime","winner","buyer","projectname","projectcode","projectinfo"`
  149. )
  150. func SearchData(platform string, request *http.Request, currentPage int, userId, secondKWS, s_word, area, city, publishtime, subtype, industry, minprice, maxprice, winner, buyerclass, hasBuyerTel, hasWinnerTel, fileExists string, start, pageSize int, isGetCount bool, queryItems []string, field, notkey string, isPayedUser bool) (second, b_word, a_word, pcAjaxFlag, secondFlag string, count, totalPage int64, list *[]map[string]interface{}) {
  151. count, totalPage, list = GetPcBidSearchData(s_word, area, city, publishtime, subtype, industry, minprice, maxprice, winner, buyerclass, hasBuyerTel, hasWinnerTel, fileExists, start, pageSize, isGetCount, queryItems, field, notkey, isPayedUser)
  152. //if len([]rune(s_word)) > 3 && int(count) < SearchPageSize_PC && start == 0 {
  153. // var paramList = list
  154. // s_word, pcAjaxFlag, secondFlag, second, list = IntegratedData(platform, s_word, secondKWS, industry, minprice, maxprice, hasBuyerTel, hasWinnerTel, fileExists, secondFlag, area, city, publishtime, subtype, buyerclass, notkey, queryItems, paramList)
  155. //}
  156. listSize := 0
  157. if list != nil {
  158. listSize = len(*list)
  159. }
  160. //public.SaveUserSearchLog(request, userId, -1, platform, "超级搜索", map[string]interface{}{
  161. // "search_word": MC.If(platform == "app" || platform == "wx", searchvalue, s_word),
  162. // "search_area": area,
  163. // "search_city": city,
  164. // "search_price": []string{minprice, maxprice},
  165. // "search_publishtime": publishtime,
  166. // "search_type": subtype,
  167. // "search_industry": industry,
  168. // "pagenum": currentPage,
  169. // "pagesize": listSize,
  170. // "fileExists": fileExists,
  171. //})
  172. return
  173. }
  174. //GetPcBidSearchData pc端招标信息搜索
  175. func GetPcBidSearchData(searchvalue, area, city, publishtime, subtype, industry, minprice, maxprice, winner, buyerclass, hasBuyerTel, hasWinnerTel, fileExists, selectType, field, notkey string, start, pageSize int, isGetCount bool, ispayed bool) (count, totalPage int64, list *[]map[string]interface{}) {
  176. var findfields string
  177. var hightlightContent bool = false //是否高亮正文
  178. for _, v := range selectTypeArr {
  179. if v == "detail" {
  180. hightlightContent = true
  181. break
  182. }
  183. }
  184. if selectTypeArr == nil || len(selectTypeArr) == 0 {
  185. findfields = `"title"`
  186. } else {
  187. findfields = fmt.Sprintf(`"%s"`, strings.Join(selectTypeArr, "\",\""))
  188. }
  189. qstr := GetSearchQuery(searchvalue, industry, minprice, maxprice, hasBuyerTel, hasWinnerTel, fileExists, findfields, GetBidSearchQuery(area, city, publishtime, subtype, winner, buyerclass), notkey)
  190. if isGetCount && qstr != "" && start == 0 {
  191. count = elastic.Count(INDEX, TYPE, qstr)
  192. }
  193. if !isGetCount || count > 0 || start > 0 {
  194. var repl *[]map[string]interface{}
  195. if hightlightContent {
  196. repl = elastic.GetAllByNgram(INDEX, TYPE, qstr, `"detail"`, bidSearch_sort, field, start, pageSize, 115, true)
  197. } else {
  198. repl = elastic.GetAllByNgram(INDEX, TYPE, qstr, ``, bidSearch_sort, field, start, pageSize, 0, false)
  199. }
  200. if repl != nil && *repl != nil && len(*repl) > 0 {
  201. BidListConvert(industry, repl)
  202. list = repl
  203. }
  204. }
  205. limitCount := MC.If(ispayed, int64(SearchPageSize_PC*SearchMaxPageNum_PAYED), int64(SearchPageSize_PC*SearchMaxPageNum_PC)).(int64)
  206. if count > limitCount {
  207. count = limitCount
  208. }
  209. totalPage = (count + int64(SearchPageSize_PC) - 1) / int64(SearchPageSize_PC)
  210. return
  211. }
  212. func GetBidSearchQuery(area, city, publishtime, subtype, winner, buyerclass string) string {
  213. query := ``
  214. if area != "" {
  215. query += `{"terms":{"area":[`
  216. for k, v := range strings.Split(area, ",") {
  217. if k > 0 {
  218. query += `,`
  219. }
  220. query += `"` + v + `"`
  221. }
  222. query += `]}}`
  223. }
  224. //
  225. if city != "" {
  226. if len(query) > 0 {
  227. query += ","
  228. }
  229. query += `{"terms":{"city":[`
  230. for k, v := range strings.Split(city, ",") {
  231. if k > 0 {
  232. query += `,`
  233. }
  234. query += `"` + v + `"`
  235. }
  236. query += `]}}`
  237. }
  238. if publishtime != "" {
  239. if len(query) > 0 {
  240. query += ","
  241. }
  242. starttime, endtime := "", ""
  243. now := time.Now()
  244. if publishtime == "lately-7" { //最近7天
  245. starttime = fmt.Sprint(time.Date(now.Year(), now.Month(), now.Day()-7, 0, 0, 0, 0, time.Local).Unix())
  246. } else if publishtime == "lately-30" { //最近30天
  247. starttime = fmt.Sprint(time.Date(now.Year(), now.Month(), now.Day()-30, 0, 0, 0, 0, time.Local).Unix())
  248. } else if publishtime == "thisyear" { //最近一年
  249. starttime = fmt.Sprint(time.Date(now.Year()-1, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Unix())
  250. endtime = fmt.Sprint(now.Unix())
  251. } else if publishtime == "threeyear" { //最近三年
  252. starttime = fmt.Sprint(time.Date(now.Year()-3, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Unix())
  253. endtime = fmt.Sprint(now.Unix())
  254. } else if publishtime == "fiveyear" { //最近五年
  255. starttime = fmt.Sprint(time.Date(now.Year()-5, now.Month(), now.Day(), now.Hour(), now.Minute(), now.Second(), 0, time.Local).Unix())
  256. endtime = fmt.Sprint(now.Unix())
  257. } else {
  258. starttime = strings.Split(publishtime, "_")[0]
  259. endtime = strings.Split(publishtime, "_")[1]
  260. etTime := time.Now()
  261. if endtime != "" {
  262. et, _ := strconv.ParseInt(endtime, 0, 64)
  263. etTime = time.Unix(et, 0)
  264. }
  265. endtime = fmt.Sprint(time.Date(etTime.Year(), etTime.Month(), etTime.Day()+1, 0, 0, 0, 0, time.Local).Unix())
  266. }
  267. query += `{"range":{"publishtime":{`
  268. if starttime != "" {
  269. query += `"gte":` + starttime
  270. }
  271. if starttime != "" && endtime != "" {
  272. query += `,`
  273. }
  274. if endtime != "" {
  275. query += `"lt":` + endtime
  276. }
  277. query += `}}}`
  278. }
  279. if subtype != "" {
  280. if len(query) > 0 {
  281. query += ","
  282. }
  283. query += `{"terms":{"subtype":[`
  284. for k, v := range strings.Split(subtype, ",") {
  285. if k > 0 {
  286. query += `,`
  287. }
  288. query += `"` + v + `"`
  289. }
  290. query += `]}}`
  291. }
  292. if winner != "" {
  293. if len(query) > 0 {
  294. query += ","
  295. }
  296. query += `{"terms":{"s_winner":[`
  297. for k, v := range strings.Split(winner, ",") {
  298. if k > 0 {
  299. query += `,`
  300. }
  301. query += `"` + v + `"`
  302. }
  303. query += `]}}`
  304. }
  305. if buyerclass != "" {
  306. if len(query) > 0 {
  307. query += ","
  308. }
  309. query += `{"terms":{"buyerclass":[`
  310. for k, v := range strings.Split(buyerclass, ",") {
  311. if k > 0 {
  312. query += `,`
  313. }
  314. query += `"` + v + `"`
  315. }
  316. query += `]}}`
  317. }
  318. return query
  319. }
  320. func GetSearchQuery(keyword, industry, minprice, maxprice, hasBuyerTel, hasWinnerTel, fileExists, findfields, mustquery, notkey string) (qstr string) {
  321. multi_match := `{"multi_match": {"query": "%s","type": "phrase", "fields": [%s]}}`
  322. query := `{"query":{"bool":{"must":[%s],"must_not":[%s]}}}`
  323. query_bool_should := `{"bool":{"should":[%s],"minimum_should_match": 1}}`
  324. query_bools_must := `{"bool":{"must":[{"range":{"bidamount":{%s}}}]}},{"bool":{"must":[{"range":{"budget":{%s}}}],"must_not":[{"range":{"bidamount":{"gte":-1}}}]}}`
  325. query_bool_must := `{"bool":{"must":[{"terms":{"s_subscopeclass":[%s]}}]}}`
  326. query_missing := `{"constant_score":{"filter":{"missing":{"field":"%s"}}}}`
  327. gte := `"gte": %s`
  328. lte := `"lte": %s`
  329. musts, must_not := []string{}, []string{}
  330. if mustquery != "" {
  331. musts = append(musts, mustquery)
  332. }
  333. var isFileSearch bool = false //搜索范围是否有附件选择
  334. if keyword != "" {
  335. if strings.Contains(findfields, "filetext") { //搜索范围选择附件,是否有附件条件无效;
  336. isFileSearch = true
  337. }
  338. keyword_multi_match := fmt.Sprintf(multi_match, "%s", findfields)
  339. shoulds := []string{}
  340. for _, v := range strings.Split(keyword, "+") {
  341. shoulds = append(shoulds, fmt.Sprintf(keyword_multi_match, elastic.ReplaceYH(v)))
  342. }
  343. musts = append(musts, fmt.Sprintf(elastic.NgramMust, strings.Join(shoulds, ",")))
  344. }
  345. if industry != "" {
  346. industrys := strings.Split(industry, ",")
  347. musts = append(musts, fmt.Sprintf(query_bool_must, `"`+strings.Join(industrys, `","`)+`"`))
  348. }
  349. if minprice != "" || maxprice != "" {
  350. sq := ``
  351. if minprice != "" {
  352. min, _ := strconv.ParseFloat(minprice, 64)
  353. minprice = fmt.Sprintf("%.0f", min*10000)
  354. if minprice == "0" {
  355. minprice = ""
  356. }
  357. }
  358. if maxprice != "" {
  359. max, _ := strconv.ParseFloat(maxprice, 64)
  360. maxprice = fmt.Sprintf("%.0f", max*10000)
  361. if maxprice == "0" {
  362. maxprice = ""
  363. }
  364. }
  365. if minprice != "" {
  366. sq += fmt.Sprintf(gte, minprice)
  367. }
  368. if minprice != "" && maxprice != "" {
  369. sq += `,`
  370. }
  371. if maxprice != "" {
  372. sq += fmt.Sprintf(lte, maxprice)
  373. }
  374. if minprice != "" || maxprice != "" {
  375. query_price := fmt.Sprintf(query_bool_should, fmt.Sprintf(query_bools_must, sq, sq))
  376. musts = append(musts, query_price)
  377. }
  378. }
  379. if hasBuyerTel != "" {
  380. if hasBuyerTel == "y" {
  381. must_not = append(must_not, fmt.Sprintf(query_missing, "buyertel"))
  382. } else {
  383. musts = append(musts, fmt.Sprintf(query_missing, "buyertel"))
  384. }
  385. }
  386. if hasWinnerTel != "" {
  387. if hasWinnerTel == "y" {
  388. must_not = append(must_not, fmt.Sprintf(query_missing, "winnertel"))
  389. } else {
  390. musts = append(musts, fmt.Sprintf(query_missing, "winnertel"))
  391. }
  392. }
  393. if notkey = strings.TrimSpace(notkey); notkey != "" {
  394. notkey_multi_match := fmt.Sprintf(multi_match, "%s", findfields)
  395. notkey_must_not := []string{}
  396. for _, v := range strings.Split(notkey, " ") {
  397. v = strings.TrimSpace(v)
  398. if v == "" {
  399. continue
  400. }
  401. notkey_must_not = append(notkey_must_not, fmt.Sprintf(notkey_multi_match, elastic.ReplaceYH(v)))
  402. }
  403. must_not = append(must_not, fmt.Sprintf(query_bool_should, strings.Join(notkey_must_not, ",")))
  404. }
  405. if !isFileSearch && fileExists != "" {
  406. if fileExists == "1" { //有附件
  407. must_not = append(must_not, fmt.Sprintf(query_missing, "filetext"))
  408. } else if fileExists == "-1" { //无附件
  409. musts = append(musts, fmt.Sprintf(query_missing, "filetext"))
  410. }
  411. }
  412. qstr = fmt.Sprintf(query, strings.Join(musts, ","), strings.Join(must_not, ","))
  413. log.Println(qstr)
  414. return
  415. }
  416. /*
  417. * 结果列表转换,目前只换行行业字段
  418. * 所有的招标搜索都要调用此方法,列表中有展示行业的也可以用
  419. * industry 搜索条件中的行业,默认为空
  420. */
  421. func BidListConvert(industry string, list *[]map[string]interface{}) {
  422. if list == nil {
  423. return
  424. }
  425. commonSubstring := func(v string) (value string) {
  426. bcs := strings.Split(v, "_")
  427. if len(bcs) == 1 {
  428. value = bcs[0]
  429. } else if len(bcs) == 2 {
  430. value = bcs[0]
  431. if strings.TrimSpace(value) == "" {
  432. value = bcs[0]
  433. }
  434. }
  435. return
  436. }
  437. for _, v := range *list {
  438. budget, _ := v["budget"].(float64)
  439. bidamount, _ := v["bidamount"].(float64)
  440. if budget == 0 || strings.TrimSpace(fmt.Sprint(v["budget"])) == "" {
  441. delete(v, "budget")
  442. }
  443. if bidamount == 0 || strings.TrimSpace(fmt.Sprint(v["bidamount"])) == "" {
  444. delete(v, "bidamount")
  445. }
  446. value := ""
  447. subscopeclass, _ := v["s_subscopeclass"].(string)
  448. subscopeclass = strings.Trim(subscopeclass, ",")
  449. bct := strings.Split(subscopeclass, ",")
  450. if bct == nil || len(bct) == 0 {
  451. continue
  452. }
  453. //搜索条件中没有行业的话,取查询结果中第一个行业
  454. if industry == "" {
  455. value = commonSubstring(bct[0])
  456. } else { //搜索条件中有行业的话,取行业中和搜索条件相对应的第一个
  457. industrys := strings.Split(industry, ",")
  458. L:
  459. for _, bc := range bct {
  460. for _, is := range industrys {
  461. if bc == is {
  462. value = commonSubstring(bc)
  463. break L
  464. }
  465. }
  466. }
  467. }
  468. if strings.TrimSpace(value) == "" {
  469. continue
  470. }
  471. v["industry"] = value
  472. }
  473. }