package dataexport import ( "app.yhyue.com/moapp/jybase/mysql" //"config" "encoding/json" "errors" "fmt" "log" "math" "regexp" "sort" "strconv" "strings" "sync" "time" . "app.yhyue.com/moapp/jybase/date" . "app.yhyue.com/moapp/jybase/encrypt" "github.com/gogf/gf/v2/util/gconv" qutil "app.yhyue.com/moapp/jybase/common" elastic "app.yhyue.com/moapp/jybase/es" mg "app.yhyue.com/moapp/jybase/mongodb" "app.yhyue.com/moapp/jypkg/common/src/qfw/util/jy" ) const ( SearchModeAccurate = 0 // 搜索模式:0:精准搜索; SearchModeFuzzy = 1 // 搜索模式:1:模糊搜索 WordsModeAnd = 0 // 搜索关键词模式;默认0:包含所有 WordsModeOr = 1 // 搜索关键词模式;1:包含任意 SearchGroupAll = 0 // 搜索分组:默认0:全部; SearchGroupBidding = 1 // 搜索分组:1:招标采购公告;2:超前项目 SearchGroupLeadingProject = 2 // 搜索分组:1:招标采购公告;2:超前项目 SearchGroupAnnouncementProject = 3 // 搜索分组:1:招标采购公告;2:超前项目 移动3:公告 4 预告 5结果 SearchGroupPreviewsProject = 4 // 搜索分组:1:招标采购公告;2:超前项目移动3:公告 4 预告 5结果 SearchGroupResultsProject = 5 // 搜索分组:1:招标采购公告;2:超前项目移动3:公告 4 预告 5结果 TopTypesBidding = "招标预告,招标公告,招标结果,招标信用信息" TopTypesLeadingProject = "拟建项目,采购意向" TopTypesAnnouncementProject = "招标公告" TopTypesPreviewsProject = "招标预告" TopTypesResultsProject = "招标结果" ) /*筛选条件--关键词*/ type KeyWord struct { Keyword string `json:"keyword"` //关键词 Appended []string `json:"appended"` //附加词 Exclude []string `json:"exclude"` //排除词 } /*筛选条件*/ type SieveCondition struct { Id string `json:"id"` PublishTime string `json:"publishtime"` //发布时间 Area []string `json:"area"` //地区-省份 City []string `json:"city"` //地区-城市 Region []string `json:"region"` //地区-省份+城市 Industry []string `json:"industry"` //行业 Keyword []KeyWord `json:"keywords"` //关键词 Buyer []string `json:"buyer"` //招标单位(采购单位) Agency []string `json:"agency"` //招标代理机构(采购单位) Buyerclass []string `json:"buyerclass"` //采购单位类型 HasBuyerTel string `json:"hasBuyertel"` //是否有采购单位电话 Winner []string `json:"winner"` //中标单位 HasWinnerTel string `json:"hasWinnertel"` //是否有中标单位电话 ComeInTime int64 `json:"comeintime"` //入库时间(秒) OpenId string `json:"openid"` //用户openid MinPrice string `json:"minprice"` //金额——最少 MaxPrice string `json:"maxprice"` //金额——最多 SelectType string `json:"selectType"` //筛选(正文 or 标题) Subtype string `json:"subtype"` //信息类型 SelectIds []string `json:"selectId"` //选择信息导出 PushKeyWords []string `json:"pushKeyWords"` //推送历史匹配词 Comeinfrom string `json:"comeinfrom"` //查询来源 FileExists string `json:"fileExists"` //是否有附件 SearchTypeSwitch bool `json:"searchTypeSwitch"` //是否开启 正文 标题同时搜索只搜正文的开关 BidField string `json:"bid_field"` // 领域化数据 0101-医疗行业 SearchGroup int `json:"searchGroup"` // 搜索分组:默认0:全部;1:招标采购公告;2:超前项目 SearchMode int `json:"searchMode"` // 搜索模式:0:精准搜索;1:模糊搜索 WordsMode int `json:"wordsMode"` // 搜索关键词模式;默认0:包含所有,1:包含任意 District []string `json:"district"` // UserId string `json:"s_userid"` //用户id 或 职位id MgoUserId string `json:"mgoUserId"` //mgoUserId } const ( INDEX = "bidding" TYPE = "bidding" bidSearch_sort = `{"publishtime":-1}` ) var ClearOther = regexp.MustCompile("[\n\r\\s\u3000\u2003\u00a0]") var ClearHtml = regexp.MustCompile("<[^>]*>") var onceSearchCount = 500 var PreviewSearchCount = 100 var ExportTable string = "export_search" var searchPool = make(chan bool, 8) const ( bidBaseField = `"_id", "title", "detail", "area", "city", "publishtime", "projectname", "buyer", "s_winner", "bidamount", "subtype", "toptype", "filetext", "purchasing"` bidseniorField = bidBaseField + `,"href", "projectcode", "buyerperson", "buyertel", "budget", "bidopentime", "agency", "projectscope", "winnerperson", "winnertel", "bidendtime", "district", "signendtime", "buyeraddr","buyerclass","s_topscopeclass","entidlist","signaturedate"` ) var topType = map[string]string{ "招标预告": "预告", "招标公告": "招标", "招标结果": "结果", "招标信用信息": "其它", "拟建项目": "拟建", "采购意向": "采购意向", } // 包含正文或 附件 不包含标题 func DetailFileORTitle(findfields string) bool { return (strings.Contains(findfields, "detail") || strings.Contains(findfields, "filetext")) && !strings.Contains(findfields, "title") } // 包含正文包含标题 func DetailANDTitle(findfields string) bool { return strings.Contains(findfields, "detail") && strings.Contains(findfields, "title") } var getMatchPhraseSql = func(field string, val ...string) (sql string) { if len(val) == 0 { return } var arr []string for _, s := range val { if s == "" { continue } arr = append(arr, fmt.Sprintf(`{"match_phrase": {"%s": "%s"}}`, field, s)) } if len(arr) == 0 { return "" } return fmt.Sprintf(`{"bool": {"should": [%s],"minimum_should_match": 1}}`, strings.Join(arr, ",")) } // 获取数据导出查询语句 func getDataExportSql(scd *SieveCondition) string { if len(scd.SelectIds) > 0 { query := `{"query":{"bool":{"must":[%s]}}}` doSearchSql := fmt.Sprintf(`{"terms":{"_id":[%s]}}`, `"`+strings.Join(scd.SelectIds, `","`)+`"`) return fmt.Sprintf(query, doSearchSql) } multi_match := `{"multi_match": {"query": %s,"type": "phrase", "fields": [%s]}}` query := `{"query":{"bool":{"filter":[%s],"must_not":[%s],"should":[%s],"minimum_should_match": %d}}}` //query := `{"query": {"function_score": {"query": {"bool": {"must": [%s],"must_not": [%s],"should": [%s],"minimum_should_match": %d}},"field_value_factor": {"field": "dataweight","modifier": "ln1p","missing": 0}}}}` query_bool_should := `{"bool":{"should":[%s],"minimum_should_match": 1}}` query_bool_should_and := `{"bool":{"should":[%s],"minimum_should_match": 1 %s}}` query_price := `{"bool":{"must":[{"range":{"bidamount":{%s}}}]}},{"bool":{"must":[{"range":{"budget":{%s}}}],"must_not":[{"range":{"bidamount":{"gte":-1}}}]}}` query_bool_must := `{"terms":{"%s":[%s]}}` query_bool_must_and := `{"bool":{"must":[%s]%s}}` query_exists := `{"constant_score":{"filter":{"exists":{"field":"%s"}}}}` query_bool_must_term := `{"bool": {"must": [{ "term": {"isValidFile": %t }}]}}` query_bool_must_term_bidField := `{"bool": {"must": [{ "term": {"bid_field": "%s" }}]}}` // 领域化数据类型 gte := `"gte": %s` lte := `"lte": %s` bools := []string{} musts := []string{fmt.Sprintf(`{"range":{"comeintime":{"lt":%d}}}`, scd.ComeInTime)} must_not := []string{} //省份 areaCity := []string{} if len(scd.Area) > 0 { areaquery := `{"terms":{"area":[` for k, v := range scd.Area { if k > 0 { areaquery += `,` } areaquery += `"` + v + `"` } areaquery += `]}}` areaCity = append(areaCity, areaquery) } //城市 if len(scd.City) > 0 { areaquery := `{"terms":{"city":[` for k, v := range scd.City { if k > 0 { areaquery += `,` } areaquery += `"` + v + `"` } areaquery += `]}}` areaCity = append(areaCity, areaquery) } district := scd.District if len(district) > 0 { for _, v := range district { cityName := strings.Split(v, "_")[0] districtName := strings.Split(v, "_")[1] query_bool_must_and_district := `{"bool":{"must":[{"terms":{"city":["%s"]}},{"terms":{"district":["%s"]}}]}}` areaCity = append(areaCity, fmt.Sprintf(query_bool_must_and_district, cityName, districtName)) } } if len(areaCity) > 0 { musts = append(musts, fmt.Sprintf(query_bool_should, strings.Join(areaCity, ","))) } //检索日期 starttime := "" now := time.Unix(scd.ComeInTime, 0) endtime := fmt.Sprintf("%d", now.Unix()) if scd.PublishTime == "lately-7" { //最近7天 starttime = fmt.Sprint(time.Date(now.Year(), now.Month(), now.Day()-7, 0, 0, 0, 0, time.Local).Unix()) } else if scd.PublishTime == "lately-30" { //最近30天 starttime = fmt.Sprint(time.Date(now.Year(), now.Month(), now.Day()-30, 0, 0, 0, 0, time.Local).Unix()) } else if scd.PublishTime == "thisyear" { //去年 starttime = fmt.Sprint(time.Date(now.Year()-1, 1, 1, 0, 0, 0, 0, time.Local).Unix()) endtime = fmt.Sprint(time.Date(now.Year()-1, 12, 31, 23, 59, 59, 0, time.Local).Unix()) } else if strings.Contains(scd.PublishTime, "_") { //设置检索日期 starttime = strings.Split(scd.PublishTime, "_")[0] endTime_tmp := now if etime := strings.Split(scd.PublishTime, "_")[1]; etime != "" { etTime := time.Unix(qutil.Int64All(etime), 0) endTime_tmp = time.Date(etTime.Year(), etTime.Month(), etTime.Day()+1, 0, 0, 0, 0, time.Local) } //结束时间必须小于筛选时间 if endTime_tmp.After(now) { endTime_tmp = now } endtime = fmt.Sprintf("%d", endTime_tmp.Unix()) } timequery := `{"range":{"publishtime":{` if starttime != "" { timequery += `"gte":` + starttime } if starttime != "" && endtime != "" { timequery += `,` } if endtime != "" { timequery += `"lt":` + endtime } timequery += `}}}` musts = append(musts, timequery) if scd.Subtype == "" { switch scd.SearchGroup { case SearchGroupBidding: scd.Subtype = TopTypesBidding case SearchGroupLeadingProject: scd.Subtype = TopTypesLeadingProject case SearchGroupAnnouncementProject: scd.Subtype = TopTypesAnnouncementProject case SearchGroupPreviewsProject: scd.Subtype = TopTypesPreviewsProject case SearchGroupResultsProject: scd.Subtype = TopTypesResultsProject } } if scd.Subtype != "" { var subquery string var topTypes []string var subTypes []string for _, v := range strings.Split(scd.Subtype, ",") { if v1, ok := topType[v]; ok { topTypes = append(topTypes, fmt.Sprintf(`"%s"`, v1)) } else { subTypes = append(subTypes, fmt.Sprintf(`"%s"`, v)) } } log.Println("信息类型搜索:", topTypes, subTypes) if len(subTypes) > 0 && len(topTypes) > 0 { subquery = fmt.Sprintf(`{"bool": {"should": [{"terms": {"subtype": [%s]}},{"terms": {"toptype": [%s]}}]}}`, strings.Join(subTypes, ","), strings.Join(topTypes, ",")) } else if len(subTypes) > 0 { subquery = fmt.Sprintf(`{"terms":{"subtype":[%s]}}`, strings.Join(subTypes, ",")) } else if len(topTypes) > 0 { subquery = fmt.Sprintf(`{"terms":{"toptype":[%s]}}`, strings.Join(topTypes, ",")) } musts = append(musts, subquery) } if len(scd.Industry) > 0 { musts = append(musts, fmt.Sprintf(query_bool_must, "s_subscopeclass", `"`+strings.Join(scd.Industry, `","`)+`"`)) } if len(scd.Buyerclass) > 0 { musts = append(musts, fmt.Sprintf(query_bool_must, "buyerclass", `"`+strings.Join(scd.Buyerclass, `","`)+`"`)) } //P492招标采购搜索匹配采购单位等优化 新增buyer、agency、winner模糊搜索,数据导出同步改为模糊 //query_bool_should := `{"bool":{"should":[%s],"minimum_should_match": 1}}` if len(scd.Buyer) > 0 { if sql := getMatchPhraseSql("buyer.mbuyer", scd.Buyer...); sql != "" { musts = append(musts, sql) } } if len(scd.Agency) > 0 { if sql := getMatchPhraseSql("agency.magency", scd.Agency...); sql != "" { musts = append(musts, sql) } } if len(scd.Winner) > 0 { if sql := getMatchPhraseSql("s_winner.mwinner", scd.Winner...); sql != "" { musts = append(musts, sql) } } _minPrice := "" _maxPrice := "" if scd.MinPrice != "" || scd.MaxPrice != "" { sq := `` if scd.MinPrice != "" { min, _ := strconv.ParseFloat(scd.MinPrice, 64) _minPrice = fmt.Sprintf("%.0f", min*10000) if _minPrice == "0" { _minPrice = "" } } if scd.MaxPrice != "" { max, _ := strconv.ParseFloat(scd.MaxPrice, 64) _maxPrice = fmt.Sprintf("%.0f", max*10000) if _maxPrice == "0" { _maxPrice = "" } } if _minPrice != "" { sq += fmt.Sprintf(gte, _minPrice) } if _minPrice != "" && _maxPrice != "" { sq += `,` } if _maxPrice != "" { sq += fmt.Sprintf(lte, _maxPrice) } if _minPrice != "" || _maxPrice != "" { query_price := fmt.Sprintf(query_bool_should, fmt.Sprintf(query_price, sq, sq)) musts = append(musts, query_price) } } boolsNum := 0 selectType := scd.SelectType //should if len(scd.Keyword) > 0 { boolsNum = 1 queryItem := "" if selectType == "" { queryItem = "title" } else if selectType == "all" { queryItem = "detail\", \"title" } else { if scd.SearchTypeSwitch && DetailANDTitle(selectType) { if strings.Contains(selectType, "title,") { selectType = strings.Replace(selectType, "title,", "", -1) } else if strings.Contains(selectType, ",title") { selectType = strings.Replace(selectType, ",title", "", -1) } } queryItem = strings.ReplaceAll(selectType, ",", "\",\"") } multi_match_new := fmt.Sprintf(multi_match, "%s", "\""+queryItem+"\"") if scd.Comeinfrom == "supersearchPage" { var keywordArr []string if strings.Contains(scd.Keyword[0].Keyword, "+") { //keywordArr = strings.Split(scd.Keyword[0].Keyword, "+") } else if strings.Contains(scd.Keyword[0].Keyword, " ") { //keywordArr = strings.Split(scd.Keyword[0].Keyword, " ") } if len(keywordArr) > 1 { KeyWordSearch := KeyWord{} for _, v := range keywordArr { KeyWordSearch.Appended = append(KeyWordSearch.Appended, v) } scd.Keyword = []KeyWord{KeyWordSearch} } } for _, v := range scd.Keyword { shoulds := []string{} must_not := []string{} keysShoulds := []string{} if v.Keyword != "" { if strings.Contains(v.Keyword, "+") { for _, vk := range strings.Split(v.Keyword, "+") { if len(vk) == 0 { continue } //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(vk))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+vk+"\"", "\""+queryItem+"\"") keysShoulds = append(keysShoulds, shouldsKey) } else { keysShoulds = append(keysShoulds, fmt.Sprintf(multi_match_new, "\""+vk+"\"")) } } } else if strings.Contains(v.Keyword, " ") { for _, vk := range strings.Split(v.Keyword, " ") { if len(vk) == 0 { continue } //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(vk))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+vk+"\"", "\""+queryItem+"\"") keysShoulds = append(keysShoulds, shouldsKey) } else { keysShoulds = append(keysShoulds, fmt.Sprintf(multi_match_new, "\""+vk+"\"")) } } } else { //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(v.Keyword))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+v.Keyword+"\"", "\""+queryItem+"\"") keysShoulds = append(keysShoulds, shouldsKey) } else { keysShoulds = append(keysShoulds, fmt.Sprintf(multi_match_new, "\""+v.Keyword+"\"")) } } // 单个关键词分词后都包含 shoulds = append(shoulds, fmt.Sprintf(query_bool_must_and, strings.Join(keysShoulds, ","), "")) } //附加词 for _, vv := range v.Appended { appendedShoulds := []string{} if vv != "" { // 附加词处理分词 if strings.Contains(vv, "+") { for _, vk := range strings.Split(vv, "+") { if len(vk) == 0 { continue } //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(vk))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+vk+"\"", "\""+queryItem+"\"") appendedShoulds = append(appendedShoulds, shouldsKey) } else { appendedShoulds = append(appendedShoulds, fmt.Sprintf(multi_match_new, "\""+vk+"\"")) } } } else { for _, vk := range strings.Split(vv, " ") { if len(vk) == 0 { continue } //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(vk))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+vk+"\"", "\""+queryItem+"\"") appendedShoulds = append(appendedShoulds, shouldsKey) } else { appendedShoulds = append(appendedShoulds, fmt.Sprintf(multi_match_new, "\""+vk+"\"")) } } } // 单个关键词分词后都包含 shoulds = append(shoulds, fmt.Sprintf(query_bool_must_and, strings.Join(appendedShoulds, ","), "")) } } //排除词 for _, vv := range v.Exclude { if len(strings.TrimSpace(vv)) == 0 { continue } // 处理每组里面的空格分词 for _, notKeySplit := range strings.Split(vv, " ") { if len(notKeySplit) == 0 { continue } //单个字 搜索范围 有全文或者附件 无标题 例如:学 虚拟机 detail 搜索的时候加上标题 if scd.Comeinfrom == "supersearchPage" && DetailFileORTitle(selectType) && len([]rune(elastic.ReplaceYH(notKeySplit))) == 1 { queryItem = strings.ReplaceAll(selectType+",title", ",", "\",\"") shouldsKey := fmt.Sprintf(multi_match, "\""+notKeySplit+"\"", "\""+queryItem+"\"") must_not = append(must_not, shouldsKey) } else { must_not = append(must_not, fmt.Sprintf(multi_match_new, "\""+notKeySplit+"\"")) } } } //添加 if len(shoulds) > 0 { notStr := "" if len(must_not) > 0 { notStr = fmt.Sprintf(`,"must_not":[%s]`, strings.Join(must_not, ",")) } if scd.WordsMode == WordsModeOr { // 包含任意 bools = append(bools, fmt.Sprintf(query_bool_should_and, strings.Join(shoulds, ","), notStr)) } else { bools = append(bools, fmt.Sprintf(query_bool_must_and, strings.Join(shoulds, ","), notStr)) } } } } if scd.HasBuyerTel != "" { if scd.HasBuyerTel == "y" { musts = append(musts, fmt.Sprintf(query_exists, "buyertel")) } else { must_not = append(must_not, fmt.Sprintf(query_exists, "buyertel")) } } if scd.HasWinnerTel != "" { if scd.HasWinnerTel == "y" { musts = append(musts, fmt.Sprintf(query_exists, "winnertel")) } else { must_not = append(must_not, fmt.Sprintf(query_exists, "winnertel")) } } //搜索范围是否只有附件 //搜索范围只选择附件,是否有附件条件无效; var isFileSearch = strings.ReplaceAll(selectType, ",", "\",\"") == "filetext" if !isFileSearch && scd.FileExists != "" { if scd.FileExists == "1" { //有附件 musts = append(musts, fmt.Sprintf(query_bool_must_term, true)) } else if scd.FileExists == "-1" { //无附件 must_not = append(must_not, fmt.Sprintf(query_bool_must_term, true)) } } // 如果是领域化数据则需要加标签 if scd.BidField != "" { musts = append(musts, fmt.Sprintf(query_bool_must_term_bidField, scd.BidField)) } qstr := fmt.Sprintf(query, strings.Join(musts, ","), strings.Join(must_not, ","), strings.Join(bools, ","), boolsNum) return qstr } // 获取数据导出筛选条件 func GetSqlObjFromId(mongo mg.MongodbSim, _id string) *SieveCondition { var ( query *map[string]interface{} ok bool ) if query, ok = mongo.FindById(ExportTable, _id, nil); !ok { return nil } searchTypeSwitch, _ := (*query)["searchTypeSwitch"].(bool) return &SieveCondition{ Id: _id, Keyword: getKeyWordArrFromDbResult((*query)["keywords"]), Industry: getStringArrFromDbResult((*query)["industry"]), MinPrice: qutil.ObjToString((*query)["minprice"]), MaxPrice: qutil.ObjToString((*query)["maxprice"]), Subtype: qutil.ObjToString((*query)["subtype"]), Area: getStringArrFromDbResult((*query)["area"]), City: getStringArrFromDbResult((*query)["city"]), SelectType: qutil.ObjToString((*query)["selectType"]), PublishTime: qutil.ObjToString((*query)["publishtime"]), Buyer: getStringArrFromDbResult((*query)["buyer"]), Agency: getStringArrFromDbResult((*query)["agency"]), Buyerclass: getStringArrFromDbResult((*query)["buyerclass"]), HasBuyerTel: qutil.ObjToString((*query)["hasBuyertel"]), Winner: getStringArrFromDbResult((*query)["winner"]), HasWinnerTel: qutil.ObjToString((*query)["hasWinnertel"]), ComeInTime: qutil.Int64All((*query)["comeintime"]), Comeinfrom: qutil.ObjToString((*query)["comeinfrom"]), SelectIds: getStringArrFromDbResult((*query)["selectIds"]), PushKeyWords: getStringArrFromDbResult((*query)["pushKeyWords"]), FileExists: qutil.ObjToString((*query)["fileExists"]), SearchTypeSwitch: searchTypeSwitch, BidField: qutil.ObjToString((*query)["bid_field"]), // 领域化数据 SearchGroup: qutil.IntAll((*query)["searchGroup"]), //搜索分组:默认0:全部;1:招标采购公告;2:超前项目 SearchMode: qutil.IntAll((*query)["searchMode"]), // 搜索模式:0:精准搜索;1:模糊搜索 WordsMode: qutil.IntAll((*query)["wordsMode"]), // 搜索关键词模式;默认0:包含所有,1:包含任意 District: gconv.Strings((*query)["district"]), // UserId: qutil.InterfaceToStr((*query)["s_userid"]), // MgoUserId: qutil.InterfaceToStr((*query)["mgoUserId"]), // } } // 数据导出-查询结果数量 func GetDataExportSearchCountByScdId(sim, bid mg.MongodbSim, biddingName, elasticAddress, id string) (count int) { scd := GetSqlObjFromId(sim, id) //用户筛选条件 if scd.SelectIds != nil { //部分数据可能已删除、不存在;此处需要统计返回实际数量 //return len(scd.SelectIds) return int(GetDataExportSelectReallyCountFromMongo(bid, biddingName, scd.SelectIds)) } return GetDataExportSearchCountBySieveCondition(scd, elasticAddress) } func GetDataExportSearchCountBySieveCondition(scd *SieveCondition, elasticAddress string) (count int) { if isNullSearch(scd) { return -1 //程序端返回最大值 } if scd.SearchMode == SearchModeAccurate { qstr := getDataExportSql(scd) log.Printf("GetDataExportSearchCountUseId-%s-sql:%s\n", scd.Id, qstr) count = int(elastic.Count(INDEX, TYPE, qstr)) } // 依据用户选择的搜索模式和搜索范围进行匹配,即不限制只能匹配标题,且不限制最多展示100条、针对输入的单个关键词分词后需要都包含 //超级搜索一致的检索(防止数据导出和超级搜索数据量不一致) if scd.Comeinfrom == "supersearchPage" && (len(scd.Keyword) != 0 || len(scd.Industry) != 0 || IsOnTheWhitelist(scd.UserId, scd.MgoUserId)) && scd.SearchMode == SearchModeFuzzy { if len(scd.Keyword) != 0 { // 关键词分词 searchTextSize := 0 if len(scd.Keyword) > 0 { searchTextSize = len([]rune(scd.Keyword[0].Keyword)) } if searchTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Keyword, "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Keyword = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } // 附加词分词 for i := 0; i < len(scd.Keyword[0].Appended); i++ { appendTextSize := 0 if len(scd.Keyword[0].Appended[i]) > 0 { appendTextSize = len([]rune(scd.Keyword[0].Appended[i])) } if appendTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Appended[i], "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Appended[i] = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } } } qstr := getDataExportSql(scd) count = int(elastic.Count(INDEX, TYPE, qstr)) log.Printf("GetDataExportSearchCountUseId-%s-count:%d-分词-sql:%s\n", scd.Id, count, qstr) return count } log.Printf("GetDataExportSearchCountUseId-%s-count:%d\n", scd.Id, count) return } // 合并map数据,去重 func delRepeatMapArr(res *[]map[string]interface{}, res2 *[]map[string]interface{}) *[]map[string]interface{} { if res != nil { for _, v := range *res { for n, m := range *res2 { if qutil.ObjToString(v["_id"]) == qutil.ObjToString(m["_id"]) { *res2 = append((*res2)[0:n], (*res2)[n+1:]...) break } } } *res = append(*res, *res2...) } else { res = res2 } return res } // 查询条件是否为空 func isNullSearch(scd *SieveCondition) (isNull bool) { if scd.PublishTime == "" && len(scd.Area) == 0 && len(scd.Industry) == 0 && len(scd.Keyword) == 0 && len(scd.Buyer) == 0 && len(scd.Winner) == 0 && scd.MinPrice == "" && scd.MaxPrice == "" && scd.Subtype == "" && len(scd.City) == 0 { isNull = true } return isNull } /* * 数据导出 查询结果 * _id 数据库查询条件记录id * dataType 1-普通字段 2-高级字段 * webdomain 三级页域名 * count 返回数量 (-1:预览数据查询) */ func GetDataExportSearchResultByScdId(sim, bid mg.MongodbSim, bidMgoDBName, elasticAddress, id, dataType string, checkCount int) (*[]map[string]interface{}, error) { scd := GetSqlObjFromId(sim, id) list, err := GetDataExportSearchResult(bid, bidMgoDBName, elasticAddress, scd, dataType, checkCount) if list == nil || err != nil { return nil, err } return list, nil } // GetDataExportIdArrByScdId 数据包去重获取导出信息id func GetDataExportIdArrByScdId(sim mg.MongodbSim, elasticAddress, id string, checkCount int) ([]string, error) { scd := GetSqlObjFromId(sim, id) return GetDataExportIds(elasticAddress, scd, checkCount) } // 收藏导出 var contentfilterReg = regexp.MustCompile("<[^>]+>") // GetDataExportSelectReallyCountFromEs 从elasticsearch查询实际可调导出数量 func GetDataExportSelectReallyCountFromEs(ids []string) int64 { pool := make(chan bool, 10) wait := &sync.WaitGroup{} var total int64 var lock sync.Mutex var idArr []string for i, id := range ids { idArr = append(idArr, id) if len(idArr) == 200 || i+1 == len(ids) { pool <- true wait.Add(1) go func(arr []string) { defer func() { wait.Done() <-pool }() log.Println("GetDataExportSelectReallyCountFromEs===", arr[0]) query := fmt.Sprintf(`{"query":{"bool":{"must":[{"terms":{"id":["%s"]}}]}}}`, strings.Join(arr, "\",\"")) tCount := elastic.Count(INDEX, TYPE, query) if tCount > 0 { lock.Lock() total += tCount lock.Unlock() } return }(idArr) idArr = []string{} } } wait.Wait() log.Printf("GetDataExportSelectReallyCount 选择数据共%d条记录,实际查询%d条\n", len(ids), total) return total } // GetDataExportSelectReallyCountFromMongo 从mongodb查询实际可调导出数量 func GetDataExportSelectReallyCountFromMongo(bid mg.MongodbSim, biddingName string, ids []string) int64 { sess := bid.GetMgoConn() defer bid.DestoryMongoConn(sess) if ids == nil || len(ids) == 0 { return 0 } var ( count int64 lock sync.Mutex ) pool := make(chan bool, 10) wait := &sync.WaitGroup{} var idArr []string for i, id := range ids { idArr = append(idArr, id) if len(idArr) == 200 || i+1 == len(ids) { pool <- true wait.Add(1) go func(arr []string) { defer func() { wait.Done() <-pool }() log.Println("GetDataExportSelectReallyCountFromMongo===", arr[0]) lenNum := int64(len(arr)) var ( queryIds []interface{} num1, num2 int64 err error ) for _, idStr := range arr { queryIds = append(queryIds, mg.StringTOBsonId(idStr)) } num1, err = sess.DB(biddingName).C("bidding").Find(map[string]interface{}{"_id": map[string]interface{}{ "$in": queryIds, }}).Count() if err == nil { if num1 == lenNum { lock.Lock() count += num1 lock.Unlock() return } num2, err = sess.DB(biddingName).C("bidding_back").Find(map[string]interface{}{"_id": map[string]interface{}{ "$in": queryIds, }}).Count() if err == nil { lock.Lock() count += qutil.If(num2+num1 >= lenNum, lenNum, num2+num1).(int64) lock.Unlock() } } }(idArr) idArr = []string{} } } wait.Wait() return qutil.If(count > 0, count, ResCount).(int64) } func GetDataExportSelectResultFromEs(bidding mg.MongodbSim, biddingName string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) { bidField := bidBaseField if dataType == "2" { bidField = bidseniorField } if checkCount == -1 && len(scd.SelectIds) > 500 { scd.SelectIds = scd.SelectIds[:500] } returnLsit := make([]map[string]interface{}, 0, len(scd.SelectIds)) var idArr []string for i, id := range scd.SelectIds { idArr = append(idArr, id) if len(idArr) == 200 || i+1 == len(scd.SelectIds) { log.Println(scd.Id, "GetDataExportSelectResultFromEs===", idArr[0]) query := fmt.Sprintf(`{"query":{"bool":{"must":[{"terms":{"id":["%s"]}}]}},"_source": [%s],"size":%d}`, strings.Join(idArr, "\",\""), bidField, len(idArr)) log.Println("GetDataExportSelectResultFromEs 数据流量包 es count 信息查询:", query) data := *elastic.Get(INDEX, TYPE, query) if data != nil && len(data) > 0 { for _, bv := range data { bv["_id"] = mg.BsonIdToSId(bv["_id"]) detail, _ := bv["detail"].(string) if detail != "" { bv["detail"] = contentfilterReg.ReplaceAllString(detail, "") } returnLsit = append(returnLsit, bv) } } idArr = []string{} } } if len(returnLsit) == checkCount || checkCount == -1 || checkCount == -2 { return &returnLsit, nil } else { return nil, fmt.Errorf("GetDataExportSelectResultFromEs 选择数据导出异常 数据量期望%d条,实际查询%d条", checkCount, len(returnLsit)) } } func GetDataExportSelectResultFromMongoDb(bidding mg.MongodbSim, biddingName string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) { sess := bidding.GetMgoConn() defer bidding.DestoryMongoConn(sess) selectMap := map[string]interface{}{ "_id": 1, "title": 1, "detail": 1, "area": 1, "city": 1, "publishtime": 1, "projectname": 1, "buyer": 1, "s_winner": 1, "bidamount": 1, "subtype": 1, "toptype": 1, "filetext": 1, "purchasing": 1, } if dataType == "2" { for _, key := range []string{"href", "projectcode", "buyerperson", "buyertel", "budget", "bidopentime", "agency", "projectscope", "winnerperson", "winnertel", "bidendtime", "district", "signendtime", "buyeraddr", "filetext", "buyerclass", "s_topscopeclass", "entidlist", "signaturedate"} { selectMap[key] = 1 } } if checkCount == -1 && len(scd.SelectIds) > 500 { scd.SelectIds = scd.SelectIds[:500] } returnLsit := make([]map[string]interface{}, 0, len(scd.SelectIds)) var idArr []string for i, id := range scd.SelectIds { idArr = append(idArr, id) if len(idArr) == 200 || i+1 == len(scd.SelectIds) { log.Println(scd.Id, "GetDataExportSelectResultFromMongoDb===", idArr[0]) var ( queryIds []interface{} count int ) for _, idStr := range idArr { queryIds = append(queryIds, mg.StringTOBsonId(idStr)) } iter := sess.DB(biddingName).C("bidding").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{ "$in": queryIds, }}).Iter() for m := make(map[string]interface{}); iter.Next(&m); { m["_id"] = mg.BsonIdToSId(m["_id"]) detail, _ := m["detail"].(string) if detail != "" { m["detail"] = contentfilterReg.ReplaceAllString(detail, "") } count++ returnLsit = append(returnLsit, m) m = make(map[string]interface{}) } if count != len(idArr) { iter_back := sess.DB(biddingName).C("bidding_back").Select(selectMap).Find(map[string]interface{}{"_id": map[string]interface{}{ "$in": queryIds, }}).Iter() for m := make(map[string]interface{}); iter_back.Next(&m); { m["_id"] = mg.BsonIdToSId(m["_id"]) detail, _ := m["detail"].(string) if detail != "" { m["detail"] = contentfilterReg.ReplaceAllString(detail, "") } returnLsit = append(returnLsit, m) m = make(map[string]interface{}) } } idArr = []string{} } } if len(returnLsit) == checkCount || checkCount == -1 || checkCount == -2 { return &returnLsit, nil } else { return nil, fmt.Errorf("GetDataExportSelectResultFromMongoDb 选择数据导出异常 数据量期望%d条,实际查询%d条", checkCount, len(returnLsit)) } } func GetDataExportIds(elasticAddress string, scd *SieveCondition, checkCount int) ([]string, error) { defer qutil.Catch() if scd == nil { return nil, errors.New("GetDataExportIds-未获取到查询信息") } if scd.SelectIds != nil { return scd.SelectIds, nil } var qstr string if scd.SearchMode == SearchModeAccurate { //获取查询语句 qstr = getDataExportSql(scd) log.Printf("GetDataExportIds-%s-sql:%s\n", scd.Id, qstr) } //超级搜索一致的检索(防止数据导出和超级搜索数据量不一致) // 依据用户选择的搜索模式和搜索范围进行匹配,即不限制只能匹配标题,且不限制最多展示100条、针对输入的单个关键词分词后需要都包含 if scd.Comeinfrom == "supersearchPage" && (len(scd.Keyword) != 0 || len(scd.Industry) != 0 || IsOnTheWhitelist(scd.UserId, scd.MgoUserId)) && len(scd.SelectIds) == 0 && scd.SearchMode == SearchModeFuzzy { if len(scd.Keyword) != 0 { searchTextSize := 0 // 关键词分词 if len(scd.Keyword) > 0 { searchTextSize = len([]rune(scd.Keyword[0].Keyword)) } if searchTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Keyword, "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Keyword = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } // 附加词分词 for i := 0; i < len(scd.Keyword[0].Appended); i++ { appendTextSize := 0 if len(scd.Keyword[0].Appended[i]) > 0 { appendTextSize = len([]rune(scd.Keyword[0].Appended[i])) } if appendTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Appended[i], "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Appended[i] = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } } } qstr = getDataExportSql(scd) log.Printf("GetDataExportIds-%s-分词查询-sql:%s\n", scd.Id, qstr) } res := doSearchByBatch(qstr, "1", checkCount, fmt.Sprintf("%s-%s", "GetDataExportSearchResult", scd.Id)) //获取信息id idArr := make([]string, 0, 0) for _, v := range res { if id := qutil.ObjToString(v["_id"]); id != "" { idArr = append(idArr, id) } } if checkCount != len(idArr) { return nil, fmt.Errorf("GetDataExportIds-%s-数据总量校验异常,期望:%d,实际:%d", scd.Id, checkCount, len(res)) } return idArr, nil } // GetDataExportSearchResult 获取数据导出内容 // entmg 高级字段包查询企业电话邮箱等字段 // checkCount -1 预览500条 func GetDataExportSearchResult(bid mg.MongodbSim, bidMgoDBName, elasticAddress string, scd *SieveCondition, dataType string, checkCount int) (*[]map[string]interface{}, error) { defer qutil.Catch() tm := time.Now() if scd == nil { return nil, errors.New("GetDataExportSearchResult-未获取到查询信息") } if scd.SelectIds != nil { idSelectDates, idSelectErr := GetDataExportSelectResultFromMongoDb(bid, bidMgoDBName, scd, dataType, checkCount) if idSelectErr != nil { return nil, idSelectErr } log.Printf("id:%s,查询耗时:%s\n", scd.Id, time.Now().Sub(tm)) GetDataExportMatchKey(scd, idSelectDates) log.Printf("id:%s key匹配耗时:%s\n", scd.Id, time.Now().Sub(tm)) return idSelectDates, idSelectErr } selectType := scd.SelectType var qstr string var res []map[string]interface{} if scd.SearchMode == SearchModeAccurate { // 搜索模式为精确查找 //获取查询语句 qstr = getDataExportSql(scd) log.Printf("GetDataExportSearchResult-%s-sql:%s\n", scd.Id, qstr) //数据导出数据查询 res = doSearchByBatch(qstr, dataType, checkCount, fmt.Sprintf("%s-%s", "GetDataExportSearchResult", scd.Id)) } //超级搜索一致的检索(防止数据导出和超级搜索数据量不一致) if scd.Comeinfrom == "supersearchPage" && (len(scd.Keyword) != 0 || len(scd.Industry) != 0 || IsOnTheWhitelist(scd.UserId, scd.MgoUserId)) && len(scd.SelectIds) == 0 && scd.SearchMode == SearchModeFuzzy { if len(scd.Keyword) != 0 { searchTextSize := 0 // 关键词分词 if len(scd.Keyword) > 0 { searchTextSize = len([]rune(scd.Keyword[0].Keyword)) } if searchTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Keyword, "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Keyword = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } // 附加词分词 for i := 0; i < len(scd.Keyword[0].Appended); i++ { appendTextSize := 0 if len(scd.Keyword[0].Appended[i]) > 0 { appendTextSize = len([]rune(scd.Keyword[0].Appended[i])) } if appendTextSize > 0 { if secondKWS := jy.HttpEs(scd.Keyword[0].Appended[i], "ik_smart", elasticAddress); secondKWS != "" { scd.Keyword[0].Appended[i] = jy.KeywordsProcessing(strings.ReplaceAll(secondKWS, "+", " "), " ") } } } } qstr = getDataExportSql(scd) log.Printf("GetDataExportSearchResult-%s-分词查询-sql:%s\n", scd.Id, qstr) res = doSearchByBatch(qstr, dataType, checkCount, fmt.Sprintf("%s-%s", "GetDataExportSearchResult", scd.Id)) } //校验数量 if checkCount != len(res) && checkCount != -1 { return nil, fmt.Errorf("GetDataExportSearchResult-%s-数据总量校验异常,期望:%d,实际:%d", scd.Id, checkCount, len(res)) } if selectType != scd.SelectType { scd.SelectType = selectType } log.Printf("id:%s 查询耗时:%s\n", scd.Id, time.Now().Sub(tm)) GetDataExportMatchKey(scd, &res) log.Printf("id:%s key匹配耗时:%s\n", scd.Id, time.Now().Sub(tm)) return &res, nil } // 数据导出内容标签 func GetDataExportMatchKeyTag(mysqlSess *mysql.Mysql, res []map[string]interface{}, userId string, limit int) { userBidTagMap := make(map[string]string) userBidTag := mysqlSess.SelectBySql(fmt.Sprintf(`select a.bid,b.labelname from bdcollection a LEFT JOIN bdlabel b on a.labelid = b.id WHERE a.userid = '%s' order by a.createdate desc limit %d`, userId, limit)) if userBidTag != nil && len(*userBidTag) > 0 { for _, s2 := range *userBidTag { userBidTagMap[qutil.InterfaceToStr(s2["bid"])] = qutil.InterfaceToStr(s2["labelname"]) } } for _, re := range res { var tName string if tagName := userBidTagMap[qutil.InterfaceToStr(re["_id"])]; tagName != "" { tName = tagName } else { tName = "默认收藏" } re["tagName"] = tName } } func doSearchByBatch(query, dataType string, searchCount int, flag string) (res []map[string]interface{}) { if searchCount > onceSearchCount { //分批次查询 batchNum := qutil.IntAll(math.Ceil(float64(searchCount) / float64(onceSearchCount))) var searchWaitGroup = &sync.WaitGroup{} var lock sync.Mutex for n := 0; n < batchNum; n++ { searchWaitGroup.Add(1) searchPool <- true go func(start int) { defer func() { searchWaitGroup.Done() <-searchPool }() checkNum, checkOk := onceSearchCount, false if start == (batchNum - 1) { if searchCount%onceSearchCount != 0 { checkNum = searchCount % onceSearchCount } } var tmp *[]map[string]interface{} for i := 0; i < 3; i++ { tmp = doSearch(query, start*onceSearchCount, onceSearchCount, dataType) if tmp != nil && (len(*tmp) == checkNum) { //校验数据量是否够 checkOk = true break } } if tmp == nil { log.Printf("%s-第%d页数据查询结果为空\n", flag, start+1) return } if checkOk { log.Printf("%s-第%d页数据加载完成,共%d条\n", flag, start+1, len(*tmp)) } else { log.Printf("%s-第%d页数据加载异常,共%d条,预期%d条\n", flag, start+1, len(*tmp), checkNum) } lock.Lock() res = append(res, *tmp...) lock.Unlock() }(n) } searchWaitGroup.Wait() log.Printf("%s-分批次加载数据总量为%d\n", flag, len(res)) } else { //queryCount := qutil.If(searchCount == -1, onceSearchCount, searchCount).(int) queryCount := qutil.If(searchCount == -1, PreviewSearchCount, searchCount).(int) searchPool <- true tmp := doSearch(query, 0, queryCount, dataType) <-searchPool if tmp == nil || len(*tmp) == 0 { log.Printf("%s-一次性加载数据异常\n", flag) } else { res = *tmp log.Printf("%s-一次性加载数据总量为%d\n", flag, len(res)) } } return } func FormatExportData(entmg mg.MongodbSim, data *[]map[string]interface{}, webdomain string, dataType string, encry ...bool) *[]map[string]interface{} { //格式化输出 isEncry := false if len(encry) > 0 { isEncry = true } if data == nil { return data } sort.Slice(*data, func(i, j int) bool { time1 := qutil.Int64All((*data)[i]["publishtime"]) time2 := qutil.Int64All((*data)[j]["publishtime"]) return time1 > time2 }) qyxyMap := make(map[string]bool) qyxyEsMap := make(map[string]map[string]interface{}) projectsetMap := make(map[string]bool) projectsetEsMap := make(map[string]map[string]interface{}) if dataType == "2" { wait := &sync.WaitGroup{} for _, m := range *data { entidlist, ok := m["entidlist"].([]interface{}) if ok && len(entidlist) > 0 { //var winnerMaps []map[string]interface{} for _, entIdObj := range entidlist { entId := qutil.ObjToString(entIdObj) if entId == "" { continue } qyxyMap[entId] = true } } if m["toptype"] == "结果" && !(m["agency"] != nil && m["budget"] != nil && m["buyerperson"] != nil && m["buyertel"] != nil) { projectsetMap[qutil.InterfaceToStr(m["_id"])] = true } } log.Println(fmt.Sprintf("查询企业:%d,查询项目:%d", len(qyxyMap), len(projectsetMap))) if len(qyxyMap) > 0 { //查询企业 wait.Add(1) go func() { var ( qyxyArr []string qCount int ) defer wait.Done() pool := make(chan bool, 5) qyxyWait := &sync.WaitGroup{} lock := &sync.Mutex{} for s := range qyxyMap { qCount++ qyxyArr = append(qyxyArr, fmt.Sprintf(`%s`, s)) if len(qyxyArr) == 200 || (qCount == len(qyxyMap) && len(qyxyArr) > 0) { pool <- true qyxyWait.Add(1) go func(qArr []string) { defer func() { <-pool qyxyWait.Done() }() qyxyEsData := elastic.Get("qyxy", "qyxy", fmt.Sprintf(`{"query":{"bool":{"should":[{"terms":{"id":["%s"]}}]}},"size":%d,"_source":["company_name","company_email","company_phone","legal_person","id"]}`, strings.Join(qArr, `","`), len(qArr))) if qyxyEsData != nil && len(*qyxyEsData) > 0 { for _, m := range *qyxyEsData { lock.Lock() qyxyEsMap[qutil.InterfaceToStr(m["id"])] = m lock.Unlock() } } }(qyxyArr) qyxyArr = []string{} } } qyxyWait.Wait() }() } if len(projectsetMap) > 0 { //查询项目 wait.Add(1) go func() { defer wait.Done() var ( projectsetArr []string pCount int ) pool := make(chan bool, 5) projectsetWait := &sync.WaitGroup{} lock := &sync.Mutex{} for s := range projectsetMap { pCount++ projectsetArr = append(projectsetArr, fmt.Sprintf(`%s`, s)) if len(projectsetArr) == 200 || (pCount == len(projectsetMap) && len(projectsetArr) > 0) { pool <- true projectsetWait.Add(1) go func(pArr []string) { defer func() { <-pool projectsetWait.Done() }() projectsetEsData := elastic.Get("projectset", "projectset", fmt.Sprintf(`{"query": {"bool": {"should": [{"terms": {"list.infoid": ["%s"]}}]}},"_source": ["list"],"size": %d}`, strings.Join(pArr, `","`), len(pArr)*2)) //查询双倍数量 避免缺失数据 if projectsetEsData != nil && len(*projectsetEsData) > 0 { for _, m := range *projectsetEsData { MsgList := m["list"] if MsgList != nil { list := qutil.ObjArrToMapArr(MsgList.([]interface{})) for _, m2 := range list { if projectsetMap[qutil.InterfaceToStr(m2["infoid"])] { lock.Lock() projectsetEsMap[qutil.InterfaceToStr(m2["infoid"])] = m lock.Unlock() } } } } } }(projectsetArr) projectsetArr = []string{} } } projectsetWait.Wait() }() } wait.Wait() } var entCacheMap = map[string]map[string]interface{}{} pool := make(chan bool, 5) wait := &sync.WaitGroup{} lock := &sync.Mutex{} for index := 0; index < len(*data); index++ { pool <- true wait.Add(1) go func(v map[string]interface{}) { defer qutil.Catch() defer func() { <-pool wait.Done() }() //有中标企业 且 高级字段查询 if dataType == "2" { //查询企业公示 法人 公司电话 公司邮箱地址 entidlist, ok := v["entidlist"].([]interface{}) if ok && len(entidlist) > 0 { var winnerMaps []map[string]interface{} for _, entIdObj := range entidlist { entId := qutil.ObjToString(entIdObj) if entId == "" { continue } lock.Lock() ecm := entCacheMap[entId] lock.Unlock() if ecm != nil { winnerMaps = append(winnerMaps, ecm) //} else if entDetail := elastic.Get("qyxy", "qyxy", fmt.Sprintf(`{"query":{"bool":{"must":[{"term":{"id":"%s"}}]}},"size":1,"_source":["company_name","company_email","company_phone","legal_person"]}`, entId)); entDetail != nil && len(*entDetail) > 0 { } else if entDetail := qyxyEsMap[entId]; entDetail != nil && len(entDetail) > 0 { thisEntMap := map[string]interface{}{} legal_person := "" if (entDetail)["legal_person"] != nil { legal_person = (entDetail)["legal_person"].(string) if isEncry { var xx = "*" switch len([]rune(legal_person)) { case 3: xx = "**" case 4: xx = "***" } legal_person = string([]rune(legal_person)[:1]) + xx } } company_phone := "" if entDetail["company_phone"] != nil { company_phone = entDetail["company_phone"].(string) if isEncry { if len([]rune(company_phone)) > 7 { company_phone = company_phone[:7] + "****" } else { company_phone = "****" } } } company_email := "" if entDetail["company_email"] != nil && entDetail["company_email"] != "无" { company_email = entDetail["company_email"].(string) if isEncry { if len(strings.Split(company_email, "@")) > 1 { company_email = "******" + "@" + strings.Split(company_email, "@")[1] } } } company_name := "" if entDetail["company_name"] != nil { company_name = entDetail["company_name"].(string) } thisEntMap["legal_person"] = legal_person thisEntMap["company_phone"] = company_phone thisEntMap["company_email"] = company_email thisEntMap["company_name"] = company_name lock.Lock() entCacheMap[entId] = thisEntMap lock.Unlock() winnerMaps = append(winnerMaps, thisEntMap) } } if len(winnerMaps) > 0 { v["winnerMaps"] = winnerMaps } } delete(v, "entidlist") } //====================字段补漏========================= //if v["toptype"] == "结果" && dataType == "2" && !(v["agency"] != nil && v["budget"] != nil && v["buyerperson"] != nil && v["buyertel"] != nil) { if r := projectsetEsMap[qutil.InterfaceToStr(v["_id"])]; r != nil && len(r) > 0 { //r := elastic.Get("projectset", "projectset", fmt.Sprintf(`{"query":{"term":{"list.infoid":"%s"}},"_source": ["list"]}`, v["_id"])) //if r != nil && len(*r) > 0 { MsgList := r["list"] if MsgList != nil { list := qutil.ObjArrToMapArr(MsgList.([]interface{})) for _, vv := range list { if vv["subtype"] == "招标" { if v["agency"] == nil && vv["agency"] != nil { v["agency"] = vv["agency"] } if v["budget"] == nil && vv["budget"] != nil { v["budget"] = vv["budget"] } if v["buyerperson"] == nil && vv["buyerperson"] != nil { v["buyerperson"] = vv["buyerperson"] } if v["buyertel"] == nil && vv["buyertel"] != nil { v["buyertel"] = vv["buyertel"] } break } } } //} } if v["area"] == "A" { v["area"] = "全国" } if v["bidamount"] != nil { v["bidamount"] = formatFloat(qutil.Float64All(v["bidamount"])) } if v["budget"] != nil { v["budget"] = formatFloat(qutil.Float64All(v["budget"])) } if v["publishtime"] != nil { date := v["publishtime"] v["publishtime"] = FormatDateWithObj(&date, Date_Short_Layout) } if v["bidopentime"] != nil { date := v["bidopentime"] v["bidopentime"] = FormatDateWithObj(&date, Date_Short_Layout) } if qutil.IntAll(v["signendtime"]) != 0 { date := v["signendtime"] v["signendtime"] = FormatDateWithObj(&date, Date_Short_Layout) } if v["bidendtime"] != nil { date := v["bidendtime"] v["bidendtime"] = FormatDateWithObj(&date, Date_Short_Layout) } if v["signaturedate"] != nil { date := v["signaturedate"] v["signaturedate"] = FormatDateWithObj(&date, Date_Short_Layout) } if v["_id"] != nil { encodeId := CommonEncodeArticle("content", v["_id"].(string)) v["url"] = webdomain + "/article/content/" + encodeId + ".html" v["url_jump"] = webdomain + "/front/reloadTo/article/content/" + encodeId + ".html" } if v["currency"] == "" || v["currency"] == nil { v["currency"] = "人民币" } if isEncry { if v["projectscope"] != "" && v["projectscope"] != nil { str := ClearHtml.ReplaceAllString(v["projectscope"].(string), "") str = ClearOther.ReplaceAllString(str, "") str = strings.Replace(str, " ", "", -1) if len([]rune(str)) > 100 { str = qutil.SubString(str, 0, 100) + "..." } v["projectscope"] = str } if v["detail"] != "" && v["detail"] != nil { str := ClearHtml.ReplaceAllString(v["detail"].(string), "") str = ClearOther.ReplaceAllString(str, "") str = strings.Replace(str, " ", "", -1) if len([]rune(str)) > 100 { str = qutil.SubString(str, 0, 100) + "..." } v["detail"] = str } if v["title"] != "" && v["title"] != nil { str := ClearHtml.ReplaceAllString(v["title"].(string), "") str = ClearOther.ReplaceAllString(str, "") str = strings.Replace(str, " ", "", -1) if len([]rune(str)) > 100 { str = qutil.SubString(str, 0, 100) + "..." } v["title"] = str } } if v["subtype"] == nil && v["toptype"] != nil { v["subtype"] = v["toptype"] } }((*data)[index]) } wait.Wait() return data } // 保留到0.01分 func formatFloat(value float64) string { str := strings.TrimRight(fmt.Sprintf("%.7f", value*10000/100000000), "0") if str[len(str)-1:] == "." { return str[:len(str)-1] } return str } func doSearch(sql string, start, count int, dataType string) *[]map[string]interface{} { if sql != "" { //筛选字段 if dataType != "" { dataexport_field := bidBaseField if dataType == "2" { dataexport_field = bidseniorField } sql = sql[:len(sql)-1] + `,"_source":[` + dataexport_field + "]}" } //分页排序 sql = sql[:len(sql)-1] + `,"sort": {"dataweight": "desc","publishtime":"desc","id":"desc"},"from":` + strconv.Itoa(start) + `,"size":` + strconv.Itoa(count) + "}" } log.Println("doSearch", sql) return elastic.Get(INDEX, TYPE, sql) } func getKeyWordArrFromDbResult(k interface{}) (arr []KeyWord) { if k == nil { return } kArr := k.([]interface{}) for _, v := range kArr { kw := KeyWord{} b, e := json.Marshal(v) if e != nil { log.Println(e.Error()) } json.Unmarshal(b, &kw) arr = append(arr, kw) } return } func getStringArrFromDbResult(c interface{}) (arr []string) { if c != nil { cArr := c.([]interface{}) arr = qutil.ObjArrToStringArr(cArr) } return } // 获取结果,空字段最少的数据 func ScreenData(arr *[]map[string]interface{}, dataType string, resultNum int, kws []KeyWord) (res []map[string]interface{}) { AllMap := map[int][]map[string]interface{}{} NoKwsMap := map[int][]map[string]interface{}{} lastNum := resultNum for _, v := range *arr { emptyNum := countOfTheEmpty(v, dataType) if emptyNum == -1 { continue } if len(kws) > 0 && kws[0].Keyword != "" { var kwsFlag = true for _, vk := range kws { if strings.Contains(qutil.ObjToString(v["title"]), strings.Replace(vk.Keyword, "+", "", -1)) { kwsFlag = false continue } } if kwsFlag { if NoKwsMap[emptyNum] == nil { NoKwsMap[emptyNum] = []map[string]interface{}{v} } else { NoKwsMap[emptyNum] = append(NoKwsMap[emptyNum], v) } continue } } if AllMap[emptyNum] == nil { AllMap[emptyNum] = []map[string]interface{}{v} continue } AllMap[emptyNum] = append(AllMap[emptyNum], v) } //获取key keys := []int{} for k, _ := range AllMap { keys = append(keys, k) } sort.Ints(keys) //选取结果 for _, v := range keys { if len(AllMap[v]) >= resultNum { return append(res, AllMap[v][:resultNum]...) } else { resultNum = resultNum - len(AllMap[v]) tmp := append(res, AllMap[v][:len(AllMap[v])]...) res = tmp } } if len(res) < lastNum { resultNum = lastNum - len(res) //获取key Nokeys := []int{} for k, _ := range NoKwsMap { Nokeys = append(Nokeys, k) } sort.Ints(Nokeys) log.Println("没关键词的空字段数量", Nokeys) //选取结果 for _, v := range Nokeys { if len(NoKwsMap[v]) >= resultNum { return append(res, NoKwsMap[v][:resultNum]...) } else { resultNum = resultNum - len(NoKwsMap[v]) tmp := append(res, NoKwsMap[v][:len(NoKwsMap[v])]...) res = tmp } } } return res } func countOfTheEmpty(m map[string]interface{}, dataType string) int { MsgType := m["subtype"] // if MsgType == "拟建" { // return -1 // } //计算空字段数量 var count int = 0 //高级字段包 if dataType == "2" { if m["href"] == "" || m["href"] == nil { count++ } if m["projectcode"] == "" || m["projectcode"] == nil { count++ } if m["buyerperson"] == "" || m["buyerperson"] == nil { count++ } if m["buyertel"] == "" || m["buyertel"] == nil { count++ } if m["budget"] == "" || m["budget"] == nil { count++ } if m["bidopentime"] == "" || m["bidopentime"] == nil { count++ } if m["agency"] == "" || m["agency"] == nil { count++ } if m["projectscope"] == "" || m["projectscope"] == nil { count++ } } if m["city"] == "" || m["city"] == nil { count++ } if m["publishtime"] == "" || m["publishtime"] == nil { count++ } if m["projectname"] == "" || m["projectname"] == nil { count++ } if m["buyer"] == "" || m["buyer"] == nil { count++ } if m["s_winner"] == "" || m["s_winner"] == nil { if MsgType != "招标" { count++ } } if m["bidamount"] == "" || m["bidamount"] == nil { if MsgType != "招标" { count++ } } if m["subtype"] == "" || m["subtype"] == nil { count++ } return count }