package main import ( elastic "app.yhyue.com/moapp/jybase/es" "fmt" "go.mongodb.org/mongo-driver/bson/primitive" u "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis" "log" "reflect" "sort" "strconv" "strings" "time" ) func InitEs() { Es = elastic.NewEs("v7", conf.Config.Es.Addr, conf.Config.Es.Size, conf.Config.Es.User, conf.Config.Es.Password) } // @Description subscopeclass、topscopeclass、package // 20230523 多包处理 subpackage = 1 // @Author J 2022/6/7 5:54 PM func fieldFun(data map[string]interface{}) { // 附件重采,数据同步时不更新判重标识(正常抽取判重) if u.IntAll(data["repeat"]) == 1 { data["extracttype"] = -1 //update["repeat_id"] = compare["repeat_id"] } else { data["extracttype"] = 1 } //未抽取、判重处理 if u.IntAll(data["dataging"]) == 1 { //修改未抽取的bidding数据的dataging data["dataging"] = 0 // } if subscopeclass, ok := data["subscopeclass"].(primitive.A); ok && len(subscopeclass) > 0 { //subscopeclass m1 := map[string]bool{} var newclass []string for _, sc := range subscopeclass { sclass, _ := sc.(string) if !m1[sclass] { m1[sclass] = true newclass = append(newclass, sclass) } } //data["subscopeclass"] = newclass data["s_subscopeclass"] = strings.Join(newclass, ",") } if topscopeclass, ok := data["topscopeclass"].(primitive.A); ok && len(topscopeclass) > 0 { //topscopeclass m2 := map[string]bool{} var newclass []string for _, tc := range topscopeclass { tclass, _ := tc.(string) tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母 if !m2[tclass] { m2[tclass] = true newclass = append(newclass, tclass) } } //data["topscopeclass"] = topscopeclass data["s_topscopeclass"] = strings.Join(newclass, ",") } //package if package1 := data["package"]; package1 != nil { packageM, _ := package1.(map[string]interface{}) //data["package"] = packageM for _, p := range packageM { pm, _ := p.(map[string]interface{}) if u.ObjToString(pm["winner"]) != "" || u.Float64All(pm["budget"]) > 0 || u.Float64All(pm["bidamount"]) > 0 { data["multipackage"] = 1 break } } } else { data["multipackage"] = 0 } // subpackage if data["package"] != nil && data["s_winner"] != nil && data["bidamount"] != nil { if pg, ok := data["package"].(map[string]interface{}); ok && len(pg) > 1 { var bmt []float64 var swn []string for _, p := range pg { p1 := p.(map[string]interface{}) if p1["bidamount"] != nil { bmt = append(bmt, u.Float64All(p1["bidamount"])) } if w := u.ObjToString(p1["winner"]); w != "" { swn = append(swn, w) } } if len(bmt) > 1 && len(swn) > 1 { sn := strings.Split(u.ObjToString(data["s_winner"]), ",") sort.Strings(sn) sort.Strings(swn) swn1 := u.ObjArrToStringArr(Duplicate(swn)) // 去重 if strings.Join(swn1, ",") == strings.Join(sn, ",") { bidamount := 0.0 for _, f := range bmt { bidamount += f } if bidamount == u.Float64All(data["bidamount"]) { data["subpackage"] = 1 } } } } } } // Duplicate // @Description 去重 // @Author J 2023/5/24 09:53 func Duplicate(a interface{}) (ret []interface{}) { va := reflect.ValueOf(a) for i := 0; i < va.Len(); i++ { if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), va.Index(i).Interface()) { continue } ret = append(ret, va.Index(i).Interface()) } return ret } // @Description entidlist // @Author J 2022/6/7 2:36 PM func companyFun(s_winner string) (cid []string) { sWinnerarr := strings.Split(s_winner, ",") for _, w := range sWinnerarr { if w != "" { id := redis.GetStr("qyxy_id", w) if id == "" { ents, _ := MgoQ.Find(conf.Config.MongodbQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1) if len(*ents) > 0 { id = u.ObjToString((*ents)[0]["_id"]) redis.PutCKV("qyxy_id", w, id) } else { ent, _ := MgoP.FindOne(conf.Config.MongodbP.Coll, map[string]interface{}{"history_name": w}) if len(*ent) > 0 { id = u.ObjToString((*ent)["company_id"]) redis.PutCKV("qyxy_id", w, id) } } } if id == "" { id = "-" } cid = append(cid, id) } } return cid } // @Description update 修改bidding表,extractM修改抽取表 // @Author J 2022/6/10 10:29 AM func typeFunc(data map[string]interface{}) { if jyData, ok := data["jyfb_data"].(map[string]interface{}); ok { if t := u.ObjToString(jyData["type"]); t != "" { switch t { //case "采购信息": case "招标公告": if u.ObjToString(data["toptype"]) != "招标" { data["toptype"] = "招标" delete(data, "subtype") } case "采购意向": if u.ObjToString(data["toptype"]) != "采购意向" { data["toptype"] = "采购意向" data["subtype"] = "采购意向" } case "招标预告": if u.ObjToString(data["toptype"]) != "预告" { data["toptype"] = "预告" delete(data, "subtype") } case "招标结果": if u.ObjToString(data["toptype"]) != "结果" { data["toptype"] = "结果" delete(data, "subtype") } } } } } // @Description 附件有效字段(isValidFile) // @Author J 2022/7/8 14:41 func validFile(tmp map[string]interface{}) int { isContinue := false if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o { if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 { for _, att := range atts { if att == nil { continue } if reflect.TypeOf(att).String() == "string" { continue } att1 := att.(map[string]interface{}) if fid := u.ObjToString(att1["fid"]); fid != "" { isContinue = true break } } if isContinue { if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o { if len(attachTxt) > 0 { for _, at := range attachTxt { if at1, ok := at.(map[string]interface{}); ok && len(at1) > 0 { for k, _ := range at1 { if reflect.TypeOf(at1[k]).String() == "string" { continue } at2 := at1[k].(map[string]interface{}) s := strings.ToLower(u.ObjToString(at2["file_name"])) //和王江含确认,此处判断老版本存在问题,正确为排除jpg、jpeg、png、pdf类型(2024-03-12) //if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") || strings.Contains(s, "pdf") { if !filterFileType.MatchString(s) { if strings.Contains(s, "swf") || strings.Contains(s, "html") { return -1 } else if AnalysisFile(OssGetObject(u.ObjToString(at2["attach_url"]))) { return 1 } } } break } else { break } } } } flag := false for _, att := range atts { if att == nil { continue } if reflect.TypeOf(att).String() == "string" { continue } att1 := att.(map[string]interface{}) if fid := u.ObjToString(att1["fid"]); fid != "" { ftype := strings.ToLower(u.ObjToString(tmp["ftype"])) if ftype != "swf" && ftype != "html" && OssObjExists("jy-datafile", fid) { return 1 } else { flag = true } } } if flag { return -1 } } } } return 0 } var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"} // @Description 发布时间处理 // @Author J 2023/5/23 14:32 func methodPb(tmp map[string]interface{}) { if tmp["ext_publishtime"] != nil { if newPb := u.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 { tmp["publishtime"] = newPb return } } for _, d := range DateTimeSelect { if tmp[d] != nil && u.Int64All(tmp[d]) < time.Now().Unix() { tmp["publishtime"] = u.Int64All(tmp[d]) return } } return } // @Description 获取情报标签 // @Author 徐志恒 2024/2/21 09:53 func getTagSet(data map[string]interface{}) map[string]map[string]interface{} { tagSet := map[string]map[string]interface{}{} wuye := map[string]interface{}{} buyer := u.ObjToString(data["buyer"]) publishtime := u.Int64All(data["publishtime"]) bidamount := u.Float64All(data["bidamount"]) wuye["isfirsthand"] = 62 if buyer != "" { sql := `{ "query": { "bool": { "must": [ { "term": { "buyer": "` + buyer + `" } }, { "term": { "tag_topinformation": "情报_物业" } }, { "term": { "subtype": "合同" } }, { "range": { "publishtime": { "lte": ` + fmt.Sprint(publishtime) + ` } } } ] } }, "sort": { "publishtime": "asc" }, "_source": [ "s_winner" ], "size": 10000 }` data := Es.Get("bidding", "bidding", sql) if data != nil && len(*data) > 0 { count := 0 first := u.ObjToString((*data)[0]["s_winner"]) for k, v := range *data { winner := u.ObjToString(v["s_winner"]) if k > 0 && first != winner { first = winner count++ } } changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data))) changehands, _ := strconv.ParseFloat(changehand, 64) wuye["changehand"] = changehands log.Println("changehands", count, len(*data), changehands) if changehands > 0.3 { wuye["changehandindex"] = 61 } if len(*data) > 1 { wuye["isfirsthand"] = 0 } } } if data["projectinfo"] != nil { projectInfo := u.ObjToMap(data["projectinfo"]) if projectInfo != nil && len(*projectInfo) > 0 { if (*projectInfo)["attachments"] != nil { wuye["isfile"] = 63 } } } wuye["period"] = getperiod(data) wuye["scale"] = getBidamountRange(bidamount) if data["property_form"] != nil { property_form := u.ObjArrToStringArr(data["property_form"].([]interface{})) wuye["property_form"] = getpropertyform(property_form) } tagSet["wuye"] = wuye return tagSet } func getBidamountRange(value float64) int { switch { case value < 500000: return 1 case value >= 500000 && value < 1000000: return 2 case value >= 1000000 && value < 2000000: return 3 case value >= 2000000 && value < 5000000: return 4 default: return 5 } } func getpropertyform(value []string) string { arr := []string{} categories := map[string]string{ "住宅": "21", "政府办公楼": "22", "学校": "23", "医院": "24", "产业园区": "25", "旅游景区": "26", "交通运输": "27", "商务办公楼": "28", "酒店": "29", } for _, v := range value { if categories[v] != "" { arr = append(arr, categories[v]) } } return strings.Join(arr, ",") } func getperiod(data map[string]interface{}) int { res := 16 signaturedate := u.Int64All(data["signaturedate"]) //合同签订日期 expiredate := u.Int64All(data["expiredate"]) //合同截止日期 // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限 project_duration := u.IntAll(data["project_duration"]) //工期时长 project_timeunit := u.ObjToString(data["project_timeunit"]) //工期单位 result := float64(0) if expiredate > 0 && signaturedate > 0 { result = calculateYearDifference(signaturedate, expiredate) } else if project_duration > 0 && project_timeunit != "" { if strings.Contains(project_timeunit, "年") { if project_duration == 1 { res = 12 } else if project_duration == 2 { res = 13 } else if project_duration == 3 { res = 14 } else if project_duration == 5 { res = 15 } return res } else if strings.Contains(project_timeunit, "月") { result = float64(project_duration) / 12 } else if strings.Contains(project_timeunit, "周") { result = float64(project_duration) * 7 / 365 } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") { result = float64(project_duration) / 365 } } if result == 0 { res = 16 } else if result < 1 { res = 11 } else if result >= 1 && result < 2 { res = 12 } else if result >= 2 && result < 3 { res = 13 } else if result >= 3 && result < 4 { res = 14 } else if result >= 5 { res = 15 } return res } func calculateYearDifference(startTime int64, endTime int64) float64 { start := time.Unix(startTime, 0) end := time.Unix(endTime, 0) duration := end.Sub(start) years := duration.Hours() / 24 / 365 return years }