package main import ( "createindex/oss" "reflect" "regexp" "strconv" "strings" "time" "unicode/utf8" util "utils" "utils/mongodb" "utils/redis" ) var date1 = regexp.MustCompile("20[0-2][0-9][年|\\-/.][0-9]{1,2}[月|\\-/.][0-9]{1,2}[日]?") // @Description 合并extract 字段到bidding表 // @Author J 2022/6/7 2:25 PM func MergeExtract(tmp, compare, update map[string]interface{}, extractMap map[string]map[string]interface{}, tasktype string) (map[string]interface{}, map[string]interface{}) { tid := mongodb.BsonIdToSId(tmp["_id"]) if extractMap[tid] != nil { compare = extractMap[tid] if tasktype == "bidding" { // 增量id段 正常数据 if num := util.IntAll(compare["dataging"]); num == 1 { //extract中dataging=1跳过 tmp = make(map[string]interface{}) compare = nil return compare, update } delete(extractMap, tid) } if tasktype == "bidding_history" { //增量id段 历史数据 if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过 tmp = make(map[string]interface{}) compare = nil return compare, update } delete(extractMap, tid) } //更新bidding表,生成索引;bidding表modifyinfo中的字段不更新 modifyinfo := make(map[string]bool) if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil { for k, _ := range tmpmodifyinfo { modifyinfo[k] = true } } //更新bidding表,生成索引 for _, k := range biddingMgoFields { v1 := compare[k] //extract v2 := tmp[k] //bidding if v2 == nil && v1 != nil && !modifyinfo[k] { update[k] = v1 } else if v2 != nil && v1 != nil && !modifyinfo[k] { //update[k+"_b"] = v2 update[k] = v1 } else if v2 != nil && v1 == nil { //update[k+"_b"] = v2 if k == "area" || k == "city" || k == "district" { update[k] = "" } } } if util.IntAll(compare["repeat"]) == 1 { update["extracttype"] = -1 } else { update["extracttype"] = 1 } } else { compare = nil if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging update["dataging"] = 0 } } return compare, update } // @Description subscopeclass、topscopeclass、package // @Author J 2022/6/7 5:54 PM func FieldMethod(compare, update map[string]interface{}) { subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass if subscopeclass != nil { m1 := map[string]bool{} newclass := []string{} for _, sc := range subscopeclass { sclass, _ := sc.(string) if !m1[sclass] { m1[sclass] = true newclass = append(newclass, sclass) } } update["s_subscopeclass"] = strings.Join(newclass, ",") update["subscopeclass"] = newclass } topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass if topscopeclass != nil { m2 := map[string]bool{} newclass := []string{} for _, tc := range topscopeclass { tclass, _ := tc.(string) tclass = reg_letter.ReplaceAllString(tclass, "") // 去除字母 if !m2[tclass] { m2[tclass] = true newclass = append(newclass, tclass) } } update["s_topscopeclass"] = strings.Join(newclass, ",") } if package1 := compare["package"]; package1 != nil { packageM, _ := package1.(map[string]interface{}) for _, p := range packageM { pm, _ := p.(map[string]interface{}) if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 || util.Float64All(pm["bidamount"]) > 0 { update["multipackage"] = 1 break } } } else { update["multipackage"] = 0 } } // @Description ES保存字段 // @Author J 2022/6/7 11:34 AM func GetEsField(tmp, update map[string]interface{}, stype string) map[string]interface{} { newTmp := make(map[string]interface{}) for field, ftype := range biddingEsFields { if tmp[field] != nil { // if field == "projectinfo" { mp, _ := tmp[field].(map[string]interface{}) if mp != nil { newmap := map[string]interface{}{} for k, ktype := range projectinfoFields { mpv := mp[k] if mpv != nil && reflect.TypeOf(mpv).String() == ktype { newmap[k] = mp[k] } } if len(newmap) > 0 { newTmp[field] = newmap } } } else if field == "purchasinglist" { //标的物处理 purchasinglist_new := []map[string]interface{}{} if pcl, _ := tmp[field].([]interface{}); len(pcl) > 0 { for _, ls := range pcl { lsm_new := make(map[string]interface{}) lsm := ls.(map[string]interface{}) for pf, pftype := range purchasinglistFields { lsmv := lsm[pf] if lsmv != nil && reflect.TypeOf(lsmv).String() == pftype { lsm_new[pf] = lsm[pf] } } if lsm_new != nil && len(lsm_new) > 0 { purchasinglist_new = append(purchasinglist_new, lsm_new) } } } if len(purchasinglist_new) > 0 { newTmp[field] = purchasinglist_new } } else if field == "procurementlist" { if tmp["procurementlist"] != nil { var arr []interface{} plist := tmp["procurementlist"].([]interface{}) for _, p := range plist { p1 := p.(map[string]interface{}) p2 := make(map[string]interface{}) for k, v := range procurementlisFields { if k == "projectname" && util.ObjToString(p1[k]) == "" { p2[k] = util.ObjToString(tmp["projectname"]) } else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" { p2[k] = util.ObjToString(tmp["buyer"]) } else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" { res := getMethod(util.ObjToString(p1[k])) if res != 0 { p2[k] = res } } else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v { p2[k] = p1[k] } } arr = append(arr, p2) } if len(arr) > 0 { newTmp[field] = arr } } } else if field == "projectscope" { ps, _ := tmp["projectscope"].(string) if len(ps) > pscopeLength { newTmp["projectscope"] = string(([]rune(ps))[:pscopeLength]) } else { newTmp["projectscope"] = ps } } else if field == "winnerorder" { //中标候选 winnerorder_new := []map[string]interface{}{} if winnerorder, _ := tmp[field].([]interface{}); len(winnerorder) > 0 { for _, win := range winnerorder { winMap_new := make(map[string]interface{}) winMap := win.(map[string]interface{}) for wf, wftype := range winnerorderlistFields { wfv := winMap[wf] if wfv != nil && reflect.TypeOf(wfv).String() == wftype { if wf == "sort" && util.Int64All(wfv) > 100 { continue } winMap_new[wf] = winMap[wf] } } if winMap_new != nil && len(winMap_new) > 0 { winnerorder_new = append(winnerorder_new, winMap_new) } } } if len(winnerorder_new) > 0 { newTmp[field] = winnerorder_new } } else if field == "qualifies" { //项目资质 qs := []string{} if q, _ := tmp[field].([]interface{}); len(q) > 0 { for _, v := range q { v1 := v.(map[string]interface{}) qs = append(qs, util.ObjToString(v1["key"])) } } if len(qs) > 0 { newTmp[field] = strings.Join(qs, ",") } } else if field == "review_experts" { // 评审专家 if arr, ok := tmp["review_experts"].([]interface{}); ok && len(arr) > 0 { arr1 := util.ObjArrToStringArr(arr) newTmp[field] = strings.Join(arr1, ",") } } else if field == "bidopentime" { if tmp[field] != nil && tmp["bidendtime"] == nil { newTmp["bidendtime"] = tmp[field] newTmp[field] = tmp[field] } else if tmp[field] == nil && tmp["bidendtime"] != nil { newTmp["bidendtime"] = tmp[field] newTmp[field] = tmp["bidendtime"] } else { if tmp["bidopentime"] != nil { newTmp[field] = tmp["bidopentime"] } } } else if field == "detail" { //过滤 detail, _ := tmp[field].(string) if len([]rune(detail)) > detailLength { detail = detail[:detailLength] } detail = filterSpace.ReplaceAllString(detail, "") if stype == "bidding" || stype == "bidding_history" { text, b := FilterDetail(detail) newTmp[field] = util.ObjToString(tmp["title"]) + " " + text update["cleartag"] = b } else { if tmp["cleartag"] != nil && tmp["cleartag"].(bool) { text, _ := FilterDetail(detail) newTmp[field] = util.ObjToString(tmp["title"]) + " " + text } else { newTmp[field] = util.ObjToString(tmp["title"]) + " " + detail } } } else if field == "_id" || field == "topscopeclass" || field == "entidlist" { //不做处理 newTmp[field] = tmp[field] } else if field == "publishtime" || field == "comeintime" { //字段类型不正确,特别处理 if tmp[field] != nil && util.Int64All(tmp[field]) > 0 { newTmp[field] = util.Int64All(tmp[field]) } } else { //其它字段判断数据类型,不正确舍弃 if fieldval := tmp[field]; reflect.TypeOf(fieldval).String() != ftype { continue } else { if fieldval != "" { newTmp[field] = fieldval } } } } } filetext := getFileText(tmp) if len([]rune(filetext)) > 10 { newTmp["filetext"] = filetext } //else { // // 附件未识别到内容,有附件且附件能够下载 filetext="" // if pinfo, o1 := tmp["projectinfo"].(map[string]interface{}); o1 { // if atts, o2 := pinfo["attachments"].(map[string]interface{}); o2 { // tag := false // for _, at := range atts { // at1 := at.(map[string]interface{}) // if at1["fid"] != nil { // tag = true // break // } // } // if tag { // newTmp["filetext"] = "" // } // } // } //} YuceEndtime(newTmp) // 预测结果时间 if stype == "bidding" || stype == "bidding_history" { newTmp["createtime"] = time.Now().Unix() // es库数据创建时间,只有增量数据有 } return newTmp } // @Description 附件内容 // @Author J 2022/6/7 1:54 PM func getFileText(tmp map[string]interface{}) (filetext string) { if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok { for _, tmpData1 := range attchMap { if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok { for _, result := range tmpData2 { if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok { if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" { bs := oss.OssGetObject(attach_url) //oss读数据 if utf8.RuneCountInString(filetext+bs) < fileLength { filetext += bs + "\n" } else { if utf8.RuneCountInString(bs) > fileLength { filetext = bs[0:fileLength] } else { filetext = bs } break } } } } } } } return } // 预测结果时间 func YuceEndtime(tmp map[string]interface{}) { flag := true scope := []string{"服务采购_法律咨询", "服务采购_会计", "服务采购_物业", "服务采购_审计", "服务采购_安保", "服务采购_仓储物流", "服务采购_广告宣传印刷"} subscopeclass := util.ObjToString(tmp["s_subscopeclass"]) for _, v := range scope { if strings.Contains(subscopeclass, v) { flag = false break } } if flag { return } subtype := util.ObjToString(tmp["subtype"]) if subtype == "成交" || subtype == "合同" { // yucestarttime、yuceendtime yucestarttime, yuceendtime := int64(0), int64(0) // 项目周期中 if util.ObjToString(tmp["projectperiod"]) != "" { dateStr := date1.FindStringSubmatch(util.ObjToString(tmp["projectperiod"])) if len(dateStr) == 2 { sdate := FormatDateStr(dateStr[0]) edate := FormatDateStr(dateStr[1]) if sdate < edate && sdate != 0 && edate != 0 { yucestarttime = sdate yuceendtime = edate } } } if yucestarttime > 0 && yuceendtime > yucestarttime { tmp["yuceendtime"] = yuceendtime return } // 预测开始时间 合同签订日期 if yucestarttime == 0 { if util.IntAll(tmp["signaturedate"]) <= 0 { if util.IntAll(tmp["publishtime"]) <= 0 { return } else { yucestarttime = util.Int64All(tmp["publishtime"]) } } else { yucestarttime = util.Int64All(tmp["signaturedate"]) } } // 预测结束时间 if yucestarttime > 0 && yuceendtime == 0 { if util.IntAll(tmp["project_duration"]) > 0 && util.ObjToString(tmp["project_timeunit"]) != "" { yuceendtime = YcEndTime(yucestarttime, util.IntAll(tmp["project_duration"]), util.ObjToString(tmp["project_timeunit"])) tmp["yuceendtime"] = yuceendtime } } } } func YcEndTime(starttime int64, num int, unit string) int64 { yuceendtime := int64(0) if unit == "日历天" || unit == "天" || unit == "日" { yuceendtime = starttime + int64(num*86400) } else if unit == "周" { yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num*7).Unix() } else if unit == "月" { yuceendtime = time.Unix(starttime, 0).AddDate(0, num, 0).Unix() } else if unit == "年" { yuceendtime = time.Unix(starttime, 0).AddDate(num, 0, 0).Unix() } else if unit == "工作日" { n := num / 7 * 2 yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num+n).Unix() } return yuceendtime } func FormatDateStr(ds string) int64 { ds = strings.Replace(ds, "年", "-", -1) ds = strings.Replace(ds, "月", "-", -1) ds = strings.Replace(ds, "日", "", -1) ds = strings.Replace(ds, "/", "-", -1) ds = strings.Replace(ds, ".", "-", -1) location, err := time.ParseInLocation(util.Date_Short_Layout, ds, time.Local) if err != nil { util.Debug(err) return 0 } else { return location.Unix() } } // @Description entidlist // @Author J 2022/6/7 2:36 PM func FieldFun(tmp map[string]interface{}) (cid []string) { sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",") for _, w := range sWinnerarr { if w != "" { id := redis.GetStr("qyxy_id", w) if id == "" { ents, _ := standardMgo.Find("qyxy_std", map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, nil, false, -1, -1) if len(*ents) > 0 { id = util.ObjToString((*ents)[0]["_id"]) redis.PutCKV("qyxy_id", w, id) } else { ent, _ := qyxyMgo.FindOne("company_history_name", map[string]interface{}{"history_name": w}) if len(*ent) > 0 { id = util.ObjToString((*ent)["company_id"]) redis.PutCKV("qyxy_id", w, id) } } } if id == "" { id = "-" } cid = append(cid, id) } } return cid } var filterSpace = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]") func FilterDetail(text string) (string, bool) { b := false // 清理标记 for _, s := range FilterKeyword { reg := regexp.MustCompile(s) if reg.MatchString(text) { text = reg.ReplaceAllString(text, "") if !b { b = true } } } return text, b } // 正则判断是否包含 func checkContains(s, sub string) bool { reg := regexp.MustCompile(`(?i)(^|([\s\t\n]+))(` + sub + `)($|([\s\t\n]+))`) return reg.MatchString(s) } var TimeV1 = regexp.MustCompile("(\\d{4})[年.]?$") var TimeV2 = regexp.MustCompile("(\\d{4}[年.\\-/]?)(\\d{1,2}[月.\\-/]?$)") var TimeClear = regexp.MustCompile("[年|月|/|.|-]") // @Description 采购意向 预计采购时间处理 // @Author J 2022/6/7 8:04 PM func getMethod(str string) int64 { if TimeV1.MatchString(str) { arr := TimeV1.FindStringSubmatch(str) st := arr[1] + "0000" parseInt, err := strconv.ParseInt(st, 10, 64) if err == nil { return parseInt } } else if TimeV2.MatchString(str) { arr := TimeV2.FindStringSubmatch(str) str1 := arr[2] if len(str1) == 1 { str1 = "0" + str1 } str2 := TimeClear.ReplaceAllString(arr[1], "") + TimeClear.ReplaceAllString(str1, "") + "00" parseInt, err := strconv.ParseInt(str2, 10, 64) if err == nil { return parseInt } } return 0 } // @Description 字段空值处理 // @Author J 2022/6/7 8:04 PM func clearMap(tmp map[string]interface{}) { for k := range tmp { if tmp[k] == nil { continue } if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok && len(purchasinglist) == 0 { delete(tmp, "purchasinglist") } else if reflect.TypeOf(tmp[k]).String() == "string" && util.ObjToString(tmp[k]) == "" { delete(tmp, k) } } } // @Description 附件有效字段(isValidFile) // @Author J 2022/7/8 14:41 func validFile(tmp map[string]interface{}) int { isContinue := false if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o { if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 { for _, att := range atts { if att == nil { util.Debug(tmp["_id"]) continue } if reflect.TypeOf(att).String() == "string" { util.Debug(tmp["_id"]) continue } att1 := att.(map[string]interface{}) if fid := util.ObjToString(att1["fid"]); fid != "" { isContinue = true break } } if isContinue { if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o { if len(attachTxt) > 0 { for _, at := range attachTxt { at1 := at.(map[string]interface{}) if len(at1) > 0 { for k, _ := range at1 { if reflect.TypeOf(at1[k]).String() == "string" { util.Debug(tmp["_id"]) continue } at2 := at1[k].(map[string]interface{}) s := strings.ToLower(util.ObjToString(at2["file_name"])) if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") || strings.Contains(s, "pdf") { if strings.Contains(s, "swf") || strings.Contains(s, "html") { return -1 } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) { return 1 } } } break } else { break } } } } flag := false for _, att := range atts { if att == nil { continue } if reflect.TypeOf(att).String() == "string" { continue } att1 := att.(map[string]interface{}) if fid := util.ObjToString(att1["fid"]); fid != "" { ftype := strings.ToLower(util.ObjToString(tmp["ftype"])) if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) { return 1 } else { flag = true } } } if flag { return -1 } } } } return 0 }