123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606 |
- package main
import (
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"createindex/oss"
	util "utils"
	"utils/mongodb"
	"utils/redis"
)
// date1 matches a calendar date in the 2000-2029 range written with Chinese
// markers or with "-", "/" or "." separators, e.g. "2022年6月7日", "2022-06-07".
// The separators are listed directly in the character classes; a "|" inside a
// class is a literal pipe, not an alternation, so it must not appear there.
var date1 = regexp.MustCompile("20[0-2][0-9][年\\-/.][0-9]{1,2}[月\\-/.][0-9]{1,2}[日]?")
- // @Description 合并extract 字段到bidding表
- // @Author J 2022/6/7 2:25 PM
- func MergeExtract(tmp, compare, update map[string]interface{}, extractMap map[string]map[string]interface{}, tasktype string) (map[string]interface{}, map[string]interface{}) {
- tid := mongodb.BsonIdToSId(tmp["_id"])
- if extractMap[tid] != nil {
- compare = extractMap[tid]
- if tasktype == "bidding" {
- // 增量id段 正常数据
- if num := util.IntAll(compare["dataging"]); num == 1 { //extract中dataging=1跳过
- tmp = make(map[string]interface{})
- compare = nil
- return compare, update
- }
- delete(extractMap, tid)
- }
- if tasktype == "bidding_history" {
- //增量id段 历史数据
- if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
- tmp = make(map[string]interface{})
- compare = nil
- return compare, update
- }
- delete(extractMap, tid)
- }
- //更新bidding表,生成索引;bidding表modifyinfo中的字段不更新
- modifyinfo := make(map[string]bool)
- if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
- for k, _ := range tmpmodifyinfo {
- modifyinfo[k] = true
- }
- }
- //更新bidding表,生成索引
- for _, k := range biddingMgoFields {
- v1 := compare[k] //extract
- v2 := tmp[k] //bidding
- if v2 == nil && v1 != nil && !modifyinfo[k] {
- update[k] = v1
- } else if v2 != nil && v1 != nil && !modifyinfo[k] {
- //update[k+"_b"] = v2
- update[k] = v1
- } else if v2 != nil && v1 == nil {
- //update[k+"_b"] = v2
- if k == "area" || k == "city" || k == "district" {
- update[k] = ""
- }
- }
- }
- if util.IntAll(compare["repeat"]) == 1 {
- update["extracttype"] = -1
- } else {
- update["extracttype"] = 1
- }
- } else {
- compare = nil
- if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
- update["dataging"] = 0
- }
- }
- return compare, update
- }
- // @Description subscopeclass、topscopeclass、package
- // @Author J 2022/6/7 5:54 PM
- func FieldMethod(compare, update map[string]interface{}) {
- subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
- if subscopeclass != nil {
- m1 := map[string]bool{}
- newclass := []string{}
- for _, sc := range subscopeclass {
- sclass, _ := sc.(string)
- if !m1[sclass] {
- m1[sclass] = true
- newclass = append(newclass, sclass)
- }
- }
- update["s_subscopeclass"] = strings.Join(newclass, ",")
- update["subscopeclass"] = newclass
- }
- topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
- if topscopeclass != nil {
- m2 := map[string]bool{}
- newclass := []string{}
- for _, tc := range topscopeclass {
- tclass, _ := tc.(string)
- tclass = reg_letter.ReplaceAllString(tclass, "") // 去除字母
- if !m2[tclass] {
- m2[tclass] = true
- newclass = append(newclass, tclass)
- }
- }
- update["s_topscopeclass"] = strings.Join(newclass, ",")
- }
- if package1 := compare["package"]; package1 != nil {
- packageM, _ := package1.(map[string]interface{})
- for _, p := range packageM {
- pm, _ := p.(map[string]interface{})
- if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
- util.Float64All(pm["bidamount"]) > 0 {
- update["multipackage"] = 1
- break
- }
- }
- } else {
- update["multipackage"] = 0
- }
- }
- // @Description ES保存字段
- // @Author J 2022/6/7 11:34 AM
- func GetEsField(tmp, update map[string]interface{}, stype string) map[string]interface{} {
- newTmp := make(map[string]interface{})
- for field, ftype := range biddingEsFields {
- if tmp[field] != nil { //
- if field == "projectinfo" {
- mp, _ := tmp[field].(map[string]interface{})
- if mp != nil {
- newmap := map[string]interface{}{}
- for k, ktype := range projectinfoFields {
- mpv := mp[k]
- if mpv != nil && reflect.TypeOf(mpv).String() == ktype {
- newmap[k] = mp[k]
- }
- }
- if len(newmap) > 0 {
- newTmp[field] = newmap
- }
- }
- } else if field == "purchasinglist" { //标的物处理
- purchasinglist_new := []map[string]interface{}{}
- if pcl, _ := tmp[field].([]interface{}); len(pcl) > 0 {
- for _, ls := range pcl {
- lsm_new := make(map[string]interface{})
- lsm := ls.(map[string]interface{})
- for pf, pftype := range purchasinglistFields {
- lsmv := lsm[pf]
- if lsmv != nil && reflect.TypeOf(lsmv).String() == pftype {
- lsm_new[pf] = lsm[pf]
- }
- }
- if lsm_new != nil && len(lsm_new) > 0 {
- purchasinglist_new = append(purchasinglist_new, lsm_new)
- }
- }
- }
- if len(purchasinglist_new) > 0 {
- newTmp[field] = purchasinglist_new
- }
- } else if field == "procurementlist" {
- if tmp["procurementlist"] != nil {
- var arr []interface{}
- plist := tmp["procurementlist"].([]interface{})
- for _, p := range plist {
- p1 := p.(map[string]interface{})
- p2 := make(map[string]interface{})
- for k, v := range procurementlisFields {
- if k == "projectname" && util.ObjToString(p1[k]) == "" {
- p2[k] = util.ObjToString(tmp["projectname"])
- } else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" {
- p2[k] = util.ObjToString(tmp["buyer"])
- } else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" {
- res := getMethod(util.ObjToString(p1[k]))
- if res != 0 {
- p2[k] = res
- }
- } else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v {
- p2[k] = p1[k]
- }
- }
- arr = append(arr, p2)
- }
- if len(arr) > 0 {
- newTmp[field] = arr
- }
- }
- } else if field == "projectscope" {
- ps, _ := tmp["projectscope"].(string)
- if len(ps) > pscopeLength {
- newTmp["projectscope"] = string(([]rune(ps))[:pscopeLength])
- } else {
- newTmp["projectscope"] = ps
- }
- } else if field == "winnerorder" { //中标候选
- winnerorder_new := []map[string]interface{}{}
- if winnerorder, _ := tmp[field].([]interface{}); len(winnerorder) > 0 {
- for _, win := range winnerorder {
- winMap_new := make(map[string]interface{})
- winMap := win.(map[string]interface{})
- for wf, wftype := range winnerorderlistFields {
- wfv := winMap[wf]
- if wfv != nil && reflect.TypeOf(wfv).String() == wftype {
- if wf == "sort" && util.Int64All(wfv) > 100 {
- continue
- }
- winMap_new[wf] = winMap[wf]
- }
- }
- if winMap_new != nil && len(winMap_new) > 0 {
- winnerorder_new = append(winnerorder_new, winMap_new)
- }
- }
- }
- if len(winnerorder_new) > 0 {
- newTmp[field] = winnerorder_new
- }
- } else if field == "qualifies" {
- //项目资质
- qs := []string{}
- if q, _ := tmp[field].([]interface{}); len(q) > 0 {
- for _, v := range q {
- v1 := v.(map[string]interface{})
- qs = append(qs, util.ObjToString(v1["key"]))
- }
- }
- if len(qs) > 0 {
- newTmp[field] = strings.Join(qs, ",")
- }
- } else if field == "review_experts" {
- // 评审专家
- if arr, ok := tmp["review_experts"].([]interface{}); ok && len(arr) > 0 {
- arr1 := util.ObjArrToStringArr(arr)
- newTmp[field] = strings.Join(arr1, ",")
- }
- } else if field == "bidopentime" {
- if tmp[field] != nil && tmp["bidendtime"] == nil {
- newTmp["bidendtime"] = tmp[field]
- newTmp[field] = tmp[field]
- } else if tmp[field] == nil && tmp["bidendtime"] != nil {
- newTmp["bidendtime"] = tmp[field]
- newTmp[field] = tmp["bidendtime"]
- } else {
- if tmp["bidopentime"] != nil {
- newTmp[field] = tmp["bidopentime"]
- }
- }
- } else if field == "detail" { //过滤
- detail, _ := tmp[field].(string)
- if len([]rune(detail)) > detailLength {
- detail = detail[:detailLength]
- }
- detail = filterSpace.ReplaceAllString(detail, "")
- if stype == "bidding" || stype == "bidding_history" {
- text, b := FilterDetail(detail)
- newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
- update["cleartag"] = b
- } else {
- if tmp["cleartag"] != nil && tmp["cleartag"].(bool) {
- text, _ := FilterDetail(detail)
- newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
- } else {
- newTmp[field] = util.ObjToString(tmp["title"]) + " " + detail
- }
- }
- } else if field == "_id" || field == "topscopeclass" || field == "entidlist" { //不做处理
- newTmp[field] = tmp[field]
- } else if field == "publishtime" || field == "comeintime" {
- //字段类型不正确,特别处理
- if tmp[field] != nil && util.Int64All(tmp[field]) > 0 {
- newTmp[field] = util.Int64All(tmp[field])
- }
- } else { //其它字段判断数据类型,不正确舍弃
- if fieldval := tmp[field]; reflect.TypeOf(fieldval).String() != ftype {
- continue
- } else {
- if fieldval != "" {
- newTmp[field] = fieldval
- }
- }
- }
- }
- }
- filetext := getFileText(tmp)
- if len([]rune(filetext)) > 10 {
- newTmp["filetext"] = filetext
- }
- //else {
- // // 附件未识别到内容,有附件且附件能够下载 filetext=""
- // if pinfo, o1 := tmp["projectinfo"].(map[string]interface{}); o1 {
- // if atts, o2 := pinfo["attachments"].(map[string]interface{}); o2 {
- // tag := false
- // for _, at := range atts {
- // at1 := at.(map[string]interface{})
- // if at1["fid"] != nil {
- // tag = true
- // break
- // }
- // }
- // if tag {
- // newTmp["filetext"] = ""
- // }
- // }
- // }
- //}
- YuceEndtime(newTmp) // 预测结果时间
- if stype == "bidding" || stype == "bidding_history" {
- newTmp["createtime"] = time.Now().Unix() // es库数据创建时间,只有增量数据有
- }
- return newTmp
- }
- // @Description 附件内容
- // @Author J 2022/6/7 1:54 PM
- func getFileText(tmp map[string]interface{}) (filetext string) {
- if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok {
- for _, tmpData1 := range attchMap {
- if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok {
- for _, result := range tmpData2 {
- if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
- if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
- bs := oss.OssGetObject(attach_url) //oss读数据
- if utf8.RuneCountInString(filetext+bs) < fileLength {
- filetext += bs + "\n"
- } else {
- if utf8.RuneCountInString(bs) > fileLength {
- filetext = bs[0:fileLength]
- } else {
- filetext = bs
- }
- break
- }
- }
- }
- }
- }
- }
- }
- return
- }
- // 预测结果时间
- func YuceEndtime(tmp map[string]interface{}) {
- flag := true
- scope := []string{"服务采购_法律咨询", "服务采购_会计", "服务采购_物业", "服务采购_审计", "服务采购_安保", "服务采购_仓储物流",
- "服务采购_广告宣传印刷"}
- subscopeclass := util.ObjToString(tmp["s_subscopeclass"])
- for _, v := range scope {
- if strings.Contains(subscopeclass, v) {
- flag = false
- break
- }
- }
- if flag {
- return
- }
- subtype := util.ObjToString(tmp["subtype"])
- if subtype == "成交" || subtype == "合同" {
- // yucestarttime、yuceendtime
- yucestarttime, yuceendtime := int64(0), int64(0)
- // 项目周期中
- if util.ObjToString(tmp["projectperiod"]) != "" {
- dateStr := date1.FindStringSubmatch(util.ObjToString(tmp["projectperiod"]))
- if len(dateStr) == 2 {
- sdate := FormatDateStr(dateStr[0])
- edate := FormatDateStr(dateStr[1])
- if sdate < edate && sdate != 0 && edate != 0 {
- yucestarttime = sdate
- yuceendtime = edate
- }
- }
- }
- if yucestarttime > 0 && yuceendtime > yucestarttime {
- tmp["yuceendtime"] = yuceendtime
- return
- }
- // 预测开始时间 合同签订日期
- if yucestarttime == 0 {
- if util.IntAll(tmp["signaturedate"]) <= 0 {
- if util.IntAll(tmp["publishtime"]) <= 0 {
- return
- } else {
- yucestarttime = util.Int64All(tmp["publishtime"])
- }
- } else {
- yucestarttime = util.Int64All(tmp["signaturedate"])
- }
- }
- // 预测结束时间
- if yucestarttime > 0 && yuceendtime == 0 {
- if util.IntAll(tmp["project_duration"]) > 0 && util.ObjToString(tmp["project_timeunit"]) != "" {
- yuceendtime = YcEndTime(yucestarttime, util.IntAll(tmp["project_duration"]), util.ObjToString(tmp["project_timeunit"]))
- tmp["yuceendtime"] = yuceendtime
- }
- }
- }
- }
// YcEndTime returns the Unix time that lies num units after starttime.
//
// Supported units: "日历天"/"天"/"日" (days of 86400 seconds), "周" (weeks),
// "月" (months), "年" (years) and "工作日" (working days, for which two extra
// days are added per full group of seven). Weeks, months, years and working
// days use calendar arithmetic in the local time zone. Any other unit yields 0.
func YcEndTime(starttime int64, num int, unit string) int64 {
	start := time.Unix(starttime, 0)
	switch unit {
	case "日历天", "天", "日":
		return starttime + int64(num*86400)
	case "周":
		return start.AddDate(0, 0, num*7).Unix()
	case "月":
		return start.AddDate(0, num, 0).Unix()
	case "年":
		return start.AddDate(num, 0, 0).Unix()
	case "工作日":
		extra := num / 7 * 2
		return start.AddDate(0, 0, num+extra).Unix()
	default:
		return 0
	}
}
- func FormatDateStr(ds string) int64 {
- ds = strings.Replace(ds, "年", "-", -1)
- ds = strings.Replace(ds, "月", "-", -1)
- ds = strings.Replace(ds, "日", "", -1)
- ds = strings.Replace(ds, "/", "-", -1)
- ds = strings.Replace(ds, ".", "-", -1)
- location, err := time.ParseInLocation(util.Date_Short_Layout, ds, time.Local)
- if err != nil {
- util.Debug(err)
- return 0
- } else {
- return location.Unix()
- }
- }
- // @Description entidlist
- // @Author J 2022/6/7 2:36 PM
- func FieldFun(tmp map[string]interface{}) (cid []string) {
- sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
- for _, w := range sWinnerarr {
- if w != "" {
- id := redis.GetStr("qyxy_id", w)
- if id == "" {
- ents, _ := standardMgo.Find("qyxy_std", map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, nil, false, -1, -1)
- if len(*ents) > 0 {
- id = util.ObjToString((*ents)[0]["_id"])
- redis.PutCKV("qyxy_id", w, id)
- } else {
- ent, _ := qyxyMgo.FindOne("company_history_name", map[string]interface{}{"history_name": w})
- if len(*ent) > 0 {
- id = util.ObjToString((*ent)["company_id"])
- redis.PutCKV("qyxy_id", w, id)
- }
- }
- }
- if id == "" {
- id = "-"
- }
- cid = append(cid, id)
- }
- }
- return cid
- }
- var filterSpace = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]")
- func FilterDetail(text string) (string, bool) {
- b := false // 清理标记
- for _, s := range FilterKeyword {
- reg := regexp.MustCompile(s)
- if reg.MatchString(text) {
- text = reg.ReplaceAllString(text, "")
- if !b {
- b = true
- }
- }
- }
- return text, b
- }
// checkContains reports whether sub occurs in s as a whole
// whitespace-delimited token, ignoring case. sub is inserted into the regular
// expression unescaped, so it is interpreted as a pattern; an invalid pattern
// panics.
func checkContains(s, sub string) bool {
	const (
		head = `(?i)(^|([\s\t\n]+))(`
		tail = `)($|([\s\t\n]+))`
	)
	pattern := head + sub + tail
	return regexp.MustCompile(pattern).MatchString(s)
}
var (
	// TimeV1 matches text that ends with a four-digit year, optionally
	// followed by "年" or ".".
	TimeV1 = regexp.MustCompile("(\\d{4})[年.]?$")
	// TimeV2 matches text that ends with a four-digit year and a one or
	// two-digit month, each optionally followed by a separator.
	TimeV2 = regexp.MustCompile("(\\d{4}[年.\\-/]?)(\\d{1,2}[月.\\-/]?$)")
	// TimeClear matches the separator characters removed from TimeV2 groups.
	TimeClear = regexp.MustCompile("[年|月|/|.|-]")
)

// getMethod converts the expected purchasing time of a procurement intention
// into a number of the form YYYYMMDD with the unknown parts set to zero:
// a bare year such as "2022年" becomes 20220000 and a year with month such as
// "2022年6月" or "2022-6" becomes 20220600. It returns 0 when str has neither
// form.
//
// @Author J 2022/6/7 8:04 PM
func getMethod(str string) int64 {
	if arr := TimeV1.FindStringSubmatch(str); arr != nil {
		if n, err := strconv.ParseInt(arr[1]+"0000", 10, 64); err == nil {
			return n
		}
		return 0
	}
	if arr := TimeV2.FindStringSubmatch(str); arr != nil {
		year := TimeClear.ReplaceAllString(arr[1], "")
		// Strip the separator before padding: the raw group may be "6月",
		// whose byte length is not 1 even though the month has one digit.
		month := TimeClear.ReplaceAllString(arr[2], "")
		if len(month) == 1 {
			month = "0" + month
		}
		if n, err := strconv.ParseInt(year+month+"00", 10, 64); err == nil {
			return n
		}
	}
	return 0
}
// clearMap removes empty values from tmp in place: the "purchasinglist" key
// when it holds an empty []interface{}, and every key whose value is the empty
// string. Nil values and values of any other type are left untouched.
//
// The purchasinglist check is done once, before the loop. Doing it inside the
// loop in an if/else-if chain made the empty-string check for the current key
// unreachable on that iteration, so the result depended on map iteration order.
//
// @Author J 2022/6/7 8:04 PM
func clearMap(tmp map[string]interface{}) {
	if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok && len(purchasinglist) == 0 {
		delete(tmp, "purchasinglist")
	}
	for k, v := range tmp {
		// Deleting the current key while ranging over a map is safe in Go.
		if s, ok := v.(string); ok && s == "" {
			delete(tmp, k)
		}
	}
}
- // @Description 附件有效字段(isValidFile)
- // @Author J 2022/7/8 14:41
- func validFile(tmp map[string]interface{}) int {
- isContinue := false
- if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
- if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
- for _, att := range atts {
- if att == nil {
- util.Debug(tmp["_id"])
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- util.Debug(tmp["_id"])
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := util.ObjToString(att1["fid"]); fid != "" {
- isContinue = true
- break
- }
- }
- if isContinue {
- if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
- if len(attachTxt) > 0 {
- for _, at := range attachTxt {
- at1 := at.(map[string]interface{})
- if len(at1) > 0 {
- for k, _ := range at1 {
- if reflect.TypeOf(at1[k]).String() == "string" {
- util.Debug(tmp["_id"])
- continue
- }
- at2 := at1[k].(map[string]interface{})
- s := strings.ToLower(util.ObjToString(at2["file_name"]))
- if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
- strings.Contains(s, "pdf") {
- if strings.Contains(s, "swf") || strings.Contains(s, "html") {
- return -1
- } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
- return 1
- }
- }
- }
- break
- } else {
- break
- }
- }
- }
- }
- flag := false
- for _, att := range atts {
- if att == nil {
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := util.ObjToString(att1["fid"]); fid != "" {
- ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
- if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
- return 1
- } else {
- flag = true
- }
- }
- }
- if flag {
- return -1
- }
- }
- }
- }
- return 0
- }
|