12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991 |
- package main
- import (
- "encoding/json"
- "field_sync/config"
- "field_sync/oss"
- "fmt"
- "net"
- "reflect"
- "regexp"
- "sort"
- "strconv"
- "strings"
- "time"
- "log"
- "go.mongodb.org/mongo-driver/bson"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
- )
// regLetter matches (possibly empty) runs of lowercase ASCII letters.
// fieldFun uses it to strip letters from topscopeclass entries.
var regLetter = regexp.MustCompile("[a-z]*")

// cityEndReg matches a trailing 区, 县 or 市 at the end of a string.
var cityEndReg = regexp.MustCompile("(区|县|市)$")
- func biddingTask(data []byte, mapInfo map[string]interface{}) {
- defer util.Catch()
- stype := util.ObjToString(mapInfo["stype"])
- q, _ := mapInfo["query"].(map[string]interface{})
- bkey, _ := mapInfo["bkey"].(string)
- if q == nil {
- q = map[string]interface{}{
- "_id": map[string]interface{}{
- "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
- "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
- },
- }
- }
- //extract库
- // extractConn := MgoE.GetMgoConn()
- // defer MgoE.DestoryMongoConn(extractConn)
- // extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
- // "field_source": 0,
- // "kvtext": 0,
- // }).Sort("_id").Iter()
- // eMap := map[string]map[string]interface{}{}
- // extCount, repeatCount := 0, 0
- // for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
- // if util.IntAll(tmp["repeat"]) == 1 {
- // repeatCount++
- // }
- // tid := mongodb.BsonIdToSId(tmp["_id"])
- // eMap[tid] = tmp
- // tmp = make(map[string]interface{})
- // }
- // log.Println("抽取表 数据量", extCount, "重复数据量", repeatCount)
- //bidding库
- biddingConn := MgoB.GetMgoConn()
- count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
- log.Println("bidding表 同步总数:", count)
- c := 0
- // if count < 500000 {
- var res []map[string]interface{}
- result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
- "contenthtml": 0,
- }).Iter()
- for tmp := make(map[string]interface{}); result.Next(tmp); {
- res = append(res, tmp)
- tmp = make(map[string]interface{})
- }
- MgoB.DestoryMongoConn(biddingConn)
- // log.Println("查询结果 bidding", count, "抽取:", extCount)
- c = doIndex(res, bkey, stype)
- // } else {
- // log.Println("查询结果 数据量太大,放弃", count)
- // MgoB.DestoryMongoConn(biddingConn)
- // }
- log.Println("bidding sync...over all", count, "extract sync ", c)
- NextNode(mapInfo, stype)
- // NextNodePro(mapInfo, stype)
- NextNodeTidb(mapInfo, stype)
- if stype == "bidding_history" {
- NextNodeBidData(mapInfo) // bidding-data数据
- NextNodeTidbQyxy(mapInfo) // tidb-企业数据
- NextNodeHn(mapInfo)
- }
- if stype == "bidding" {
- uq := map[string]interface{}{
- "gtid": map[string]interface{}{
- "$gte": util.ObjToString(mapInfo["gtid"]),
- },
- "lteid": map[string]interface{}{
- "$lte": util.ObjToString(mapInfo["lteid"]),
- },
- }
- MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 7, "updatetime": time.Now().Unix()}}, false, true)
- }
- //领域标签处理的数据 id段
- if stype == "bidding_history" {
- MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
- }
- }
- func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
- defer util.Catch()
- q, _ := mapInfo["query"].(map[string]interface{})
- if q == nil {
- q = map[string]interface{}{
- "_id": map[string]interface{}{
- "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
- "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
- },
- }
- }
- //extract库
- extractConn := MgoE.GetMgoConn()
- defer MgoE.DestoryMongoConn(extractConn)
- extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
- "field_source": 0,
- "kvtext": 0,
- }).Sort("-_id").Iter()
- //bidding库
- biddingConn := MgoB.GetMgoConn()
- defer MgoB.DestoryMongoConn(biddingConn)
- count := 0
- var compare map[string]interface{}
- result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
- "contenthtml": 0,
- "field_source": 0,
- }).Sort("-_id").Iter()
- for tmp := make(map[string]interface{}); result.Next(tmp); count++ {
- update := map[string]interface{}{}
- del := map[string]interface{}{} //记录extract没有值而bidding中有值的字段
- //对比方法----------------
- for {
- if compare == nil {
- compare = make(map[string]interface{})
- if !extractResult.Next(compare) {
- break
- }
- }
- if compare != nil {
- cid := mongodb.BsonIdToSId(compare["_id"])
- tid := mongodb.BsonIdToSId(tmp["_id"])
- if cid == tid {
- //更新bidding表;bidding表modifyinfo中的字段不更新
- modifyinfo := make(map[string]bool)
- if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
- for k := range tmpmodifyinfo {
- modifyinfo[k] = true
- }
- }
- for _, k := range config.Conf.Serve.FieldS {
- v1 := compare[k] //extract
- v2 := tmp[k] //bidding
- if v2 == nil && v1 != nil {
- update[k] = v1
- } else if v2 != nil && v1 != nil && !modifyinfo[k] {
- update[k] = v1
- } else if v2 != nil && v1 == nil && !modifyinfo[k] {
- if k == "s_subscopeclass" && del["subscopeclass"] == nil {
- continue
- } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
- continue
- }
- del[k] = 1
- //util.Debug("抽取结果没有值,bidding有值:field--", k, del)
- }
- }
- if util.IntAll(compare["repeat"]) == 1 {
- update["extracttype"] = -1
- update["dataprocess"] = 7
- if compare["repeat_id"] != nil {
- update["repeat_id"] = compare["repeat_id"]
- }
- } else {
- update["extracttype"] = 1
- update["dataprocess"] = 8
- }
- break
- } else {
- if cid < tid {
- compare = nil
- continue
- } else {
- break
- }
- }
- } else {
- break
- }
- }
- //------------------对比结束
- //处理分类
- if compare != nil { //extract
- fieldFun(compare, update)
- compare = nil
- }
- // entidlist
- extractMap := make(map[string]interface{})
- if update["s_winner"] != "" {
- cid := companyFun(update)
- if len(cid) > 0 {
- update["entidlist"] = cid
- extractMap["entidlist"] = cid
- }
- }
- if len(extractMap) > 0 {
- updateExtPool <- []map[string]interface{}{
- {"_id": tmp["_id"]},
- {"$set": extractMap},
- }
- }
- // 附件有效字段
- if i := validFile(tmp); i != 0 {
- if i == -1 {
- update["isValidFile"] = false
- } else {
- update["isValidFile"] = true
- }
- }
- if len(update) > 0 {
- if len(del) > 0 { //删除的字段
- updateBidPool <- []map[string]interface{}{{
- "_id": tmp["_id"],
- },
- {"$set": update, "$unset": del},
- }
- } else {
- updateBidPool <- []map[string]interface{}{{
- "_id": tmp["_id"],
- },
- {"$set": update},
- }
- }
- }
- if count%50000 == 0 {
- log.Println("biddingTask current", count)
- }
- tmp = make(map[string]interface{})
- }
- log.Println("biddingAll sync...over all", count)
- }
- func doIndex(infos []map[string]interface{}, bkey, stype string) int {
- syncNo := 0 //抽取表数据同步数量
- //对比两张表数据,减少查询次数
- var compare map[string]interface{}
- var bidUpdate [][]map[string]interface{}
- var extUpdate [][]map[string]interface{}
- //SaveEsLock := &sync.Mutex{}
- log.Println("start ...")
- for n, tmp := range infos {
- tid := mongodb.BsonIdToSId(tmp["_id"])
- update := map[string]interface{}{} //要更新的mongo数据
- del := map[string]interface{}{}
- edata, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, tid, nil)
- //对比方法----------------
- if edata != nil && len(*edata) > 0 {
- compare = *edata
- if stype == "bidding" {
- // 增量id段 正常数据
- if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
- tmp = make(map[string]interface{})
- compare = nil
- continue
- }
- // delete(eMap, tid)
- }
- if stype == "bidding_history" {
- //增量id段 历史数据
- if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
- tmp = make(map[string]interface{})
- compare = nil
- continue
- }
- // delete(eMap, tid)
- }
- syncNo++
- log.Println("抽取区域 省", compare["area"], " 市 ", compare["city"], " 区 ", compare["district"], " id ", tid)
- modifyinfo := make(map[string]bool)
- if tmp["modifyinfo"] != nil {
- if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok {
- for k := range tmpmodifyinfo {
- modifyinfo[k] = true
- }
- }
- }
- for _, k := range config.Conf.Serve.FieldS {
- v1 := compare[k] //extract
- v2 := tmp[k] //bidding
- if v2 == nil && v1 != nil {
- update[k] = v1
- } else if v2 != nil && v1 != nil && !modifyinfo[k] {
- update[k] = v1
- } else if v2 != nil && v1 == nil && !modifyinfo[k] {
- if k == "s_subscopeclass" && del["subscopeclass"] == nil {
- continue
- } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
- continue
- } else if k == "city" || k == "district" {
- update[k] = ""
- } else {
- del[k] = 1
- }
- }
- }
- // 附件重采,数据同步时不更新判重标识
- if util.IntAll(compare["repeat"]) == 1 {
- update["extracttype"] = -1
- update["dataprocess"] = 7
- if compare["repeat_id"] != nil {
- update["repeat_id"] = compare["repeat_id"]
- }
- } else {
- update["extracttype"] = 1
- update["dataprocess"] = 8
- }
- } else {
- compare = nil
- if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
- update["dataging"] = 0
- }
- update["dataprocess"] = 8
- }
- //下面可以多线程跑的--->
- //处理分类
- if compare != nil { //extract
- fieldFun(compare, update)
- // publishtime 20230523
- if util.IntAll(tmp["publishtime"]) == -1 {
- if pb := methodPb(compare); pb > 0 {
- update["publishtime"] = pb
- }
- }
- compare = nil
- }
- //------------------对比结束
- //处理key descript
- if bkey == "" {
- DealInfo(&tmp, &update)
- }
- // entidlist
- extractMap := make(map[string]interface{})
- if update["s_winner"] != "" {
- cid := companyFun(update)
- if len(cid) > 0 {
- tmp["entidlist"] = cid
- update["entidlist"] = cid
- extractMap["entidlist"] = cid
- }
- }
- // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
- typeFunc(tmp, update, extractMap)
- if len(extractMap) > 0 {
- if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
- extUpdate = append(extUpdate, []map[string]interface{}{
- {"_id": tmp["_id"]},
- {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
- })
- } else {
- extUpdate = append(extUpdate, []map[string]interface{}{
- {"_id": tmp["_id"]},
- {"$set": extractMap},
- })
- }
- if len(extUpdate) >= MgoBulkSize {
- tmps := extUpdate
- MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
- extUpdate = [][]map[string]interface{}{}
- }
- }
- // 附件有效字段
- if i := validFile(tmp); i != 0 {
- if i == -1 {
- tmp["isValidFile"] = false
- update["isValidFile"] = false
- } else {
- tmp["isValidFile"] = true
- update["isValidFile"] = true
- }
- }
- // 2024-02-21 徐志恒 情报标签字段
- toptype := util.ObjToString(tmp["toptype"])
- subtype := util.ObjToString(tmp["subtype"])
- buyerclass := util.ObjToString(update["buyerclass"])
- if buyerclass != "" {
- update["buyer_type"] = getStr(buyerclass)
- }
- s_topscopeclass := util.ObjToString(update["s_topscopeclass"])
- if (tmp["tag_topinformation"] != nil && (subtype == "合同" || subtype == "中标" || subtype == "成交" || subtype == "采购意向" || toptype == "招标")) || (tmp["tag_topinformation"] == nil && toptype == "拟建" && strings.Contains(s_topscopeclass, "建筑工程")) {
- update["tag_set"] = getTagSet(tmp, compare)
- }
- if len(update) > 0 {
- log.Println("保存bidding区域 省", update["area"], " 市 ", update["city"], " 区 ", update["district"], " buyerclass ", update["buyerclass"], update["buyer_type"], " id ", tid)
- if len(del) > 0 {
- bidUpdate = append(bidUpdate, []map[string]interface{}{{
- "_id": tmp["_id"],
- },
- {"$set": update, "$unset": del},
- })
- } else {
- bidUpdate = append(bidUpdate, []map[string]interface{}{{
- "_id": tmp["_id"],
- },
- {"$set": update},
- })
- }
- if len(bidUpdate) >= MgoBulkSize {
- tmps := bidUpdate
- MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
- bidUpdate = [][]map[string]interface{}{}
- }
- }
- if n%500 == 0 {
- log.Println("biddingTask current ", n)
- }
- tmp = make(map[string]interface{})
- }
- //SaveEsLock.Lock()
- if len(bidUpdate) > 0 {
- tmps := bidUpdate
- MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
- bidUpdate = [][]map[string]interface{}{}
- }
- if len(extUpdate) > 0 {
- tmps := extUpdate
- MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
- extUpdate = [][]map[string]interface{}{}
- }
- //SaveEsLock.Unlock()
- return syncNo
- }
- // @Description subscopeclass、topscopeclass、package
- // 20230523 多包处理 subpackage = 1
- // @Author J 2022/6/7 5:54 PM
- func fieldFun(compare, update map[string]interface{}) {
- subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
- if subscopeclass != nil {
- m1 := map[string]bool{}
- newclass := []string{}
- for _, sc := range subscopeclass {
- sclass, _ := sc.(string)
- if !m1[sclass] {
- m1[sclass] = true
- newclass = append(newclass, sclass)
- }
- }
- update["s_subscopeclass"] = strings.Join(newclass, ",")
- update["subscopeclass"] = newclass
- }
- topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
- if topscopeclass != nil {
- m2 := map[string]bool{}
- newclass := []string{}
- for _, tc := range topscopeclass {
- tclass, _ := tc.(string)
- tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
- if !m2[tclass] {
- m2[tclass] = true
- newclass = append(newclass, tclass)
- }
- }
- update["topscopeclass"] = topscopeclass
- update["s_topscopeclass"] = strings.Join(newclass, ",")
- }
- if package1 := compare["package"]; package1 != nil {
- packageM, _ := package1.(map[string]interface{})
- update["package"] = packageM
- for _, p := range packageM {
- pm, _ := p.(map[string]interface{})
- if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
- util.Float64All(pm["bidamount"]) > 0 {
- update["multipackage"] = 1
- break
- }
- }
- } else {
- update["multipackage"] = 0
- }
- // subpackage
- if compare["package"] != nil && compare["s_winner"] != nil && compare["bidamount"] != nil {
- pg := compare["package"].(map[string]interface{})
- if len(pg) > 1 {
- var bmt []float64
- var swn []string
- for _, p := range pg {
- p1 := p.(map[string]interface{})
- if p1["bidamount"] != nil {
- bmt = append(bmt, util.Float64All(p1["bidamount"]))
- }
- if w := util.ObjToString(p1["winner"]); w != "" {
- swn = append(swn)
- }
- }
- if len(bmt) > 1 && len(swn) > 1 {
- sn := strings.Split(util.ObjToString(compare["s_winner"]), ",")
- sort.Strings(sn)
- sort.Strings(swn)
- swn1 := util.ObjArrToStringArr(Duplicate(swn)) // 去重
- if strings.Join(swn1, ",") == strings.Join(sn, ",") {
- bidamount := 0.0
- for _, f := range bmt {
- bidamount += f
- }
- if bidamount == util.Float64All(compare["bidamount"]) {
- update["subpackage"] = 1
- }
- }
- }
- }
- }
- }
- // @Description entidlist
- // @Author J 2022/6/7 2:36 PM
- func companyFun(tmp map[string]interface{}) (cid []string) {
- sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
- for _, w := range sWinnerarr {
- if w != "" {
- id := redis.GetStr("qyxy_id", w)
- if id == "" {
- ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
- if len(*ents) > 0 {
- id = util.ObjToString((*ents)[0]["_id"])
- redis.PutCKV("qyxy_id", w, id)
- } else {
- ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
- if len(*ent) > 0 {
- id = util.ObjToString((*ent)["company_id"])
- redis.PutCKV("qyxy_id", w, id)
- }
- }
- }
- if id == "" {
- id = "-"
- }
- cid = append(cid, id)
- }
- }
- return cid
- }
- // @Description update 修改bidding表,extractM修改抽取表
- // @Author J 2022/6/10 10:29 AM
- func typeFunc(tmp, update, extractM map[string]interface{}) {
- if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
- if t := util.ObjToString(jyData["type"]); t != "" {
- switch t {
- //case "采购信息":
- case "招标公告":
- if util.ObjToString(tmp["toptype"]) != "招标" {
- update["toptype"] = "招标"
- extractM["toptype"] = "招标"
- delete(update, "subtype")
- }
- case "采购意向":
- if util.ObjToString(tmp["toptype"]) != "采购意向" {
- update["toptype"] = "采购意向"
- update["subtype"] = "采购意向"
- extractM["toptype"] = "采购意向"
- extractM["subtype"] = "采购意向"
- }
- case "招标预告":
- if util.ObjToString(tmp["toptype"]) != "预告" {
- update["toptype"] = "预告"
- extractM["toptype"] = "预告"
- delete(update, "subtype")
- }
- case "招标结果":
- if util.ObjToString(tmp["toptype"]) != "结果" {
- update["toptype"] = "结果"
- extractM["toptype"] = "结果"
- delete(update, "subtype")
- }
- }
- }
- }
- }
- // @Description 附件有效字段(isValidFile)
- // @Author J 2022/7/8 14:41
- func validFile(tmp map[string]interface{}) int {
- isContinue := false
- if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
- if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
- for _, att := range atts {
- if att == nil {
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := util.ObjToString(att1["fid"]); fid != "" {
- isContinue = true
- break
- }
- }
- if isContinue {
- if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
- if len(attachTxt) > 0 {
- for _, at := range attachTxt {
- at1 := at.(map[string]interface{})
- if len(at1) > 0 {
- for k, _ := range at1 {
- if reflect.TypeOf(at1[k]).String() == "string" {
- continue
- }
- at2 := at1[k].(map[string]interface{})
- s := strings.ToLower(util.ObjToString(at2["file_name"]))
- if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
- strings.Contains(s, "pdf") {
- if strings.Contains(s, "swf") || strings.Contains(s, "html") {
- return -1
- } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
- return 1
- }
- }
- }
- break
- } else {
- break
- }
- }
- }
- }
- flag := false
- for _, att := range atts {
- if att == nil {
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := util.ObjToString(att1["fid"]); fid != "" {
- ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
- if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
- return 1
- } else {
- flag = true
- }
- }
- }
- if flag {
- return -1
- }
- }
- }
- }
- return 0
- }
- // @Description id不变,内容变化 重新索引数据
- // @Author J 2022/8/10 13:29
- func taskinfo(id string) {
- tmp, _ := MgoB.FindById("bidding", id, nil)
- if tmp == nil || len(*tmp) == 0 {
- log.Println(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
- return
- }
- extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
- if extractM == nil || len(*extractM) == 0 {
- extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
- if extractM == nil || len(*extractM) == 0 {
- log.Println(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
- return
- }
- }
- update := map[string]interface{}{} //要更新的mongo数据
- //更新bidding表字段
- for _, k := range config.Conf.Serve.FieldS {
- v1 := (*extractM)[k] //extract
- v2 := (*tmp)[k] //bidding
- if v2 == nil && v1 != nil {
- update[k] = v1
- } else if v2 != nil && v1 != nil {
- update[k] = v1
- } else if v2 != nil && v1 == nil {
- if k == "city" || k == "district" {
- update[k] = ""
- }
- }
- }
- if util.IntAll((*extractM)["repeat"]) == 1 {
- update["extracttype"] = -1
- update["dataprocess"] = 7
- if (*extractM)["repeat_id"] != nil {
- update["repeat_id"] = (*extractM)["repeat_id"]
- }
- } else {
- update["extracttype"] = 1
- update["dataprocess"] = 8
- }
- //处理分类
- fieldFun(*extractM, update)
- extractMap := make(map[string]interface{})
- if util.ObjToString((*tmp)["s_winner"]) != "" {
- cid := companyFun(*tmp)
- if len(cid) > 0 {
- update["entidlist"] = cid
- extractMap["entidlist"] = cid
- }
- MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
- }
- // 附件有效字段
- if i := validFile(*tmp); i != 0 {
- if i == -1 {
- update["isValidFile"] = false
- } else {
- update["isValidFile"] = true
- }
- }
- if len(update) > 0 {
- MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
- }
- mapinfo := map[string]interface{}{
- "infoid": id,
- "stype": "index-by-id",
- }
- datas, _ := json.Marshal(mapinfo)
- var next = &net.UDPAddr{
- IP: net.ParseIP(config.Conf.Udp.Next.Addr),
- Port: util.IntAll(config.Conf.Udp.Next.Port),
- }
- log.Println("nsq data over es ", next, " mapinfo ", string(datas))
- _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
- }
- var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
- // @Description 发布时间处理
- // @Author J 2023/5/23 14:32
- func methodPb(tmp map[string]interface{}) int64 {
- if tmp["ext_publishtime"] != nil {
- if newPb := util.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
- return newPb
- }
- }
- for _, d := range DateTimeSelect {
- if tmp[d] != nil && util.Int64All(tmp[d]) < time.Now().Unix() {
- return util.Int64All(tmp[d])
- }
- }
- return 0
- }
// Duplicate returns the elements of the slice, array or string a with
// consecutive duplicates removed (elements are compared with
// reflect.DeepEqual). Only adjacent equal elements are collapsed, so sort the
// input first when every duplicate must go.
//
// It returns nil when a is nil or is not a slice, array or string; such input
// previously caused a panic.
func Duplicate(a interface{}) (ret []interface{}) {
	va := reflect.ValueOf(a)
	switch va.Kind() {
	case reflect.Slice, reflect.Array, reflect.String:
	default:
		return nil
	}
	for i := 0; i < va.Len(); i++ {
		cur := va.Index(i).Interface()
		if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), cur) {
			continue
		}
		ret = append(ret, cur)
	}
	return ret
}
- // @Description 获取情报标签
- // @Author 徐志恒 2024/2/21 09:53
- func getTagSet(tmp, compare map[string]interface{}) map[string]map[string]interface{} {
- tagSet := map[string]map[string]interface{}{}
- wuye := map[string]interface{}{}
- buyer := util.ObjToString(compare["buyer"])
- publishtime := util.Int64All(tmp["publishtime"])
- bidamount := util.Float64All(compare["bidamount"])
- toptype := util.ObjToString(tmp["toptype"])
- subtype := util.ObjToString(tmp["subtype"])
- if subtype == "合同" {
- wuye["isfirsthand"] = 62
- if buyer != "" {
- sql := `{
- "query": {
- "bool": {
- "must": [
- {
- "term": {
- "buyer": "` + buyer + `"
- }
- },
- {
- "term": {
- "tag_topinformation": "情报_物业"
- }
- },
- {
- "term": {
- "subtype": "合同"
- }
- },
- {
- "range": {
- "publishtime": {
- "lte": ` + fmt.Sprint(publishtime) + `
- }
- }
- }
- ]
- }
- },
- "sort": {
- "publishtime": "asc"
- },
- "_source": [
- "s_winner"
- ],
- "size": 10000
- }`
- data := Es.Get("bidding", "bidding", sql)
- if data != nil && len(*data) > 0 {
- count := 0
- first := util.ObjToString((*data)[0]["s_winner"])
- for k, v := range *data {
- winner := util.ObjToString(v["s_winner"])
- if k > 0 && first != winner {
- first = winner
- count++
- }
- }
- changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
- changehands, _ := strconv.ParseFloat(changehand, 64)
- wuye["changehand"] = changehands
- if changehands > 0.3 {
- wuye["changehandindex"] = 61
- }
- if len(*data) > 1 {
- wuye["isfirsthand"] = 0
- }
- }
- }
- wuye["period"] = getperiod(compare)
- } else if toptype == "招标" || toptype == "采购意向" {
- bidamount = util.Float64All(compare["budget"])
- }
- if tmp["projectinfo"] != nil {
- projectInfo := util.ObjToMap(tmp["projectinfo"])
- if projectInfo != nil && len(*projectInfo) > 0 {
- if (*projectInfo)["attachments"] != nil {
- wuye["isfile"] = 63
- }
- }
- }
- wuye["scale"] = getBidamountRange(bidamount)
- if tmp["property_form"] != nil {
- property_form := util.ObjArrToStringArr(tmp["property_form"].([]interface{}))
- wuye["property_form"] = getpropertyform(property_form)
- }
- tagSet["wuye"] = wuye
- return tagSet
- }
// getBidamountRange maps an amount to its scale bucket:
//
//	value <  500000   -> 1
//	value < 1000000   -> 2
//	value < 2000000   -> 3
//	value < 5000000   -> 4
//	anything else     -> 5
func getBidamountRange(value float64) int {
	// Exclusive upper bound of buckets 1..4, in ascending order.
	upperBounds := [...]float64{500000, 1000000, 2000000, 5000000}
	for i, bound := range upperBounds {
		if value < bound {
			return i + 1
		}
	}
	return len(upperBounds) + 1
}
// getpropertyform translates property form names into their numeric codes and
// returns the codes joined by commas, in input order. Names without a code are
// dropped; repeated names yield repeated codes.
func getpropertyform(value []string) string {
	codeOf := map[string]string{
		"住宅":    "21",
		"政府办公楼": "22",
		"学校":    "23",
		"医院":    "24",
		"产业园区":  "25",
		"旅游景区":  "26",
		"交通运输":  "27",
		"商务办公楼": "28",
		"酒店":    "29",
	}
	codes := make([]string, 0, len(value))
	for i := range value {
		if code, known := codeOf[value[i]]; known {
			codes = append(codes, code)
		}
	}
	return strings.Join(codes, ",")
}
- func getperiod(data map[string]interface{}) int {
- res := 16
- signaturedate := util.Int64All(data["signaturedate"]) //合同签订日期
- expiredate := util.Int64All(data["expiredate"]) //合同截止日期
- // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
- project_duration := util.IntAll(data["project_duration"]) //工期时长
- project_timeunit := util.ObjToString(data["project_timeunit"]) //工期单位
- result := float64(0)
- if expiredate > 0 && signaturedate > 0 {
- result = calculateYearDifference(signaturedate, expiredate)
- } else if project_duration > 0 && project_timeunit != "" {
- if strings.Contains(project_timeunit, "年") {
- if project_duration == 1 {
- res = 12
- } else if project_duration == 2 {
- res = 13
- } else if project_duration == 3 {
- res = 14
- } else if project_duration == 5 {
- res = 15
- }
- return res
- } else if strings.Contains(project_timeunit, "月") {
- result = float64(project_duration) / 12
- } else if strings.Contains(project_timeunit, "周") {
- result = float64(project_duration) * 7 / 365
- } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
- result = float64(project_duration) / 365
- }
- }
- if result == 0 {
- res = 16
- } else if result < 1 {
- res = 11
- } else if result >= 1 && result < 2 {
- res = 12
- } else if result >= 2 && result < 3 {
- res = 13
- } else if result >= 3 && result < 4 {
- res = 14
- } else if result >= 5 {
- res = 15
- }
- return res
- }
// calculateYearDifference returns the time between two Unix timestamps
// (seconds) expressed in years of 365 days. The result is negative when
// endTime precedes startTime.
func calculateYearDifference(startTime int64, endTime int64) float64 {
	elapsed := time.Unix(endTime, 0).Sub(time.Unix(startTime, 0))
	days := elapsed.Hours() / 24
	return days / 365
}
// getStr maps a buyerclass value to its buyer type label:
// 政府机构, 医疗单位, 教育单位, 金融企业, 商业公司, or 其它 when nothing matches.
//
// b matches a group when it is contained in one of the group's category names;
// groups are tried in the order listed. The test is done per category name, so
// input containing the list delimiters ("|", "(" or ")") no longer matches by
// accident, while a part of a single name (for example 审计 within 审计税务)
// still matches as before.
func getStr(b string) string {
	const other = "其它"
	if b == "" {
		return other
	}
	groups := []struct {
		label      string
		categories string // "(a|b|c)" list of category names
	}{
		{"政府机构", "(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)"},
		{"医疗单位", "(卫健委|医疗)"},
		{"教育单位", "(教育|学校)"},
		{"金融企业", "(人行|金融业)"},
		{"商业公司", "(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)"},
	}
	for _, g := range groups {
		for _, name := range strings.Split(strings.Trim(g.categories, "()"), "|") {
			if strings.Contains(name, b) {
				return g.label
			}
		}
	}
	return other
}
|