123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457 |
- package main
import (
	"encoding/json"
	"fmt"
	"log"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"time"

	elastic "app.yhyue.com/moapp/jybase/es"
	"go.mongodb.org/mongo-driver/bson/primitive"
	u "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
)
- func InitEs() {
- Es = elastic.NewEs("v7", conf.Config.Es.Addr, conf.Config.Es.Size, conf.Config.Es.User, conf.Config.Es.Password)
- }
- // @Description subscopeclass、topscopeclass、package
- // 20230523 多包处理 subpackage = 1
- // @Author J 2022/6/7 5:54 PM
- func fieldFun(data map[string]interface{}) {
- // 附件重采,数据同步时不更新判重标识(正常抽取判重)
- if u.IntAll(data["repeat"]) == 1 {
- data["extracttype"] = -1
- //update["repeat_id"] = compare["repeat_id"]
- } else {
- data["extracttype"] = 1
- }
- //未抽取、判重处理
- if u.IntAll(data["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
- data["dataging"] = 0 //
- }
- if subscopeclass, ok := data["subscopeclass"].(primitive.A); ok && len(subscopeclass) > 0 { //subscopeclass
- m1 := map[string]bool{}
- var newclass []string
- for _, sc := range subscopeclass {
- sclass, _ := sc.(string)
- if !m1[sclass] {
- m1[sclass] = true
- newclass = append(newclass, sclass)
- }
- }
- //data["subscopeclass"] = newclass
- data["s_subscopeclass"] = strings.Join(newclass, ",")
- }
- if topscopeclass, ok := data["topscopeclass"].(primitive.A); ok && len(topscopeclass) > 0 { //topscopeclass
- m2 := map[string]bool{}
- var newclass []string
- for _, tc := range topscopeclass {
- tclass, _ := tc.(string)
- tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
- if !m2[tclass] {
- m2[tclass] = true
- newclass = append(newclass, tclass)
- }
- }
- //data["topscopeclass"] = topscopeclass
- data["s_topscopeclass"] = strings.Join(newclass, ",")
- }
- //package
- if package1 := data["package"]; package1 != nil {
- packageM, _ := package1.(map[string]interface{})
- //data["package"] = packageM
- for _, p := range packageM {
- pm, _ := p.(map[string]interface{})
- if u.ObjToString(pm["winner"]) != "" || u.Float64All(pm["budget"]) > 0 || u.Float64All(pm["bidamount"]) > 0 {
- data["multipackage"] = 1
- break
- }
- }
- } else {
- data["multipackage"] = 0
- }
- // subpackage
- if data["package"] != nil && data["s_winner"] != nil && data["bidamount"] != nil {
- if pg, ok := data["package"].(map[string]interface{}); ok && len(pg) > 1 {
- var bmt []float64
- var swn []string
- for _, p := range pg {
- p1 := p.(map[string]interface{})
- if p1["bidamount"] != nil {
- bmt = append(bmt, u.Float64All(p1["bidamount"]))
- }
- if w := u.ObjToString(p1["winner"]); w != "" {
- swn = append(swn, w)
- }
- }
- if len(bmt) > 1 && len(swn) > 1 {
- sn := strings.Split(u.ObjToString(data["s_winner"]), ",")
- sort.Strings(sn)
- sort.Strings(swn)
- swn1 := u.ObjArrToStringArr(Duplicate(swn)) // 去重
- if strings.Join(swn1, ",") == strings.Join(sn, ",") {
- bidamount := 0.0
- for _, f := range bmt {
- bidamount += f
- }
- if bidamount == u.Float64All(data["bidamount"]) {
- data["subpackage"] = 1
- }
- }
- }
- }
- }
- }
// Duplicate returns the elements of a with runs of consecutive equal elements
// (compared with reflect.DeepEqual) collapsed to a single element. Only
// adjacent duplicates are removed, so callers wanting full de-duplication
// must sort a first.
//
// a is expected to be a slice, array or string. Any other value, including
// nil, yields a nil result instead of a reflect panic. An empty input also
// yields nil.
func Duplicate(a interface{}) (ret []interface{}) {
	va := reflect.ValueOf(a)
	switch va.Kind() {
	case reflect.Slice, reflect.Array, reflect.String:
		// Indexable kinds: handled below.
	default:
		return nil
	}

	n := va.Len()
	if n == 0 {
		return nil
	}

	ret = make([]interface{}, 0, n)
	var prev interface{}
	for i := 0; i < n; i++ {
		cur := va.Index(i).Interface()
		// Compare against the immediately preceding input element.
		if i == 0 || !reflect.DeepEqual(prev, cur) {
			ret = append(ret, cur)
		}
		prev = cur
	}
	return ret
}
- // @Description entidlist
- // @Author J 2022/6/7 2:36 PM
- func companyFun(s_winner string) (cid []string) {
- sWinnerarr := strings.Split(s_winner, ",")
- for _, w := range sWinnerarr {
- if w != "" {
- id := redis.GetStr("qyxy_id", w)
- if id == "" {
- ents, _ := MgoQ.Find(conf.Config.MongodbQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
- if len(*ents) > 0 {
- id = u.ObjToString((*ents)[0]["_id"])
- redis.PutCKV("qyxy_id", w, id)
- } else {
- ent, _ := MgoP.FindOne(conf.Config.MongodbP.Coll, map[string]interface{}{"history_name": w})
- if len(*ent) > 0 {
- id = u.ObjToString((*ent)["company_id"])
- redis.PutCKV("qyxy_id", w, id)
- }
- }
- }
- if id == "" {
- id = "-"
- }
- cid = append(cid, id)
- }
- }
- return cid
- }
- // @Description update 修改bidding表,extractM修改抽取表
- // @Author J 2022/6/10 10:29 AM
- func typeFunc(data map[string]interface{}) {
- if jyData, ok := data["jyfb_data"].(map[string]interface{}); ok {
- if t := u.ObjToString(jyData["type"]); t != "" {
- switch t {
- //case "采购信息":
- case "招标公告":
- if u.ObjToString(data["toptype"]) != "招标" {
- data["toptype"] = "招标"
- delete(data, "subtype")
- }
- case "采购意向":
- if u.ObjToString(data["toptype"]) != "采购意向" {
- data["toptype"] = "采购意向"
- data["subtype"] = "采购意向"
- }
- case "招标预告":
- if u.ObjToString(data["toptype"]) != "预告" {
- data["toptype"] = "预告"
- delete(data, "subtype")
- }
- case "招标结果":
- if u.ObjToString(data["toptype"]) != "结果" {
- data["toptype"] = "结果"
- delete(data, "subtype")
- }
- }
- }
- }
- }
- // @Description 附件有效字段(isValidFile)
- // @Author J 2022/7/8 14:41
- func validFile(tmp map[string]interface{}) int {
- isContinue := false
- if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
- if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
- for _, att := range atts {
- if att == nil {
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := u.ObjToString(att1["fid"]); fid != "" {
- isContinue = true
- break
- }
- }
- if isContinue {
- if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
- if len(attachTxt) > 0 {
- for _, at := range attachTxt {
- if at1, ok := at.(map[string]interface{}); ok && len(at1) > 0 {
- for k, _ := range at1 {
- if reflect.TypeOf(at1[k]).String() == "string" {
- continue
- }
- at2 := at1[k].(map[string]interface{})
- s := strings.ToLower(u.ObjToString(at2["file_name"]))
- //和王江含确认,此处判断老版本存在问题,正确为排除jpg、jpeg、png、pdf类型(2024-03-12)
- //if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") || strings.Contains(s, "pdf") {
- if !filterFileType.MatchString(s) {
- if strings.Contains(s, "swf") || strings.Contains(s, "html") {
- return -1
- } else if AnalysisFile(OssGetObject(u.ObjToString(at2["attach_url"]))) {
- return 1
- }
- }
- }
- break
- } else {
- break
- }
- }
- }
- }
- flag := false
- for _, att := range atts {
- if att == nil {
- continue
- }
- if reflect.TypeOf(att).String() == "string" {
- continue
- }
- att1 := att.(map[string]interface{})
- if fid := u.ObjToString(att1["fid"]); fid != "" {
- ftype := strings.ToLower(u.ObjToString(tmp["ftype"]))
- if ftype != "swf" && ftype != "html" && OssObjExists("jy-datafile", fid) {
- return 1
- } else {
- flag = true
- }
- }
- }
- if flag {
- return -1
- }
- }
- }
- }
- return 0
- }
- var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
- // @Description 发布时间处理
- // @Author J 2023/5/23 14:32
- func methodPb(tmp map[string]interface{}) {
- if tmp["ext_publishtime"] != nil {
- if newPb := u.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
- tmp["publishtime"] = newPb
- return
- }
- }
- for _, d := range DateTimeSelect {
- if tmp[d] != nil && u.Int64All(tmp[d]) < time.Now().Unix() {
- tmp["publishtime"] = u.Int64All(tmp[d])
- return
- }
- }
- return
- }
- // @Description 获取情报标签
- // @Author 徐志恒 2024/2/21 09:53
- func getTagSet(data map[string]interface{}) map[string]map[string]interface{} {
- tagSet := map[string]map[string]interface{}{}
- wuye := map[string]interface{}{}
- buyer := u.ObjToString(data["buyer"])
- publishtime := u.Int64All(data["publishtime"])
- bidamount := u.Float64All(data["bidamount"])
- wuye["isfirsthand"] = 62
- if buyer != "" {
- sql := `{
- "query": {
- "bool": {
- "must": [
- {
- "term": {
- "buyer": "` + buyer + `"
- }
- },
- {
- "term": {
- "tag_topinformation": "情报_物业"
- }
- },
- {
- "term": {
- "subtype": "合同"
- }
- },
- {
- "range": {
- "publishtime": {
- "lte": ` + fmt.Sprint(publishtime) + `
- }
- }
- }
- ]
- }
- },
- "sort": {
- "publishtime": "asc"
- },
- "_source": [
- "s_winner"
- ],
- "size": 10000
- }`
- data := Es.Get("bidding", "bidding", sql)
- if data != nil && len(*data) > 0 {
- count := 0
- first := u.ObjToString((*data)[0]["s_winner"])
- for k, v := range *data {
- winner := u.ObjToString(v["s_winner"])
- if k > 0 && first != winner {
- first = winner
- count++
- }
- }
- changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
- changehands, _ := strconv.ParseFloat(changehand, 64)
- wuye["changehand"] = changehands
- log.Println("changehands", count, len(*data), changehands)
- if changehands > 0.3 {
- wuye["changehandindex"] = 61
- }
- if len(*data) > 1 {
- wuye["isfirsthand"] = 0
- }
- }
- }
- if data["projectinfo"] != nil {
- projectInfo := u.ObjToMap(data["projectinfo"])
- if projectInfo != nil && len(*projectInfo) > 0 {
- if (*projectInfo)["attachments"] != nil {
- wuye["isfile"] = 63
- }
- }
- }
- wuye["period"] = getperiod(data)
- wuye["scale"] = getBidamountRange(bidamount)
- if data["property_form"] != nil {
- property_form := u.ObjArrToStringArr(data["property_form"].([]interface{}))
- wuye["property_form"] = getpropertyform(property_form)
- }
- tagSet["wuye"] = wuye
- return tagSet
- }
// getBidamountRange buckets a bid amount into a scale code:
//
//	1: value < 500000
//	2: 500000 <= value < 1000000
//	3: 1000000 <= value < 2000000
//	4: 2000000 <= value < 5000000
//	5: everything else
func getBidamountRange(value float64) int {
	// Exclusive upper bounds of buckets 1..4, in ascending order.
	upperBounds := [...]float64{500000, 1000000, 2000000, 5000000}
	for i, limit := range upperBounds {
		if value < limit {
			return i + 1
		}
	}
	return len(upperBounds) + 1
}
// getpropertyform translates property-form names into their numeric codes and
// joins the codes with commas, preserving input order. Names without a known
// code are dropped; repeated names yield repeated codes.
func getpropertyform(value []string) string {
	codeOf := map[string]string{
		"住宅":    "21",
		"政府办公楼": "22",
		"学校":    "23",
		"医院":    "24",
		"产业园区":  "25",
		"旅游景区":  "26",
		"交通运输":  "27",
		"商务办公楼": "28",
		"酒店":    "29",
	}

	codes := make([]string, 0, len(value))
	for _, name := range value {
		code, known := codeOf[name]
		if !known {
			continue
		}
		codes = append(codes, code)
	}
	return strings.Join(codes, ",")
}
- func getperiod(data map[string]interface{}) int {
- res := 16
- signaturedate := u.Int64All(data["signaturedate"]) //合同签订日期
- expiredate := u.Int64All(data["expiredate"]) //合同截止日期
- // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
- project_duration := u.IntAll(data["project_duration"]) //工期时长
- project_timeunit := u.ObjToString(data["project_timeunit"]) //工期单位
- result := float64(0)
- if expiredate > 0 && signaturedate > 0 {
- result = calculateYearDifference(signaturedate, expiredate)
- } else if project_duration > 0 && project_timeunit != "" {
- if strings.Contains(project_timeunit, "年") {
- if project_duration == 1 {
- res = 12
- } else if project_duration == 2 {
- res = 13
- } else if project_duration == 3 {
- res = 14
- } else if project_duration == 5 {
- res = 15
- }
- return res
- } else if strings.Contains(project_timeunit, "月") {
- result = float64(project_duration) / 12
- } else if strings.Contains(project_timeunit, "周") {
- result = float64(project_duration) * 7 / 365
- } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
- result = float64(project_duration) / 365
- }
- }
- if result == 0 {
- res = 16
- } else if result < 1 {
- res = 11
- } else if result >= 1 && result < 2 {
- res = 12
- } else if result >= 2 && result < 3 {
- res = 13
- } else if result >= 3 && result < 4 {
- res = 14
- } else if result >= 5 {
- res = 15
- }
- return res
- }
// calculateYearDifference returns the time from startTime to endTime, both Unix
// timestamps in seconds, expressed in 365-day years. The result is negative
// when endTime precedes startTime.
func calculateYearDifference(startTime int64, endTime int64) float64 {
	const (
		hoursPerDay = 24
		daysPerYear = 365
	)
	elapsed := time.Unix(endTime, 0).Sub(time.Unix(startTime, 0))
	return elapsed.Hours() / hoursPerDay / daysPerYear
}
|