// task.go
  1. package main
  2. import (
  3. "encoding/json"
  4. "field_sync/config"
  5. "field_sync/oss"
  6. "fmt"
  7. "net"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "log"
  15. "go.mongodb.org/mongo-driver/bson"
  16. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  17. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  18. "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
  19. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  20. )
  // regLetter matches runs of lowercase ASCII letters; fieldFun uses it to strip
  // letters from top-level scope class names.
  var regLetter = regexp.MustCompile("[a-z]*")

  // cityEndReg matches a trailing 区, 县 or 市 character at the end of a name.
  var cityEndReg = regexp.MustCompile("(区|县|市)$")
  25. func biddingTask(data []byte, mapInfo map[string]interface{}) {
  26. defer util.Catch()
  27. stype := util.ObjToString(mapInfo["stype"])
  28. q, _ := mapInfo["query"].(map[string]interface{})
  29. bkey, _ := mapInfo["bkey"].(string)
  30. if q == nil {
  31. q = map[string]interface{}{
  32. "_id": map[string]interface{}{
  33. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  34. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  35. },
  36. }
  37. }
  38. //extract库
  39. extractConn := MgoE.GetMgoConn()
  40. defer MgoE.DestoryMongoConn(extractConn)
  41. extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  42. "field_source": 0,
  43. "kvtext": 0,
  44. }).Sort("_id").Iter()
  45. eMap := map[string]map[string]interface{}{}
  46. extCount, repeatCount := 0, 0
  47. for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
  48. if util.IntAll(tmp["repeat"]) == 1 {
  49. repeatCount++
  50. }
  51. tid := mongodb.BsonIdToSId(tmp["_id"])
  52. eMap[tid] = tmp
  53. tmp = make(map[string]interface{})
  54. }
  55. log.Println("抽取表 数据量", extCount, "重复数据量", repeatCount)
  56. //bidding库
  57. biddingConn := MgoB.GetMgoConn()
  58. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  59. log.Println("bidding表 同步总数:", count)
  60. c := 0
  61. if count < 500000 {
  62. var res []map[string]interface{}
  63. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  64. "contenthtml": 0,
  65. }).Iter()
  66. for tmp := make(map[string]interface{}); result.Next(tmp); {
  67. res = append(res, tmp)
  68. tmp = make(map[string]interface{})
  69. }
  70. MgoB.DestoryMongoConn(biddingConn)
  71. log.Println("查询结果 bidding", count, "抽取:", extCount)
  72. c = doIndex(res, eMap, bkey, stype)
  73. } else {
  74. log.Println("查询结果 数据量太大,放弃", count)
  75. MgoB.DestoryMongoConn(biddingConn)
  76. }
  77. log.Println("bidding sync...over all", count, "extract sync ", c)
  78. NextNode(mapInfo, stype)
  79. // NextNodePro(mapInfo, stype)
  80. NextNodeTidb(mapInfo, stype)
  81. if stype == "bidding_history" {
  82. NextNodeBidData(mapInfo) // bidding-data数据
  83. NextNodeTidbQyxy(mapInfo) // tidb-企业数据
  84. NextNodeHn(mapInfo)
  85. }
  86. if stype == "bidding" {
  87. uq := map[string]interface{}{
  88. "gtid": map[string]interface{}{
  89. "$gte": util.ObjToString(mapInfo["gtid"]),
  90. },
  91. "lteid": map[string]interface{}{
  92. "$lte": util.ObjToString(mapInfo["lteid"]),
  93. },
  94. }
  95. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 7, "updatetime": time.Now().Unix()}}, false, true)
  96. }
  97. //领域标签处理的数据 id段
  98. if stype == "bidding_history" {
  99. MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
  100. }
  101. }
  // biddingAllTask streams the bidding collection and the extract collection over
  // the same query (both sorted by "-_id"), compares documents with equal ids and
  // queues the resulting updates on updateBidPool / updateExtPool.
  //
  // mapInfo is the decoded task message; "query" is used when present, otherwise
  // the _id range (gtid, lteid] is built from the string values "gtid" and
  // "lteid". data is not used by this function.
  func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
  	defer util.Catch()
  	q, _ := mapInfo["query"].(map[string]interface{})
  	if q == nil {
  		q = map[string]interface{}{
  			"_id": map[string]interface{}{
  				"$gt":  mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  				"$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  			},
  		}
  	}
  	// extract database
  	extractConn := MgoE.GetMgoConn()
  	defer MgoE.DestoryMongoConn(extractConn)
  	extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  		"field_source": 0,
  		"kvtext":       0,
  	}).Sort("-_id").Iter()
  	// bidding database
  	biddingConn := MgoB.GetMgoConn()
  	defer MgoB.DestoryMongoConn(biddingConn)
  	count := 0
  	// compare holds the extract document currently under the extract cursor;
  	// nil means "fetch the next one".
  	var compare map[string]interface{}
  	result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  		"contenthtml":  0,
  		"field_source": 0,
  	}).Sort("-_id").Iter()
  	for tmp := make(map[string]interface{}); result.Next(tmp); count++ {
  		update := map[string]interface{}{}
  		del := map[string]interface{}{} // fields that have a value in bidding but none in extract
  		// ---------------- comparison starts
  		for {
  			if compare == nil {
  				compare = make(map[string]interface{})
  				if !extractResult.Next(compare) {
  					// Extract cursor exhausted; compare stays a non-nil empty map.
  					break
  				}
  			}
  			if compare != nil {
  				cid := mongodb.BsonIdToSId(compare["_id"])
  				tid := mongodb.BsonIdToSId(tmp["_id"])
  				if cid == tid {
  					// Update the bidding document; fields listed in its modifyinfo are not overwritten.
  					modifyinfo := make(map[string]bool)
  					if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
  						for k := range tmpmodifyinfo {
  							modifyinfo[k] = true
  						}
  					}
  					for _, k := range config.Conf.Serve.FieldS {
  						v1 := compare[k] // extract
  						v2 := tmp[k]     // bidding
  						if v2 == nil && v1 != nil {
  							update[k] = v1
  						} else if v2 != nil && v1 != nil && !modifyinfo[k] {
  							update[k] = v1
  						} else if v2 != nil && v1 == nil && !modifyinfo[k] {
  							// s_subscopeclass / s_topscopeclass are only unset when their
  							// base field has already been marked for removal in del.
  							if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  								continue
  							} else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  								continue
  							}
  							del[k] = 1
  							// util.Debug("extract has no value, bidding has a value: field--", k, del)
  						}
  					}
  					if util.IntAll(compare["repeat"]) == 1 {
  						update["extracttype"] = -1
  						update["dataprocess"] = 7
  						if compare["repeat_id"] != nil {
  							update["repeat_id"] = compare["repeat_id"]
  						}
  					} else {
  						update["extracttype"] = 1
  						update["dataprocess"] = 8
  					}
  					break
  				} else {
  					// NOTE(review): both cursors are sorted by "-_id" (descending), yet the
  					// extract cursor is advanced when cid < tid, which looks like the rule
  					// for ascending order — confirm the intended direction.
  					if cid < tid {
  						compare = nil
  						continue
  					} else {
  						break
  					}
  				}
  			} else {
  				break
  			}
  		}
  		// ------------------ comparison ends
  		// Classification handling.
  		// NOTE(review): compare is non-nil here even when the ids did not match (or the
  		// extract cursor is exhausted), so fieldFun runs against that document and the
  		// extract cursor advances for every bidding document — confirm this is intended.
  		if compare != nil { // extract
  			fieldFun(compare, update)
  			compare = nil
  		}
  		// entidlist
  		extractMap := make(map[string]interface{})
  		// This compares an interface value with "", so a missing s_winner (nil) also
  		// enters the branch; companyFun then returns no ids for an empty name list.
  		if update["s_winner"] != "" {
  			cid := companyFun(update)
  			if len(cid) > 0 {
  				update["entidlist"] = cid
  				extractMap["entidlist"] = cid
  			}
  		}
  		if len(extractMap) > 0 {
  			updateExtPool <- []map[string]interface{}{
  				{"_id": tmp["_id"]},
  				{"$set": extractMap},
  			}
  		}
  		// Attachment validity flag: validFile returns -1 (invalid), 1 (valid) or 0 (leave unset).
  		if i := validFile(tmp); i != 0 {
  			if i == -1 {
  				update["isValidFile"] = false
  			} else {
  				update["isValidFile"] = true
  			}
  		}
  		if len(update) > 0 {
  			if len(del) > 0 { // fields to remove
  				updateBidPool <- []map[string]interface{}{{
  					"_id": tmp["_id"],
  				},
  					{"$set": update, "$unset": del},
  				}
  			} else {
  				updateBidPool <- []map[string]interface{}{{
  					"_id": tmp["_id"],
  				},
  					{"$set": update},
  				}
  			}
  		}
  		if count%50000 == 0 {
  			log.Println("biddingTask current", count)
  		}
  		// Fresh map for the next cursor read so keys from this document do not linger.
  		tmp = make(map[string]interface{})
  	}
  	log.Println("biddingAll sync...over all", count)
  }
  242. func doIndex(infos []map[string]interface{}, eMap map[string]map[string]interface{}, bkey, stype string) int {
  243. syncNo := 0 //抽取表数据同步数量
  244. //对比两张表数据,减少查询次数
  245. var compare map[string]interface{}
  246. var bidUpdate [][]map[string]interface{}
  247. var extUpdate [][]map[string]interface{}
  248. //SaveEsLock := &sync.Mutex{}
  249. log.Println("start ...")
  250. for n, tmp := range infos {
  251. tid := mongodb.BsonIdToSId(tmp["_id"])
  252. update := map[string]interface{}{} //要更新的mongo数据
  253. del := map[string]interface{}{}
  254. //对比方法----------------
  255. if eMap[tid] != nil {
  256. compare = eMap[tid]
  257. if stype == "bidding" {
  258. // 增量id段 正常数据
  259. if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
  260. tmp = make(map[string]interface{})
  261. compare = nil
  262. continue
  263. }
  264. delete(eMap, tid)
  265. }
  266. if stype == "bidding_history" {
  267. //增量id段 历史数据
  268. if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
  269. tmp = make(map[string]interface{})
  270. compare = nil
  271. continue
  272. }
  273. delete(eMap, tid)
  274. }
  275. syncNo++
  276. log.Println("抽取区域 省", compare["area"], " 市 ", compare["city"], " 区 ", compare["district"], " id ", tid)
  277. modifyinfo := make(map[string]bool)
  278. if tmp["modifyinfo"] != nil {
  279. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok {
  280. for k := range tmpmodifyinfo {
  281. modifyinfo[k] = true
  282. }
  283. }
  284. }
  285. for _, k := range config.Conf.Serve.FieldS {
  286. v1 := compare[k] //extract
  287. v2 := tmp[k] //bidding
  288. if v2 == nil && v1 != nil {
  289. update[k] = v1
  290. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  291. update[k] = v1
  292. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  293. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  294. continue
  295. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  296. continue
  297. } else if k == "city" || k == "district" {
  298. update[k] = ""
  299. } else {
  300. del[k] = 1
  301. }
  302. }
  303. }
  304. // 附件重采,数据同步时不更新判重标识
  305. if util.IntAll(compare["repeat"]) == 1 {
  306. update["extracttype"] = -1
  307. update["dataprocess"] = 7
  308. if compare["repeat_id"] != nil {
  309. update["repeat_id"] = compare["repeat_id"]
  310. }
  311. } else {
  312. update["extracttype"] = 1
  313. update["dataprocess"] = 8
  314. }
  315. } else {
  316. compare = nil
  317. if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  318. update["dataging"] = 0
  319. }
  320. update["dataprocess"] = 8
  321. }
  322. //下面可以多线程跑的--->
  323. //处理分类
  324. if compare != nil { //extract
  325. fieldFun(compare, update)
  326. // publishtime 20230523
  327. if util.IntAll(tmp["publishtime"]) == -1 {
  328. if pb := methodPb(compare); pb > 0 {
  329. update["publishtime"] = pb
  330. }
  331. }
  332. compare = nil
  333. }
  334. //------------------对比结束
  335. //处理key descript
  336. if bkey == "" {
  337. DealInfo(&tmp, &update)
  338. }
  339. // entidlist
  340. extractMap := make(map[string]interface{})
  341. if update["s_winner"] != "" {
  342. cid := companyFun(update)
  343. if len(cid) > 0 {
  344. tmp["entidlist"] = cid
  345. update["entidlist"] = cid
  346. extractMap["entidlist"] = cid
  347. }
  348. }
  349. // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
  350. typeFunc(tmp, update, extractMap)
  351. if len(extractMap) > 0 {
  352. if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
  353. extUpdate = append(extUpdate, []map[string]interface{}{
  354. {"_id": tmp["_id"]},
  355. {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  356. })
  357. } else {
  358. extUpdate = append(extUpdate, []map[string]interface{}{
  359. {"_id": tmp["_id"]},
  360. {"$set": extractMap},
  361. })
  362. }
  363. if len(extUpdate) >= MgoBulkSize {
  364. tmps := extUpdate
  365. MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  366. extUpdate = [][]map[string]interface{}{}
  367. }
  368. }
  369. // 附件有效字段
  370. if i := validFile(tmp); i != 0 {
  371. if i == -1 {
  372. tmp["isValidFile"] = false
  373. update["isValidFile"] = false
  374. } else {
  375. tmp["isValidFile"] = true
  376. update["isValidFile"] = true
  377. }
  378. }
  379. // 2024-02-21 徐志恒 情报标签字段
  380. toptype := util.ObjToString(tmp["toptype"])
  381. subtype := util.ObjToString(tmp["subtype"])
  382. buyerclass := util.ObjToString(tmp["buyerclass"])
  383. if buyerclass != "" {
  384. update["buyer_type"] = getStr(buyerclass)
  385. }
  386. s_topscopeclass := util.ObjToString(update["s_topscopeclass"])
  387. if (tmp["tag_topinformation"] != nil && (subtype == "合同" || subtype == "中标" || subtype == "成交" || subtype == "采购意向" || toptype == "招标")) || (tmp["tag_topinformation"] == nil && toptype == "拟建" && strings.Contains(s_topscopeclass, "建筑工程")) {
  388. update["tag_set"] = getTagSet(tmp, compare)
  389. }
  390. if len(update) > 0 {
  391. log.Println("保存bidding区域 省", update["area"], " 市 ", update["city"], " 区 ", update["district"], " id ", tid)
  392. if len(del) > 0 {
  393. bidUpdate = append(bidUpdate, []map[string]interface{}{{
  394. "_id": tmp["_id"],
  395. },
  396. {"$set": update, "$unset": del},
  397. })
  398. } else {
  399. bidUpdate = append(bidUpdate, []map[string]interface{}{{
  400. "_id": tmp["_id"],
  401. },
  402. {"$set": update},
  403. })
  404. }
  405. if len(bidUpdate) >= MgoBulkSize {
  406. tmps := bidUpdate
  407. MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  408. bidUpdate = [][]map[string]interface{}{}
  409. }
  410. }
  411. if n%500 == 0 {
  412. log.Println("biddingTask current ", n)
  413. }
  414. tmp = make(map[string]interface{})
  415. }
  416. //SaveEsLock.Lock()
  417. if len(bidUpdate) > 0 {
  418. tmps := bidUpdate
  419. MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  420. bidUpdate = [][]map[string]interface{}{}
  421. }
  422. if len(extUpdate) > 0 {
  423. tmps := extUpdate
  424. MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  425. extUpdate = [][]map[string]interface{}{}
  426. }
  427. //SaveEsLock.Unlock()
  428. return syncNo
  429. }
  430. // @Description subscopeclass、topscopeclass、package
  431. // 20230523 多包处理 subpackage = 1
  432. // @Author J 2022/6/7 5:54 PM
  433. func fieldFun(compare, update map[string]interface{}) {
  434. subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
  435. if subscopeclass != nil {
  436. m1 := map[string]bool{}
  437. newclass := []string{}
  438. for _, sc := range subscopeclass {
  439. sclass, _ := sc.(string)
  440. if !m1[sclass] {
  441. m1[sclass] = true
  442. newclass = append(newclass, sclass)
  443. }
  444. }
  445. update["s_subscopeclass"] = strings.Join(newclass, ",")
  446. update["subscopeclass"] = newclass
  447. }
  448. topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
  449. if topscopeclass != nil {
  450. m2 := map[string]bool{}
  451. newclass := []string{}
  452. for _, tc := range topscopeclass {
  453. tclass, _ := tc.(string)
  454. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  455. if !m2[tclass] {
  456. m2[tclass] = true
  457. newclass = append(newclass, tclass)
  458. }
  459. }
  460. update["topscopeclass"] = topscopeclass
  461. update["s_topscopeclass"] = strings.Join(newclass, ",")
  462. }
  463. if package1 := compare["package"]; package1 != nil {
  464. packageM, _ := package1.(map[string]interface{})
  465. update["package"] = packageM
  466. for _, p := range packageM {
  467. pm, _ := p.(map[string]interface{})
  468. if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
  469. util.Float64All(pm["bidamount"]) > 0 {
  470. update["multipackage"] = 1
  471. break
  472. }
  473. }
  474. } else {
  475. update["multipackage"] = 0
  476. }
  477. // subpackage
  478. if compare["package"] != nil && compare["s_winner"] != nil && compare["bidamount"] != nil {
  479. pg := compare["package"].(map[string]interface{})
  480. if len(pg) > 1 {
  481. var bmt []float64
  482. var swn []string
  483. for _, p := range pg {
  484. p1 := p.(map[string]interface{})
  485. if p1["bidamount"] != nil {
  486. bmt = append(bmt, util.Float64All(p1["bidamount"]))
  487. }
  488. if w := util.ObjToString(p1["winner"]); w != "" {
  489. swn = append(swn)
  490. }
  491. }
  492. if len(bmt) > 1 && len(swn) > 1 {
  493. sn := strings.Split(util.ObjToString(compare["s_winner"]), ",")
  494. sort.Strings(sn)
  495. sort.Strings(swn)
  496. swn1 := util.ObjArrToStringArr(Duplicate(swn)) // 去重
  497. if strings.Join(swn1, ",") == strings.Join(sn, ",") {
  498. bidamount := 0.0
  499. for _, f := range bmt {
  500. bidamount += f
  501. }
  502. if bidamount == util.Float64All(compare["bidamount"]) {
  503. update["subpackage"] = 1
  504. }
  505. }
  506. }
  507. }
  508. }
  509. }
  510. // @Description entidlist
  511. // @Author J 2022/6/7 2:36 PM
  512. func companyFun(tmp map[string]interface{}) (cid []string) {
  513. sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
  514. for _, w := range sWinnerarr {
  515. if w != "" {
  516. id := redis.GetStr("qyxy_id", w)
  517. if id == "" {
  518. ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  519. if len(*ents) > 0 {
  520. id = util.ObjToString((*ents)[0]["_id"])
  521. redis.PutCKV("qyxy_id", w, id)
  522. } else {
  523. ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
  524. if len(*ent) > 0 {
  525. id = util.ObjToString((*ent)["company_id"])
  526. redis.PutCKV("qyxy_id", w, id)
  527. }
  528. }
  529. }
  530. if id == "" {
  531. id = "-"
  532. }
  533. cid = append(cid, id)
  534. }
  535. }
  536. return cid
  537. }
  538. // @Description update 修改bidding表,extractM修改抽取表
  539. // @Author J 2022/6/10 10:29 AM
  540. func typeFunc(tmp, update, extractM map[string]interface{}) {
  541. if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
  542. if t := util.ObjToString(jyData["type"]); t != "" {
  543. switch t {
  544. //case "采购信息":
  545. case "招标公告":
  546. if util.ObjToString(tmp["toptype"]) != "招标" {
  547. update["toptype"] = "招标"
  548. extractM["toptype"] = "招标"
  549. delete(update, "subtype")
  550. }
  551. case "采购意向":
  552. if util.ObjToString(tmp["toptype"]) != "采购意向" {
  553. update["toptype"] = "采购意向"
  554. update["subtype"] = "采购意向"
  555. extractM["toptype"] = "采购意向"
  556. extractM["subtype"] = "采购意向"
  557. }
  558. case "招标预告":
  559. if util.ObjToString(tmp["toptype"]) != "预告" {
  560. update["toptype"] = "预告"
  561. extractM["toptype"] = "预告"
  562. delete(update, "subtype")
  563. }
  564. case "招标结果":
  565. if util.ObjToString(tmp["toptype"]) != "结果" {
  566. update["toptype"] = "结果"
  567. extractM["toptype"] = "结果"
  568. delete(update, "subtype")
  569. }
  570. }
  571. }
  572. }
  573. }
  574. // @Description 附件有效字段(isValidFile)
  575. // @Author J 2022/7/8 14:41
  576. func validFile(tmp map[string]interface{}) int {
  577. isContinue := false
  578. if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  579. if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  580. for _, att := range atts {
  581. if att == nil {
  582. continue
  583. }
  584. if reflect.TypeOf(att).String() == "string" {
  585. continue
  586. }
  587. att1 := att.(map[string]interface{})
  588. if fid := util.ObjToString(att1["fid"]); fid != "" {
  589. isContinue = true
  590. break
  591. }
  592. }
  593. if isContinue {
  594. if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  595. if len(attachTxt) > 0 {
  596. for _, at := range attachTxt {
  597. at1 := at.(map[string]interface{})
  598. if len(at1) > 0 {
  599. for k, _ := range at1 {
  600. if reflect.TypeOf(at1[k]).String() == "string" {
  601. continue
  602. }
  603. at2 := at1[k].(map[string]interface{})
  604. s := strings.ToLower(util.ObjToString(at2["file_name"]))
  605. if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
  606. strings.Contains(s, "pdf") {
  607. if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  608. return -1
  609. } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
  610. return 1
  611. }
  612. }
  613. }
  614. break
  615. } else {
  616. break
  617. }
  618. }
  619. }
  620. }
  621. flag := false
  622. for _, att := range atts {
  623. if att == nil {
  624. continue
  625. }
  626. if reflect.TypeOf(att).String() == "string" {
  627. continue
  628. }
  629. att1 := att.(map[string]interface{})
  630. if fid := util.ObjToString(att1["fid"]); fid != "" {
  631. ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
  632. if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
  633. return 1
  634. } else {
  635. flag = true
  636. }
  637. }
  638. }
  639. if flag {
  640. return -1
  641. }
  642. }
  643. }
  644. }
  645. return 0
  646. }
  647. // @Description id不变,内容变化 重新索引数据
  648. // @Author J 2022/8/10 13:29
  649. func taskinfo(id string) {
  650. tmp, _ := MgoB.FindById("bidding", id, nil)
  651. if tmp == nil || len(*tmp) == 0 {
  652. log.Println(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
  653. return
  654. }
  655. extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
  656. if extractM == nil || len(*extractM) == 0 {
  657. extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
  658. if extractM == nil || len(*extractM) == 0 {
  659. log.Println(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
  660. return
  661. }
  662. }
  663. update := map[string]interface{}{} //要更新的mongo数据
  664. //更新bidding表字段
  665. for _, k := range config.Conf.Serve.FieldS {
  666. v1 := (*extractM)[k] //extract
  667. v2 := (*tmp)[k] //bidding
  668. if v2 == nil && v1 != nil {
  669. update[k] = v1
  670. } else if v2 != nil && v1 != nil {
  671. update[k] = v1
  672. } else if v2 != nil && v1 == nil {
  673. if k == "city" || k == "district" {
  674. update[k] = ""
  675. }
  676. }
  677. }
  678. if util.IntAll((*extractM)["repeat"]) == 1 {
  679. update["extracttype"] = -1
  680. update["dataprocess"] = 7
  681. if (*extractM)["repeat_id"] != nil {
  682. update["repeat_id"] = (*extractM)["repeat_id"]
  683. }
  684. } else {
  685. update["extracttype"] = 1
  686. update["dataprocess"] = 8
  687. }
  688. //处理分类
  689. fieldFun(*extractM, update)
  690. extractMap := make(map[string]interface{})
  691. if util.ObjToString((*tmp)["s_winner"]) != "" {
  692. cid := companyFun(*tmp)
  693. if len(cid) > 0 {
  694. update["entidlist"] = cid
  695. extractMap["entidlist"] = cid
  696. }
  697. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
  698. }
  699. // 附件有效字段
  700. if i := validFile(*tmp); i != 0 {
  701. if i == -1 {
  702. update["isValidFile"] = false
  703. } else {
  704. update["isValidFile"] = true
  705. }
  706. }
  707. if len(update) > 0 {
  708. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
  709. }
  710. mapinfo := map[string]interface{}{
  711. "infoid": id,
  712. "stype": "index-by-id",
  713. }
  714. datas, _ := json.Marshal(mapinfo)
  715. var next = &net.UDPAddr{
  716. IP: net.ParseIP(config.Conf.Udp.Next.Addr),
  717. Port: util.IntAll(config.Conf.Udp.Next.Port),
  718. }
  719. log.Println("nsq data over es ", next, " mapinfo ", string(datas))
  720. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
  721. }
  // DateTimeSelect lists, in priority order, the time fields methodPb falls back
  // to when deriving a publish time.
  var DateTimeSelect = []string{
  	"bidopentime",
  	"bidendtime",
  	"signaturedate",
  	"comeintime",
  }
  723. // @Description 发布时间处理
  724. // @Author J 2023/5/23 14:32
  725. func methodPb(tmp map[string]interface{}) int64 {
  726. if tmp["ext_publishtime"] != nil {
  727. if newPb := util.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
  728. return newPb
  729. }
  730. }
  731. for _, d := range DateTimeSelect {
  732. if tmp[d] != nil && util.Int64All(tmp[d]) < time.Now().Unix() {
  733. return util.Int64All(tmp[d])
  734. }
  735. }
  736. return 0
  737. }
  // Duplicate returns the elements of a with consecutive duplicates removed
  // (elements are compared with reflect.DeepEqual). Only adjacent equal elements
  // are collapsed, so sort the input first to obtain a fully de-duplicated result.
  //
  // a must be a slice, array or string; for any other value, including nil, the
  // result is nil instead of a panic.
  //
  // @Author J 2023/5/24 09:53
  func Duplicate(a interface{}) (ret []interface{}) {
  	va := reflect.ValueOf(a)
  	switch va.Kind() {
  	case reflect.Slice, reflect.Array, reflect.String:
  		// indexable, handled below
  	default:
  		return nil
  	}
  	for i := 0; i < va.Len(); i++ {
  		cur := va.Index(i).Interface()
  		if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), cur) {
  			continue
  		}
  		ret = append(ret, cur)
  	}
  	return ret
  }
  751. // @Description 获取情报标签
  752. // @Author 徐志恒 2024/2/21 09:53
  753. func getTagSet(tmp, compare map[string]interface{}) map[string]map[string]interface{} {
  754. tagSet := map[string]map[string]interface{}{}
  755. wuye := map[string]interface{}{}
  756. buyer := util.ObjToString(compare["buyer"])
  757. publishtime := util.Int64All(tmp["publishtime"])
  758. bidamount := util.Float64All(compare["bidamount"])
  759. toptype := util.ObjToString(tmp["toptype"])
  760. subtype := util.ObjToString(tmp["subtype"])
  761. if subtype == "合同" {
  762. wuye["isfirsthand"] = 62
  763. if buyer != "" {
  764. sql := `{
  765. "query": {
  766. "bool": {
  767. "must": [
  768. {
  769. "term": {
  770. "buyer": "` + buyer + `"
  771. }
  772. },
  773. {
  774. "term": {
  775. "tag_topinformation": "情报_物业"
  776. }
  777. },
  778. {
  779. "term": {
  780. "subtype": "合同"
  781. }
  782. },
  783. {
  784. "range": {
  785. "publishtime": {
  786. "lte": ` + fmt.Sprint(publishtime) + `
  787. }
  788. }
  789. }
  790. ]
  791. }
  792. },
  793. "sort": {
  794. "publishtime": "asc"
  795. },
  796. "_source": [
  797. "s_winner"
  798. ],
  799. "size": 10000
  800. }`
  801. data := Es.Get("bidding", "bidding", sql)
  802. if data != nil && len(*data) > 0 {
  803. count := 0
  804. first := util.ObjToString((*data)[0]["s_winner"])
  805. for k, v := range *data {
  806. winner := util.ObjToString(v["s_winner"])
  807. if k > 0 && first != winner {
  808. first = winner
  809. count++
  810. }
  811. }
  812. changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
  813. changehands, _ := strconv.ParseFloat(changehand, 64)
  814. wuye["changehand"] = changehands
  815. if changehands > 0.3 {
  816. wuye["changehandindex"] = 61
  817. }
  818. if len(*data) > 1 {
  819. wuye["isfirsthand"] = 0
  820. }
  821. }
  822. }
  823. wuye["period"] = getperiod(compare)
  824. } else if toptype == "招标" || toptype == "采购意向" {
  825. bidamount = util.Float64All(compare["budget"])
  826. }
  827. if tmp["projectinfo"] != nil {
  828. projectInfo := util.ObjToMap(tmp["projectinfo"])
  829. if projectInfo != nil && len(*projectInfo) > 0 {
  830. if (*projectInfo)["attachments"] != nil {
  831. wuye["isfile"] = 63
  832. }
  833. }
  834. }
  835. wuye["scale"] = getBidamountRange(bidamount)
  836. if tmp["property_form"] != nil {
  837. property_form := util.ObjArrToStringArr(tmp["property_form"].([]interface{}))
  838. wuye["property_form"] = getpropertyform(property_form)
  839. }
  840. tagSet["wuye"] = wuye
  841. return tagSet
  842. }
  // getBidamountRange maps an amount to a scale bucket:
  //
  //	below 500000          -> 1
  //	[500000, 1000000)     -> 2
  //	[1000000, 2000000)    -> 3
  //	[2000000, 5000000)    -> 4
  //	everything else       -> 5
  func getBidamountRange(value float64) int {
  	if value < 500000 {
  		return 1
  	}
  	if value >= 500000 && value < 1000000 {
  		return 2
  	}
  	if value >= 1000000 && value < 2000000 {
  		return 3
  	}
  	if value >= 2000000 && value < 5000000 {
  		return 4
  	}
  	return 5
  }
  // getpropertyform translates property form names into their numeric codes and
  // returns the codes, in input order, joined by commas. Unknown names are
  // dropped; the result is "" when nothing matches.
  func getpropertyform(value []string) string {
  	codeOf := map[string]string{
  		"住宅":    "21",
  		"政府办公楼": "22",
  		"学校":    "23",
  		"医院":    "24",
  		"产业园区":  "25",
  		"旅游景区":  "26",
  		"交通运输":  "27",
  		"商务办公楼": "28",
  		"酒店":    "29",
  	}
  	codes := []string{}
  	for _, name := range value {
  		code, known := codeOf[name]
  		if !known {
  			continue
  		}
  		codes = append(codes, code)
  	}
  	return strings.Join(codes, ",")
  }
  877. func getperiod(data map[string]interface{}) int {
  878. res := 16
  879. signaturedate := util.Int64All(data["signaturedate"]) //合同签订日期
  880. expiredate := util.Int64All(data["expiredate"]) //合同截止日期
  881. // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
  882. project_duration := util.IntAll(data["project_duration"]) //工期时长
  883. project_timeunit := util.ObjToString(data["project_timeunit"]) //工期单位
  884. result := float64(0)
  885. if expiredate > 0 && signaturedate > 0 {
  886. result = calculateYearDifference(signaturedate, expiredate)
  887. } else if project_duration > 0 && project_timeunit != "" {
  888. if strings.Contains(project_timeunit, "年") {
  889. if project_duration == 1 {
  890. res = 12
  891. } else if project_duration == 2 {
  892. res = 13
  893. } else if project_duration == 3 {
  894. res = 14
  895. } else if project_duration == 5 {
  896. res = 15
  897. }
  898. return res
  899. } else if strings.Contains(project_timeunit, "月") {
  900. result = float64(project_duration) / 12
  901. } else if strings.Contains(project_timeunit, "周") {
  902. result = float64(project_duration) * 7 / 365
  903. } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
  904. result = float64(project_duration) / 365
  905. }
  906. }
  907. if result == 0 {
  908. res = 16
  909. } else if result < 1 {
  910. res = 11
  911. } else if result >= 1 && result < 2 {
  912. res = 12
  913. } else if result >= 2 && result < 3 {
  914. res = 13
  915. } else if result >= 3 && result < 4 {
  916. res = 14
  917. } else if result >= 5 {
  918. res = 15
  919. }
  920. return res
  921. }
  // calculateYearDifference returns the span from startTime to endTime (both Unix
  // seconds) expressed in years of 365 days. The result is negative when endTime
  // precedes startTime.
  func calculateYearDifference(startTime int64, endTime int64) float64 {
  	span := time.Unix(endTime, 0).Sub(time.Unix(startTime, 0))
  	return span.Hours() / 24 / 365
  }
  // getStr maps a buyer class to its buyer type label. A class matches a group
  // when it occurs as a substring of that group's name list; groups are tried in
  // order and the first match wins. An empty or unmatched class yields "其它".
  func getStr(b string) string {
  	if b == "" {
  		return "其它"
  	}
  	groups := []struct {
  		names string
  		label string
  	}{
  		{"(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)", "政府机构"},
  		{"(卫健委|医疗)", "医疗单位"},
  		{"(教育|学校)", "教育单位"},
  		{"(人行|金融业)", "金融企业"},
  		{"(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)", "商业公司"},
  	}
  	for _, g := range groups {
  		if strings.Contains(g.names, b) {
  			return g.label
  		}
  	}
  	return "其它"
  }