task.go 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036
  1. package main
  2. import (
  3. "encoding/json"
  4. "field_sync/config"
  5. "field_sync/oss"
  6. "fmt"
  7. "net"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "log"
  15. "go.mongodb.org/mongo-driver/bson"
  16. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  17. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  18. "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
  19. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  20. )
  21. var (
  22. regLetter = regexp.MustCompile("[a-z]*")
  23. cityEndReg = regexp.MustCompile("(区|县|市)$")
  24. )
  25. func biddingTask(data []byte, mapInfo map[string]interface{}) {
  26. defer util.Catch()
  27. stype := util.ObjToString(mapInfo["stype"])
  28. q, _ := mapInfo["query"].(map[string]interface{})
  29. bkey, _ := mapInfo["bkey"].(string)
  30. if q == nil {
  31. q = map[string]interface{}{
  32. "_id": map[string]interface{}{
  33. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  34. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  35. },
  36. }
  37. }
  38. //extract库
  39. // extractConn := MgoE.GetMgoConn()
  40. // defer MgoE.DestoryMongoConn(extractConn)
  41. // extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  42. // "field_source": 0,
  43. // "kvtext": 0,
  44. // }).Sort("_id").Iter()
  45. // eMap := map[string]map[string]interface{}{}
  46. // extCount, repeatCount := 0, 0
  47. // for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
  48. // if util.IntAll(tmp["repeat"]) == 1 {
  49. // repeatCount++
  50. // }
  51. // tid := mongodb.BsonIdToSId(tmp["_id"])
  52. // eMap[tid] = tmp
  53. // tmp = make(map[string]interface{})
  54. // }
  55. // log.Println("抽取表 数据量", extCount, "重复数据量", repeatCount)
  56. //bidding库
  57. biddingConn := MgoB.GetMgoConn()
  58. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  59. log.Println("bidding表 同步总数:", count)
  60. c := 0
  61. ygsiteArr := []map[string]interface{}{}
  62. sitedata, _ := MgoE.Find("bidding_yg_site", map[string]interface{}{}, nil, nil, false, -1, -1)
  63. if sitedata != nil && len(*sitedata) > 0 {
  64. ygsiteArr = *sitedata
  65. }
  66. // if count < 500000 {
  67. // var res []map[string]interface{}
  68. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  69. "contenthtml": 0,
  70. }).Iter()
  71. for tmp := make(map[string]interface{}); result.Next(tmp); {
  72. // res = append(res, tmp)
  73. ec := doIndex(tmp, bkey, stype, ygsiteArr)
  74. if ec > 0 {
  75. c++
  76. }
  77. tmp = make(map[string]interface{})
  78. }
  79. MgoB.DestoryMongoConn(biddingConn)
  80. // log.Println("查询结果 bidding", count, "抽取:", extCount)
  81. // c = doIndex(res, bkey, stype)
  82. // } else {
  83. // log.Println("查询结果 数据量太大,放弃", count)
  84. // MgoB.DestoryMongoConn(biddingConn)
  85. // }
  86. log.Println("bidding sync...over all", count, "extract sync ", c)
  87. NextNode(mapInfo, stype)
  88. // NextNodePro(mapInfo, stype)
  89. NextNodeTidb(mapInfo, stype)
  90. if stype == "bidding_history" {
  91. NextNodeBidData(mapInfo) // bidding-data数据
  92. NextNodeTidbQyxy(mapInfo) // tidb-企业数据
  93. NextNodeHn(mapInfo)
  94. }
  95. if stype == "bidding" {
  96. uq := map[string]interface{}{
  97. "gtid": map[string]interface{}{
  98. "$gte": util.ObjToString(mapInfo["gtid"]),
  99. },
  100. "lteid": map[string]interface{}{
  101. "$lte": util.ObjToString(mapInfo["lteid"]),
  102. },
  103. }
  104. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 8, "updatetime": time.Now().Unix()}}, false, true)
  105. }
  106. //领域标签处理的数据 id段
  107. if stype == "bidding_history" {
  108. MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
  109. }
  110. }
  111. func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
  112. defer util.Catch()
  113. q, _ := mapInfo["query"].(map[string]interface{})
  114. if q == nil {
  115. q = map[string]interface{}{
  116. "_id": map[string]interface{}{
  117. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  118. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  119. },
  120. }
  121. }
  122. //extract库
  123. extractConn := MgoE.GetMgoConn()
  124. defer MgoE.DestoryMongoConn(extractConn)
  125. extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  126. "field_source": 0,
  127. "kvtext": 0,
  128. }).Sort("-_id").Iter()
  129. //bidding库
  130. biddingConn := MgoB.GetMgoConn()
  131. defer MgoB.DestoryMongoConn(biddingConn)
  132. count := 0
  133. var compare map[string]interface{}
  134. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  135. "contenthtml": 0,
  136. "field_source": 0,
  137. }).Sort("-_id").Iter()
  138. for tmp := make(map[string]interface{}); result.Next(tmp); count++ {
  139. update := map[string]interface{}{}
  140. del := map[string]interface{}{} //记录extract没有值而bidding中有值的字段
  141. //对比方法----------------
  142. for {
  143. if compare == nil {
  144. compare = make(map[string]interface{})
  145. if !extractResult.Next(compare) {
  146. break
  147. }
  148. }
  149. if compare != nil {
  150. cid := mongodb.BsonIdToSId(compare["_id"])
  151. tid := mongodb.BsonIdToSId(tmp["_id"])
  152. if cid == tid {
  153. //更新bidding表;bidding表modifyinfo中的字段不更新
  154. modifyinfo := make(map[string]bool)
  155. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
  156. for k := range tmpmodifyinfo {
  157. modifyinfo[k] = true
  158. }
  159. }
  160. fmap := map[string]bool{
  161. "attach_text": true,
  162. "purchasing": true,
  163. "purchasinglist": true,
  164. "purchasingsource": true,
  165. "review_experts": true,
  166. }
  167. for _, k := range config.Conf.Serve.FieldS {
  168. v1 := compare[k] //extract
  169. v2 := tmp[k] //bidding
  170. if v2 == nil && v1 != nil {
  171. update[k] = v1
  172. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  173. update[k] = v1
  174. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  175. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  176. continue
  177. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  178. continue
  179. }
  180. if fmap[k] {
  181. continue
  182. }
  183. del[k] = 1
  184. //util.Debug("抽取结果没有值,bidding有值:field--", k, del)
  185. }
  186. }
  187. if util.IntAll(compare["repeat"]) == 1 {
  188. update["extracttype"] = -1
  189. update["dataprocess"] = 7
  190. if compare["repeat_id"] != nil {
  191. update["repeat_id"] = compare["repeat_id"]
  192. }
  193. } else {
  194. update["extracttype"] = 1
  195. update["dataprocess"] = 8
  196. }
  197. break
  198. } else {
  199. if cid < tid {
  200. compare = nil
  201. continue
  202. } else {
  203. break
  204. }
  205. }
  206. } else {
  207. break
  208. }
  209. }
  210. //------------------对比结束
  211. //处理分类
  212. if compare != nil { //extract
  213. fieldFun(compare, update)
  214. compare = nil
  215. }
  216. // entidlist
  217. extractMap := make(map[string]interface{})
  218. if update["s_winner"] != "" {
  219. cid := companyFun(update)
  220. if len(cid) > 0 {
  221. update["entidlist"] = cid
  222. extractMap["entidlist"] = cid
  223. }
  224. }
  225. if len(extractMap) > 0 {
  226. updateExtPool <- []map[string]interface{}{
  227. {"_id": tmp["_id"]},
  228. {"$set": extractMap},
  229. }
  230. }
  231. // 附件有效字段
  232. if i := validFile(tmp); i != 0 {
  233. if i == -1 {
  234. update["isValidFile"] = false
  235. } else {
  236. update["isValidFile"] = true
  237. }
  238. }
  239. if len(update) > 0 {
  240. if len(del) > 0 { //删除的字段
  241. updateBidPool <- []map[string]interface{}{{
  242. "_id": tmp["_id"],
  243. },
  244. {"$set": update, "$unset": del},
  245. }
  246. } else {
  247. updateBidPool <- []map[string]interface{}{{
  248. "_id": tmp["_id"],
  249. },
  250. {"$set": update},
  251. }
  252. }
  253. }
  254. if count%50000 == 0 {
  255. log.Println("biddingTask current", count)
  256. }
  257. tmp = make(map[string]interface{})
  258. }
  259. log.Println("biddingAll sync...over all", count)
  260. }
  261. func doIndex(tmp map[string]interface{}, bkey, stype string, ygsiteArr []map[string]interface{}) int {
  262. syncNo := 0 //抽取表数据同步数量
  263. //对比两张表数据,减少查询次数
  264. var compare map[string]interface{}
  265. var bidUpdate = map[string]interface{}{}
  266. var extUpdate = map[string]interface{}{}
  267. //SaveEsLock := &sync.Mutex{}
  268. // log.Println("start ...")
  269. // for n, tmp := range infos {
  270. tid := mongodb.BsonIdToSId(tmp["_id"])
  271. update := map[string]interface{}{} //要更新的mongo数据
  272. del := map[string]interface{}{}
  273. edata, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, tid, nil)
  274. //对比方法----------------
  275. if edata != nil && len(*edata) > 0 {
  276. compare = *edata
  277. if stype == "bidding" {
  278. // 增量id段 正常数据
  279. if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
  280. tmp = make(map[string]interface{})
  281. compare = nil
  282. return 0
  283. }
  284. // delete(eMap, tid)
  285. }
  286. if stype == "bidding_history" {
  287. //增量id段 历史数据
  288. if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
  289. tmp = make(map[string]interface{})
  290. compare = nil
  291. return 0
  292. }
  293. // delete(eMap, tid)
  294. }
  295. syncNo++
  296. log.Println("抽取区域 省", compare["area"], " 市 ", compare["city"], " 区 ", compare["district"], " id ", tid)
  297. modifyinfo := make(map[string]bool)
  298. if tmp["modifyinfo"] != nil {
  299. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok {
  300. for k := range tmpmodifyinfo {
  301. modifyinfo[k] = true
  302. }
  303. }
  304. }
  305. fmap := map[string]bool{
  306. "attach_text": true,
  307. "purchasing": true,
  308. "purchasinglist": true,
  309. "purchasingsource": true,
  310. "review_experts": true,
  311. }
  312. for _, k := range config.Conf.Serve.FieldS {
  313. v1 := compare[k] //extract
  314. v2 := tmp[k] //bidding
  315. if v2 == nil && v1 != nil {
  316. update[k] = v1
  317. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  318. update[k] = v1
  319. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  320. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  321. continue
  322. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  323. continue
  324. } else if k == "city" || k == "district" {
  325. update[k] = ""
  326. } else if fmap[k] {
  327. continue
  328. } else {
  329. del[k] = 1
  330. }
  331. }
  332. }
  333. // 附件重采,数据同步时不更新判重标识
  334. if util.IntAll(compare["repeat"]) == 1 {
  335. update["extracttype"] = -1
  336. update["dataprocess"] = 7
  337. if compare["repeat_id"] != nil {
  338. update["repeat_id"] = compare["repeat_id"]
  339. }
  340. } else {
  341. update["extracttype"] = 1
  342. update["dataprocess"] = 8
  343. }
  344. } else {
  345. compare = nil
  346. if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  347. update["dataging"] = 0
  348. }
  349. update["dataprocess"] = 8
  350. }
  351. //下面可以多线程跑的--->
  352. //处理分类
  353. if compare != nil { //extract
  354. fieldFun(compare, update)
  355. // publishtime 20230523
  356. if util.IntAll(tmp["publishtime"]) == -1 {
  357. if pb := methodPb(compare); pb > 0 {
  358. update["publishtime"] = pb
  359. }
  360. }
  361. compare = nil
  362. }
  363. //------------------对比结束
  364. //处理key descript
  365. if bkey == "" {
  366. DealInfo(&tmp, &update)
  367. }
  368. // entidlist
  369. extractMap := make(map[string]interface{})
  370. if update["s_winner"] != "" {
  371. cid := companyFun(update)
  372. if len(cid) > 0 {
  373. tmp["entidlist"] = cid
  374. update["entidlist"] = cid
  375. extractMap["entidlist"] = cid
  376. }
  377. }
  378. //阳光采购
  379. spidercode := util.ObjToString(tmp["spidercode"])
  380. site := util.ObjToString(tmp["site"])
  381. infoAttribute := util.ObjToString(tmp["infoattribute"])
  382. if infoAttribute == "zc_cgxx" {
  383. update["public_type"] = "用户发布"
  384. update["extracttype"] = -1
  385. update["dataprocess"] = 7
  386. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "user"})
  387. log.Println("阳光采购用户发布", tid)
  388. } else {
  389. if util.IntAll(compare["repeat"]) != 1 {
  390. for _, v := range ygsiteArr {
  391. spidercodes := util.ObjToString(v["spidercode"])
  392. sites := util.ObjToString(v["site"])
  393. datatype := util.ObjToString(v["datatype"])
  394. if datatype == "1" && spidercodes == spidercode {
  395. update["infoattribute"] = "zc_cgxx"
  396. update["public_type"] = "平台发布"
  397. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "spidercode"})
  398. log.Println("阳光采购平台发布爬虫", tid)
  399. } else if datatype == "2" && site == sites {
  400. update["infoattribute"] = "zc_cgxx"
  401. update["public_type"] = "平台发布"
  402. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "site"})
  403. log.Println("阳光采购平台发布站点", tid)
  404. } else if datatype == "3" && spidercodes == spidercode {
  405. update["infoattribute"] = "zc_cgxx"
  406. update["public_type"] = "平台发布"
  407. update["is_yg_new"] = 1
  408. update["extracttype"] = -1
  409. update["dataprocess"] = 7
  410. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "is_yg_new"})
  411. log.Println("阳光采购平台新采集爬虫", tid)
  412. } else if datatype == "4" && site == sites {
  413. update["extracttype"] = -1
  414. update["dataprocess"] = 7
  415. MgoE.Save("bidding_private", map[string]interface{}{"id": tid, "source": site})
  416. log.Println("私有采集爬虫", tid)
  417. }
  418. }
  419. }
  420. }
  421. // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
  422. typeFunc(tmp, update, extractMap)
  423. if len(extractMap) > 0 {
  424. if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
  425. // extUpdate = append(extUpdate, []map[string]interface{}{
  426. // {"_id": tmp["_id"]},
  427. // {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  428. // })
  429. extUpdate = map[string]interface{}{"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}}
  430. } else {
  431. extUpdate = map[string]interface{}{"$set": extractMap}
  432. }
  433. // if len(extUpdate) >= MgoBulkSize {
  434. // tmps := extUpdate
  435. // MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  436. // extUpdate = [][]map[string]interface{}{}
  437. // }
  438. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, tid, extUpdate)
  439. }
  440. // 附件有效字段
  441. if i := validFile(tmp); i != 0 {
  442. if i == -1 {
  443. tmp["isValidFile"] = false
  444. update["isValidFile"] = false
  445. } else {
  446. tmp["isValidFile"] = true
  447. update["isValidFile"] = true
  448. }
  449. }
  450. // 2024-02-21 徐志恒 情报标签字段
  451. toptype := util.ObjToString(tmp["toptype"])
  452. subtype := util.ObjToString(tmp["subtype"])
  453. buyerclass := util.ObjToString(update["buyerclass"])
  454. if buyerclass != "" {
  455. update["buyer_type"] = getStr(buyerclass)
  456. }
  457. s_topscopeclass := util.ObjToString(update["s_topscopeclass"])
  458. if (tmp["tag_topinformation"] != nil && (subtype == "合同" || subtype == "中标" || subtype == "成交" || subtype == "采购意向" || toptype == "招标")) || (tmp["tag_topinformation"] == nil && toptype == "拟建" && strings.Contains(s_topscopeclass, "建筑工程")) {
  459. update["tag_set"] = getTagSet(tmp, compare)
  460. }
  461. if toptype == "拟建" {
  462. if update["owner"] != nil {
  463. update["buyer"] = update["owner"]
  464. }
  465. }
  466. if len(update) > 0 {
  467. log.Println("保存bidding区域 省", update["area"], " 市 ", update["city"], " 区 ", update["district"], " buyerclass ", update["buyerclass"], update["buyer_type"], " id ", tid)
  468. if len(del) > 0 {
  469. bidUpdate = map[string]interface{}{"$set": update, "$unset": del}
  470. } else {
  471. bidUpdate = map[string]interface{}{"$set": update}
  472. }
  473. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, tid, bidUpdate)
  474. }
  475. return syncNo
  476. }
  477. // @Description subscopeclass、topscopeclass、package
  478. // 20230523 多包处理 subpackage = 1
  479. // @Author J 2022/6/7 5:54 PM
  480. func fieldFun(compare, update map[string]interface{}) {
  481. subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
  482. if subscopeclass != nil {
  483. m1 := map[string]bool{}
  484. newclass := []string{}
  485. for _, sc := range subscopeclass {
  486. sclass, _ := sc.(string)
  487. if !m1[sclass] {
  488. m1[sclass] = true
  489. newclass = append(newclass, sclass)
  490. }
  491. }
  492. update["s_subscopeclass"] = strings.Join(newclass, ",")
  493. update["subscopeclass"] = newclass
  494. }
  495. topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
  496. if topscopeclass != nil {
  497. m2 := map[string]bool{}
  498. newclass := []string{}
  499. for _, tc := range topscopeclass {
  500. tclass, _ := tc.(string)
  501. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  502. if !m2[tclass] {
  503. m2[tclass] = true
  504. newclass = append(newclass, tclass)
  505. }
  506. }
  507. update["topscopeclass"] = topscopeclass
  508. update["s_topscopeclass"] = strings.Join(newclass, ",")
  509. }
  510. if package1 := compare["package"]; package1 != nil {
  511. packageM, _ := package1.(map[string]interface{})
  512. update["package"] = packageM
  513. for _, p := range packageM {
  514. pm, _ := p.(map[string]interface{})
  515. if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
  516. util.Float64All(pm["bidamount"]) > 0 {
  517. update["multipackage"] = 1
  518. break
  519. }
  520. }
  521. } else {
  522. update["multipackage"] = 0
  523. }
  524. // subpackage
  525. if compare["package"] != nil && compare["s_winner"] != nil && compare["bidamount"] != nil {
  526. pg := compare["package"].(map[string]interface{})
  527. if len(pg) > 1 {
  528. var bmt []float64
  529. var swn []string
  530. for _, p := range pg {
  531. p1 := p.(map[string]interface{})
  532. if p1["bidamount"] != nil {
  533. bmt = append(bmt, util.Float64All(p1["bidamount"]))
  534. }
  535. if w := util.ObjToString(p1["winner"]); w != "" {
  536. swn = append(swn)
  537. }
  538. }
  539. if len(bmt) > 1 && len(swn) > 1 {
  540. sn := strings.Split(util.ObjToString(compare["s_winner"]), ",")
  541. sort.Strings(sn)
  542. sort.Strings(swn)
  543. swn1 := util.ObjArrToStringArr(Duplicate(swn)) // 去重
  544. if strings.Join(swn1, ",") == strings.Join(sn, ",") {
  545. bidamount := 0.0
  546. for _, f := range bmt {
  547. bidamount += f
  548. }
  549. if bidamount == util.Float64All(compare["bidamount"]) {
  550. update["subpackage"] = 1
  551. }
  552. }
  553. }
  554. }
  555. }
  556. }
  557. // @Description entidlist
  558. // @Author J 2022/6/7 2:36 PM
  559. func companyFun(tmp map[string]interface{}) (cid []string) {
  560. sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
  561. for _, w := range sWinnerarr {
  562. if w != "" {
  563. id := redis.GetStr("qyxy_id", w)
  564. if id == "" {
  565. ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  566. if len(*ents) > 0 {
  567. id = util.ObjToString((*ents)[0]["_id"])
  568. redis.PutCKV("qyxy_id", w, id)
  569. } else {
  570. ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
  571. if len(*ent) > 0 {
  572. id = util.ObjToString((*ent)["company_id"])
  573. redis.PutCKV("qyxy_id", w, id)
  574. }
  575. }
  576. }
  577. if id == "" {
  578. id = "-"
  579. }
  580. cid = append(cid, id)
  581. }
  582. }
  583. return cid
  584. }
  585. // @Description update 修改bidding表,extractM修改抽取表
  586. // @Author J 2022/6/10 10:29 AM
  587. func typeFunc(tmp, update, extractM map[string]interface{}) {
  588. if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
  589. if t := util.ObjToString(jyData["type"]); t != "" {
  590. switch t {
  591. //case "采购信息":
  592. case "招标公告":
  593. if util.ObjToString(tmp["toptype"]) != "招标" {
  594. update["toptype"] = "招标"
  595. extractM["toptype"] = "招标"
  596. delete(update, "subtype")
  597. }
  598. case "采购意向":
  599. if util.ObjToString(tmp["toptype"]) != "采购意向" {
  600. update["toptype"] = "采购意向"
  601. update["subtype"] = "采购意向"
  602. extractM["toptype"] = "采购意向"
  603. extractM["subtype"] = "采购意向"
  604. }
  605. case "招标预告":
  606. if util.ObjToString(tmp["toptype"]) != "预告" {
  607. update["toptype"] = "预告"
  608. extractM["toptype"] = "预告"
  609. delete(update, "subtype")
  610. }
  611. case "招标结果":
  612. if util.ObjToString(tmp["toptype"]) != "结果" {
  613. update["toptype"] = "结果"
  614. extractM["toptype"] = "结果"
  615. delete(update, "subtype")
  616. }
  617. }
  618. }
  619. }
  620. }
  621. // @Description 附件有效字段(isValidFile)
  622. // @Author J 2022/7/8 14:41
  623. func validFile(tmp map[string]interface{}) int {
  624. isContinue := false
  625. if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  626. if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  627. for _, att := range atts {
  628. if att == nil {
  629. continue
  630. }
  631. if reflect.TypeOf(att).String() == "string" {
  632. continue
  633. }
  634. att1 := att.(map[string]interface{})
  635. if fid := util.ObjToString(att1["fid"]); fid != "" {
  636. isContinue = true
  637. break
  638. }
  639. }
  640. if isContinue {
  641. if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  642. if len(attachTxt) > 0 {
  643. for _, at := range attachTxt {
  644. at1 := at.(map[string]interface{})
  645. if len(at1) > 0 {
  646. for k, _ := range at1 {
  647. if reflect.TypeOf(at1[k]).String() == "string" {
  648. continue
  649. }
  650. at2 := at1[k].(map[string]interface{})
  651. s := strings.ToLower(util.ObjToString(at2["file_name"]))
  652. if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
  653. strings.Contains(s, "pdf") {
  654. if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  655. return -1
  656. } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
  657. return 1
  658. }
  659. }
  660. }
  661. break
  662. } else {
  663. break
  664. }
  665. }
  666. }
  667. }
  668. flag := false
  669. for _, att := range atts {
  670. if att == nil {
  671. continue
  672. }
  673. if reflect.TypeOf(att).String() == "string" {
  674. continue
  675. }
  676. att1 := att.(map[string]interface{})
  677. if fid := util.ObjToString(att1["fid"]); fid != "" {
  678. ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
  679. if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
  680. return 1
  681. } else {
  682. flag = true
  683. }
  684. }
  685. }
  686. if flag {
  687. return -1
  688. }
  689. }
  690. }
  691. }
  692. return 0
  693. }
  694. // @Description id不变,内容变化 重新索引数据
  695. // @Author J 2022/8/10 13:29
  696. func taskinfo(id string) {
  697. tmp, _ := MgoB.FindById("bidding", id, nil)
  698. if tmp == nil || len(*tmp) == 0 {
  699. log.Println(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
  700. return
  701. }
  702. extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
  703. if extractM == nil || len(*extractM) == 0 {
  704. extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
  705. if extractM == nil || len(*extractM) == 0 {
  706. log.Println(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
  707. return
  708. }
  709. }
  710. update := map[string]interface{}{} //要更新的mongo数据
  711. //更新bidding表字段
  712. for _, k := range config.Conf.Serve.FieldS {
  713. v1 := (*extractM)[k] //extract
  714. v2 := (*tmp)[k] //bidding
  715. if v2 == nil && v1 != nil {
  716. update[k] = v1
  717. } else if v2 != nil && v1 != nil {
  718. update[k] = v1
  719. } else if v2 != nil && v1 == nil {
  720. if k == "city" || k == "district" {
  721. update[k] = ""
  722. }
  723. }
  724. }
  725. if util.IntAll((*extractM)["repeat"]) == 1 {
  726. update["extracttype"] = -1
  727. update["dataprocess"] = 7
  728. if (*extractM)["repeat_id"] != nil {
  729. update["repeat_id"] = (*extractM)["repeat_id"]
  730. }
  731. } else {
  732. update["extracttype"] = 1
  733. update["dataprocess"] = 8
  734. }
  735. //处理分类
  736. fieldFun(*extractM, update)
  737. extractMap := make(map[string]interface{})
  738. if util.ObjToString((*tmp)["s_winner"]) != "" {
  739. cid := companyFun(*tmp)
  740. if len(cid) > 0 {
  741. update["entidlist"] = cid
  742. extractMap["entidlist"] = cid
  743. }
  744. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
  745. }
  746. // 附件有效字段
  747. if i := validFile(*tmp); i != 0 {
  748. if i == -1 {
  749. update["isValidFile"] = false
  750. } else {
  751. update["isValidFile"] = true
  752. }
  753. }
  754. if len(update) > 0 {
  755. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
  756. }
  757. mapinfo := map[string]interface{}{
  758. "infoid": id,
  759. "stype": "index-by-id",
  760. }
  761. datas, _ := json.Marshal(mapinfo)
  762. var next = &net.UDPAddr{
  763. IP: net.ParseIP(config.Conf.Udp.Next.Addr),
  764. Port: util.IntAll(config.Conf.Udp.Next.Port),
  765. }
  766. log.Println("nsq data over es ", next, " mapinfo ", string(datas))
  767. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
  768. }
  769. var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
  770. // @Description 发布时间处理
  771. // @Author J 2023/5/23 14:32
  772. func methodPb(tmp map[string]interface{}) int64 {
  773. if tmp["ext_publishtime"] != nil {
  774. if newPb := util.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
  775. return newPb
  776. }
  777. }
  778. for _, d := range DateTimeSelect {
  779. if tmp[d] != nil && util.Int64All(tmp[d]) < time.Now().Unix() {
  780. return util.Int64All(tmp[d])
  781. }
  782. }
  783. return 0
  784. }
  785. // Duplicate
  786. // @Description 去重
  787. // @Author J 2023/5/24 09:53
  788. func Duplicate(a interface{}) (ret []interface{}) {
  789. va := reflect.ValueOf(a)
  790. for i := 0; i < va.Len(); i++ {
  791. if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), va.Index(i).Interface()) {
  792. continue
  793. }
  794. ret = append(ret, va.Index(i).Interface())
  795. }
  796. return ret
  797. }
  798. // @Description 获取情报标签
  799. // @Author 徐志恒 2024/2/21 09:53
  800. func getTagSet(tmp, compare map[string]interface{}) map[string]map[string]interface{} {
  801. tagSet := map[string]map[string]interface{}{}
  802. wuye := map[string]interface{}{}
  803. buyer := util.ObjToString(compare["buyer"])
  804. publishtime := util.Int64All(tmp["publishtime"])
  805. bidamount := util.Float64All(compare["bidamount"])
  806. toptype := util.ObjToString(tmp["toptype"])
  807. subtype := util.ObjToString(tmp["subtype"])
  808. if subtype == "合同" {
  809. wuye["isfirsthand"] = 62
  810. if buyer != "" {
  811. sql := `{
  812. "query": {
  813. "bool": {
  814. "must": [
  815. {
  816. "term": {
  817. "buyer": "` + buyer + `"
  818. }
  819. },
  820. {
  821. "term": {
  822. "tag_topinformation": "情报_物业"
  823. }
  824. },
  825. {
  826. "term": {
  827. "subtype": "合同"
  828. }
  829. },
  830. {
  831. "range": {
  832. "publishtime": {
  833. "lte": ` + fmt.Sprint(publishtime) + `
  834. }
  835. }
  836. }
  837. ]
  838. }
  839. },
  840. "sort": {
  841. "publishtime": "asc"
  842. },
  843. "_source": [
  844. "s_winner"
  845. ],
  846. "size": 10000
  847. }`
  848. data := Es.Get("bidding", "bidding", sql)
  849. if data != nil && len(*data) > 0 {
  850. count := 0
  851. first := util.ObjToString((*data)[0]["s_winner"])
  852. for k, v := range *data {
  853. winner := util.ObjToString(v["s_winner"])
  854. if k > 0 && first != winner {
  855. first = winner
  856. count++
  857. }
  858. }
  859. changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
  860. changehands, _ := strconv.ParseFloat(changehand, 64)
  861. wuye["changehand"] = changehands
  862. if changehands > 0.3 {
  863. wuye["changehandindex"] = 61
  864. }
  865. if len(*data) > 1 {
  866. wuye["isfirsthand"] = 0
  867. }
  868. }
  869. }
  870. wuye["period"] = getperiod(compare)
  871. } else if toptype == "招标" || toptype == "采购意向" {
  872. bidamount = util.Float64All(compare["budget"])
  873. }
  874. if tmp["projectinfo"] != nil {
  875. projectInfo := util.ObjToMap(tmp["projectinfo"])
  876. if projectInfo != nil && len(*projectInfo) > 0 {
  877. if (*projectInfo)["attachments"] != nil {
  878. wuye["isfile"] = 63
  879. }
  880. }
  881. }
  882. wuye["scale"] = getBidamountRange(bidamount)
  883. if tmp["property_form"] != nil {
  884. property_form := util.ObjArrToStringArr(tmp["property_form"].([]interface{}))
  885. wuye["property_form"] = getpropertyform(property_form)
  886. }
  887. tagSet["wuye"] = wuye
  888. return tagSet
  889. }
  890. func getBidamountRange(value float64) int {
  891. switch {
  892. case value < 500000:
  893. return 1
  894. case value >= 500000 && value < 1000000:
  895. return 2
  896. case value >= 1000000 && value < 2000000:
  897. return 3
  898. case value >= 2000000 && value < 5000000:
  899. return 4
  900. default:
  901. return 5
  902. }
  903. }
  904. func getpropertyform(value []string) string {
  905. arr := []string{}
  906. categories := map[string]string{
  907. "住宅": "21",
  908. "政府办公楼": "22",
  909. "学校": "23",
  910. "医院": "24",
  911. "产业园区": "25",
  912. "旅游景区": "26",
  913. "交通运输": "27",
  914. "商务办公楼": "28",
  915. "酒店": "29",
  916. }
  917. for _, v := range value {
  918. if categories[v] != "" {
  919. arr = append(arr, categories[v])
  920. }
  921. }
  922. return strings.Join(arr, ",")
  923. }
  924. func getperiod(data map[string]interface{}) int {
  925. res := 16
  926. signaturedate := util.Int64All(data["signaturedate"]) //合同签订日期
  927. expiredate := util.Int64All(data["expiredate"]) //合同截止日期
  928. // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
  929. project_duration := util.IntAll(data["project_duration"]) //工期时长
  930. project_timeunit := util.ObjToString(data["project_timeunit"]) //工期单位
  931. result := float64(0)
  932. if expiredate > 0 && signaturedate > 0 {
  933. result = calculateYearDifference(signaturedate, expiredate)
  934. } else if project_duration > 0 && project_timeunit != "" {
  935. if strings.Contains(project_timeunit, "年") {
  936. if project_duration == 1 {
  937. res = 12
  938. } else if project_duration == 2 {
  939. res = 13
  940. } else if project_duration == 3 {
  941. res = 14
  942. } else if project_duration == 5 {
  943. res = 15
  944. }
  945. return res
  946. } else if strings.Contains(project_timeunit, "月") {
  947. result = float64(project_duration) / 12
  948. } else if strings.Contains(project_timeunit, "周") {
  949. result = float64(project_duration) * 7 / 365
  950. } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
  951. result = float64(project_duration) / 365
  952. }
  953. }
  954. if result == 0 {
  955. res = 16
  956. } else if result < 1 {
  957. res = 11
  958. } else if result >= 1 && result < 2 {
  959. res = 12
  960. } else if result >= 2 && result < 3 {
  961. res = 13
  962. } else if result >= 3 && result < 4 {
  963. res = 14
  964. } else if result >= 5 {
  965. res = 15
  966. }
  967. return res
  968. }
  969. func calculateYearDifference(startTime int64, endTime int64) float64 {
  970. start := time.Unix(startTime, 0)
  971. end := time.Unix(endTime, 0)
  972. duration := end.Sub(start)
  973. years := duration.Hours() / 24 / 365
  974. return years
  975. }
  976. func getStr(b string) string {
  977. if b == "" {
  978. return "其它"
  979. }
  980. a1 := "(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)"
  981. a2 := "(卫健委|医疗)"
  982. a3 := "(教育|学校)"
  983. a4 := "(人行|金融业)"
  984. a5 := "(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)"
  985. if strings.Contains(a1, b) {
  986. return "政府机构"
  987. } else if strings.Contains(a2, b) {
  988. return "医疗单位"
  989. } else if strings.Contains(a3, b) {
  990. return "教育单位"
  991. } else if strings.Contains(a4, b) {
  992. return "金融企业"
  993. } else if strings.Contains(a5, b) {
  994. return "商业公司"
  995. } else {
  996. return "其它"
  997. }
  998. return "其它"
  999. }