task.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053
  1. package main
  2. import (
  3. "encoding/json"
  4. "field_sync/config"
  5. "field_sync/oss"
  6. "fmt"
  7. "net"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "log"
  15. "go.mongodb.org/mongo-driver/bson"
  16. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  17. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  18. "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
  19. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  20. )
  var (
  	// regLetter matches runs of lowercase ASCII letters (because of `*` it also
  	// matches the empty string). fieldFun uses it to strip letters from
  	// topscopeclass entries.
  	regLetter = regexp.MustCompile("[a-z]*")
  	// cityEndReg matches a trailing 区/县/市 (district/county/city) suffix.
  	// It is not referenced in the visible part of this file.
  	cityEndReg = regexp.MustCompile("(区|县|市)$")
  )
  25. func biddingTask(data []byte, mapInfo map[string]interface{}) {
  26. defer util.Catch()
  27. stype := util.ObjToString(mapInfo["stype"])
  28. q, _ := mapInfo["query"].(map[string]interface{})
  29. bkey, _ := mapInfo["bkey"].(string)
  30. if q == nil {
  31. q = map[string]interface{}{
  32. "_id": map[string]interface{}{
  33. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  34. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  35. },
  36. }
  37. }
  38. //extract库
  39. // extractConn := MgoE.GetMgoConn()
  40. // defer MgoE.DestoryMongoConn(extractConn)
  41. // extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  42. // "field_source": 0,
  43. // "kvtext": 0,
  44. // }).Sort("_id").Iter()
  45. // eMap := map[string]map[string]interface{}{}
  46. // extCount, repeatCount := 0, 0
  47. // for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
  48. // if util.IntAll(tmp["repeat"]) == 1 {
  49. // repeatCount++
  50. // }
  51. // tid := mongodb.BsonIdToSId(tmp["_id"])
  52. // eMap[tid] = tmp
  53. // tmp = make(map[string]interface{})
  54. // }
  55. // log.Println("抽取表 数据量", extCount, "重复数据量", repeatCount)
  56. //bidding库
  57. biddingConn := MgoB.GetMgoConn()
  58. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  59. log.Println("bidding表 同步总数:", count)
  60. c := 0
  61. ygsiteArr := []map[string]interface{}{}
  62. sitedata, _ := MgoE.Find("bidding_yg_site", map[string]interface{}{}, nil, nil, false, -1, -1)
  63. if sitedata != nil && len(*sitedata) > 0 {
  64. ygsiteArr = *sitedata
  65. }
  66. // if count < 500000 {
  67. // var res []map[string]interface{}
  68. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  69. "contenthtml": 0,
  70. }).Iter()
  71. for tmp := make(map[string]interface{}); result.Next(tmp); {
  72. // res = append(res, tmp)
  73. ec := doIndex(tmp, bkey, stype, ygsiteArr)
  74. if ec > 0 {
  75. c++
  76. }
  77. tmp = make(map[string]interface{})
  78. }
  79. MgoB.DestoryMongoConn(biddingConn)
  80. // log.Println("查询结果 bidding", count, "抽取:", extCount)
  81. // c = doIndex(res, bkey, stype)
  82. // } else {
  83. // log.Println("查询结果 数据量太大,放弃", count)
  84. // MgoB.DestoryMongoConn(biddingConn)
  85. // }
  86. log.Println("bidding sync...over all", count, "extract sync ", c)
  87. NextNode(mapInfo, stype)
  88. // NextNodePro(mapInfo, stype)
  89. NextNodeTidb(mapInfo, stype)
  90. if stype == "bidding_history" {
  91. NextNodeBidData(mapInfo) // bidding-data数据
  92. NextNodeTidbQyxy(mapInfo) // tidb-企业数据
  93. NextNodeHn(mapInfo)
  94. }
  95. if stype == "bidding" {
  96. uq := map[string]interface{}{
  97. "gtid": map[string]interface{}{
  98. "$gte": util.ObjToString(mapInfo["gtid"]),
  99. },
  100. "lteid": map[string]interface{}{
  101. "$lte": util.ObjToString(mapInfo["lteid"]),
  102. },
  103. }
  104. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 7, "updatetime": time.Now().Unix()}}, false, true)
  105. }
  106. //领域标签处理的数据 id段
  107. if stype == "bidding_history" {
  108. MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
  109. }
  110. }
  // biddingAllTask walks the bidding and extract collections in parallel for one
  // filter (either mapInfo["query"] or the (gtid, lteid] id range) and queues the
  // resulting updates on updateBidPool / updateExtPool. data is not used.
  //
  // Both cursors are opened with Sort("-_id"); the inner loop compares the string
  // forms of the two ids to pair a bidding document with its extract document.
  func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
  	defer util.Catch()
  	q, _ := mapInfo["query"].(map[string]interface{})
  	if q == nil {
  		// Unchecked assertions: a missing/non-string gtid or lteid panics here
  		// (util.Catch is deferred above).
  		q = map[string]interface{}{
  			"_id": map[string]interface{}{
  				"$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  				"$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  			},
  		}
  	}
  	// extract database cursor (field_source and kvtext are excluded)
  	extractConn := MgoE.GetMgoConn()
  	defer MgoE.DestoryMongoConn(extractConn)
  	extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  		"field_source": 0,
  		"kvtext": 0,
  	}).Sort("-_id").Iter()
  	// bidding database cursor (contenthtml and field_source are excluded)
  	biddingConn := MgoB.GetMgoConn()
  	defer MgoB.DestoryMongoConn(biddingConn)
  	count := 0
  	// compare holds the current extract document; nil means "fetch the next one".
  	var compare map[string]interface{}
  	result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  		"contenthtml": 0,
  		"field_source": 0,
  	}).Sort("-_id").Iter()
  	for tmp := make(map[string]interface{}); result.Next(tmp); count++ {
  		update := map[string]interface{}{}
  		del := map[string]interface{}{} // fields that have a value in bidding but none in extract
  		// ---- comparison start ----
  		for {
  			if compare == nil {
  				compare = make(map[string]interface{})
  				// When the extract cursor is exhausted compare stays a non-nil
  				// empty map, so the fieldFun call below still runs on it.
  				if !extractResult.Next(compare) {
  					break
  				}
  			}
  			if compare != nil {
  				cid := mongodb.BsonIdToSId(compare["_id"])
  				tid := mongodb.BsonIdToSId(tmp["_id"])
  				if cid == tid {
  					// Update the bidding document; fields listed in its modifyinfo are left alone.
  					modifyinfo := make(map[string]bool)
  					if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
  						for k := range tmpmodifyinfo {
  							modifyinfo[k] = true
  						}
  					}
  					// Fields that are never unset even when extract has no value for them.
  					fmap := map[string]bool{
  						"attach_text": true,
  						"purchasing": true,
  						"purchasinglist": true,
  						"purchasingsource": true,
  						"review_experts": true,
  					}
  					for _, k := range config.Conf.Serve.FieldS {
  						v1 := compare[k] //extract
  						v2 := tmp[k] //bidding
  						if v2 == nil && v1 != nil {
  							update[k] = v1
  						} else if v2 != nil && v1 != nil && !modifyinfo[k] {
  							update[k] = v1
  						} else if v2 != nil && v1 == nil && !modifyinfo[k] {
  							// The s_* string forms are only unset when their array
  							// counterpart was already marked for unset earlier in this
  							// loop, so the outcome depends on the order of FieldS.
  							if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  								continue
  							} else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  								continue
  							}
  							if fmap[k] {
  								continue
  							}
  							del[k] = 1
  						}
  					}
  					// repeat == 1 in extract marks the document as a duplicate.
  					if util.IntAll(compare["repeat"]) == 1 {
  						update["extracttype"] = -1
  						update["dataprocess"] = 7
  						if compare["repeat_id"] != nil {
  							update["repeat_id"] = compare["repeat_id"]
  						}
  					} else {
  						update["extracttype"] = 1
  						update["dataprocess"] = 8
  					}
  					break
  				} else {
  					// NOTE(review): both cursors are sorted descending ("-_id"), yet the
  					// extract cursor is advanced when cid < tid and kept when cid > tid.
  					// That direction looks like it was written for ascending order —
  					// confirm before relying on the pairing.
  					if cid < tid {
  						compare = nil
  						continue
  					} else {
  						break
  					}
  				}
  			} else {
  				break
  			}
  		}
  		// ---- comparison end ----
  		// Classification fields.
  		// NOTE(review): this also runs when the ids did not match (or the extract
  		// cursor was exhausted), and compare is then discarded — verify intent.
  		if compare != nil { //extract
  			fieldFun(compare, update)
  			compare = nil
  		}
  		// entidlist
  		extractMap := make(map[string]interface{})
  		// This interface-vs-string comparison is also true when s_winner is absent.
  		if update["s_winner"] != "" {
  			cid := companyFun(update)
  			if len(cid) > 0 {
  				update["entidlist"] = cid
  				extractMap["entidlist"] = cid
  			}
  		}
  		if len(extractMap) > 0 {
  			updateExtPool <- []map[string]interface{}{
  				{"_id": tmp["_id"]},
  				{"$set": extractMap},
  			}
  		}
  		// Attachment validity flag: validFile returns -1 (invalid), 0 (leave unset) or another non-zero value (valid).
  		if i := validFile(tmp); i != 0 {
  			if i == -1 {
  				update["isValidFile"] = false
  			} else {
  				update["isValidFile"] = true
  			}
  		}
  		if len(update) > 0 {
  			if len(del) > 0 { // there are fields to unset as well
  				updateBidPool <- []map[string]interface{}{{
  					"_id": tmp["_id"],
  				},
  					{"$set": update, "$unset": del},
  				}
  			} else {
  				updateBidPool <- []map[string]interface{}{{
  					"_id": tmp["_id"],
  				},
  					{"$set": update},
  				}
  			}
  		}
  		if count%50000 == 0 {
  			log.Println("biddingTask current", count)
  		}
  		// Fresh map for the next cursor read.
  		tmp = make(map[string]interface{})
  	}
  	log.Println("biddingAll sync...over all", count)
  }
  261. func doIndex(tmp map[string]interface{}, bkey, stype string, ygsiteArr []map[string]interface{}) int {
  262. syncNo := 0 //抽取表数据同步数量
  263. //对比两张表数据,减少查询次数
  264. var compare map[string]interface{}
  265. var bidUpdate = map[string]interface{}{}
  266. var extUpdate = map[string]interface{}{}
  267. //SaveEsLock := &sync.Mutex{}
  268. // log.Println("start ...")
  269. // for n, tmp := range infos {
  270. tid := mongodb.BsonIdToSId(tmp["_id"])
  271. update := map[string]interface{}{} //要更新的mongo数据
  272. del := map[string]interface{}{}
  273. edata, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, tid, nil)
  274. //对比方法----------------
  275. if edata != nil && len(*edata) > 0 {
  276. compare = *edata
  277. if stype == "bidding" {
  278. // 增量id段 正常数据
  279. if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
  280. tmp = make(map[string]interface{})
  281. compare = nil
  282. return 0
  283. }
  284. // delete(eMap, tid)
  285. }
  286. if stype == "bidding_history" {
  287. //增量id段 历史数据
  288. if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
  289. tmp = make(map[string]interface{})
  290. compare = nil
  291. return 0
  292. }
  293. // delete(eMap, tid)
  294. }
  295. syncNo++
  296. log.Println("抽取区域 省", compare["area"], " 市 ", compare["city"], " 区 ", compare["district"], " id ", tid)
  297. modifyinfo := make(map[string]bool)
  298. if tmp["modifyinfo"] != nil {
  299. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok {
  300. for k := range tmpmodifyinfo {
  301. modifyinfo[k] = true
  302. }
  303. }
  304. }
  305. fmap := map[string]bool{
  306. "attach_text": true,
  307. "purchasing": true,
  308. "purchasinglist": true,
  309. "purchasingsource": true,
  310. "review_experts": true,
  311. }
  312. for _, k := range config.Conf.Serve.FieldS {
  313. v1 := compare[k] //extract
  314. v2 := tmp[k] //bidding
  315. if v2 == nil && v1 != nil {
  316. update[k] = v1
  317. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  318. update[k] = v1
  319. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  320. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  321. continue
  322. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  323. continue
  324. } else if k == "city" || k == "district" {
  325. update[k] = ""
  326. } else if fmap[k] {
  327. continue
  328. } else {
  329. del[k] = 1
  330. }
  331. }
  332. }
  333. // 附件重采,数据同步时不更新判重标识
  334. if util.IntAll(compare["repeat"]) == 1 {
  335. update["extracttype"] = -1
  336. update["dataprocess"] = 7
  337. if compare["repeat_id"] != nil {
  338. update["repeat_id"] = compare["repeat_id"]
  339. }
  340. } else {
  341. update["extracttype"] = 1
  342. update["dataprocess"] = 8
  343. }
  344. } else {
  345. compare = nil
  346. if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  347. update["dataging"] = 0
  348. }
  349. update["dataprocess"] = 8
  350. }
  351. //下面可以多线程跑的--->
  352. //处理分类
  353. if compare != nil { //extract
  354. fieldFun(compare, update)
  355. // publishtime 20230523
  356. if util.IntAll(tmp["publishtime"]) == -1 {
  357. if pb := methodPb(compare); pb > 0 {
  358. update["publishtime"] = pb
  359. }
  360. }
  361. compare = nil
  362. }
  363. //------------------对比结束
  364. //处理key descript
  365. if bkey == "" {
  366. DealInfo(&tmp, &update)
  367. }
  368. // entidlist
  369. extractMap := make(map[string]interface{})
  370. if update["s_winner"] != "" {
  371. cid := companyFun(update)
  372. if len(cid) > 0 {
  373. tmp["entidlist"] = cid
  374. update["entidlist"] = cid
  375. extractMap["entidlist"] = cid
  376. }
  377. }
  378. //阳光采购
  379. spidercode := util.ObjToString(tmp["spidercode"])
  380. site := util.ObjToString(tmp["site"])
  381. infoAttribute := util.ObjToString(tmp["infoattribute"])
  382. if infoAttribute == "zc_cgxx" {
  383. update["public_type"] = "用户发布"
  384. update["extracttype"] = -1
  385. update["dataprocess"] = 7
  386. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "user"})
  387. log.Println("阳光采购用户发布", tid)
  388. } else {
  389. if util.IntAll(compare["repeat"]) != 1 {
  390. for _, v := range ygsiteArr {
  391. spidercodes := util.ObjToString(v["spidercode"])
  392. sites := util.ObjToString(v["site"])
  393. datatype := util.ObjToString(v["datatype"])
  394. if datatype == "1" && spidercodes == spidercode {
  395. update["infoattribute"] = "zc_cgxx"
  396. update["public_type"] = "平台发布"
  397. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "spidercode"})
  398. log.Println("阳光采购平台发布爬虫", tid)
  399. } else if datatype == "2" && site == sites {
  400. update["infoattribute"] = "zc_cgxx"
  401. update["public_type"] = "平台发布"
  402. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "site"})
  403. log.Println("阳光采购平台发布站点", tid)
  404. } else if datatype == "3" && spidercodes == spidercode {
  405. update["infoattribute"] = "zc_cgxx"
  406. update["public_type"] = "平台发布"
  407. update["is_yg_new"] = 1
  408. update["extracttype"] = -1
  409. update["dataprocess"] = 7
  410. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "is_yg_new"})
  411. log.Println("阳光采购平台新采集爬虫", tid)
  412. }
  413. }
  414. }
  415. }
  416. // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
  417. typeFunc(tmp, update, extractMap)
  418. if len(extractMap) > 0 {
  419. if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
  420. // extUpdate = append(extUpdate, []map[string]interface{}{
  421. // {"_id": tmp["_id"]},
  422. // {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  423. // })
  424. extUpdate = map[string]interface{}{"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}}
  425. } else {
  426. extUpdate = map[string]interface{}{"$set": extractMap}
  427. }
  428. // if len(extUpdate) >= MgoBulkSize {
  429. // tmps := extUpdate
  430. // MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  431. // extUpdate = [][]map[string]interface{}{}
  432. // }
  433. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, tid, extUpdate)
  434. }
  435. // 附件有效字段
  436. if i := validFile(tmp); i != 0 {
  437. if i == -1 {
  438. tmp["isValidFile"] = false
  439. update["isValidFile"] = false
  440. } else {
  441. tmp["isValidFile"] = true
  442. update["isValidFile"] = true
  443. }
  444. }
  445. // 2024-02-21 徐志恒 情报标签字段
  446. toptype := util.ObjToString(tmp["toptype"])
  447. subtype := util.ObjToString(tmp["subtype"])
  448. buyerclass := util.ObjToString(update["buyerclass"])
  449. if buyerclass != "" {
  450. update["buyer_type"] = getStr(buyerclass)
  451. }
  452. s_topscopeclass := util.ObjToString(update["s_topscopeclass"])
  453. if (tmp["tag_topinformation"] != nil && (subtype == "合同" || subtype == "中标" || subtype == "成交" || subtype == "采购意向" || toptype == "招标")) || (tmp["tag_topinformation"] == nil && toptype == "拟建" && strings.Contains(s_topscopeclass, "建筑工程")) {
  454. update["tag_set"] = getTagSet(tmp, compare)
  455. }
  456. if len(update) > 0 {
  457. log.Println("保存bidding区域 省", update["area"], " 市 ", update["city"], " 区 ", update["district"], " buyerclass ", update["buyerclass"], update["buyer_type"], " id ", tid)
  458. if len(del) > 0 {
  459. bidUpdate = map[string]interface{}{"$set": update, "$unset": del}
  460. // bidUpdate = append(bidUpdate, []map[string]interface{}{{
  461. // "_id": tmp["_id"],
  462. // },
  463. // {"$set": update, "$unset": del},
  464. // })
  465. } else {
  466. bidUpdate = map[string]interface{}{"$set": update}
  467. }
  468. // if len(bidUpdate) >= MgoBulkSize {
  469. // tmps := bidUpdate
  470. // MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  471. // bidUpdate = [][]map[string]interface{}{}
  472. // }
  473. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, tid, bidUpdate)
  474. }
  475. // if n%500 == 0 {
  476. // log.Println("biddingTask current ", n)
  477. // }
  478. // tmp = make(map[string]interface{})
  479. // }
  480. //SaveEsLock.Lock()
  481. // if len(bidUpdate) > 0 {
  482. // tmps := bidUpdate
  483. // MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  484. // bidUpdate = [][]map[string]interface{}{}
  485. // }
  486. // if len(extUpdate) > 0 {
  487. // tmps := extUpdate
  488. // MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  489. // extUpdate = [][]map[string]interface{}{}
  490. // }
  491. //SaveEsLock.Unlock()
  492. return syncNo
  493. }
  494. // @Description subscopeclass、topscopeclass、package
  495. // 20230523 多包处理 subpackage = 1
  496. // @Author J 2022/6/7 5:54 PM
  497. func fieldFun(compare, update map[string]interface{}) {
  498. subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
  499. if subscopeclass != nil {
  500. m1 := map[string]bool{}
  501. newclass := []string{}
  502. for _, sc := range subscopeclass {
  503. sclass, _ := sc.(string)
  504. if !m1[sclass] {
  505. m1[sclass] = true
  506. newclass = append(newclass, sclass)
  507. }
  508. }
  509. update["s_subscopeclass"] = strings.Join(newclass, ",")
  510. update["subscopeclass"] = newclass
  511. }
  512. topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
  513. if topscopeclass != nil {
  514. m2 := map[string]bool{}
  515. newclass := []string{}
  516. for _, tc := range topscopeclass {
  517. tclass, _ := tc.(string)
  518. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  519. if !m2[tclass] {
  520. m2[tclass] = true
  521. newclass = append(newclass, tclass)
  522. }
  523. }
  524. update["topscopeclass"] = topscopeclass
  525. update["s_topscopeclass"] = strings.Join(newclass, ",")
  526. }
  527. if package1 := compare["package"]; package1 != nil {
  528. packageM, _ := package1.(map[string]interface{})
  529. update["package"] = packageM
  530. for _, p := range packageM {
  531. pm, _ := p.(map[string]interface{})
  532. if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
  533. util.Float64All(pm["bidamount"]) > 0 {
  534. update["multipackage"] = 1
  535. break
  536. }
  537. }
  538. } else {
  539. update["multipackage"] = 0
  540. }
  541. // subpackage
  542. if compare["package"] != nil && compare["s_winner"] != nil && compare["bidamount"] != nil {
  543. pg := compare["package"].(map[string]interface{})
  544. if len(pg) > 1 {
  545. var bmt []float64
  546. var swn []string
  547. for _, p := range pg {
  548. p1 := p.(map[string]interface{})
  549. if p1["bidamount"] != nil {
  550. bmt = append(bmt, util.Float64All(p1["bidamount"]))
  551. }
  552. if w := util.ObjToString(p1["winner"]); w != "" {
  553. swn = append(swn)
  554. }
  555. }
  556. if len(bmt) > 1 && len(swn) > 1 {
  557. sn := strings.Split(util.ObjToString(compare["s_winner"]), ",")
  558. sort.Strings(sn)
  559. sort.Strings(swn)
  560. swn1 := util.ObjArrToStringArr(Duplicate(swn)) // 去重
  561. if strings.Join(swn1, ",") == strings.Join(sn, ",") {
  562. bidamount := 0.0
  563. for _, f := range bmt {
  564. bidamount += f
  565. }
  566. if bidamount == util.Float64All(compare["bidamount"]) {
  567. update["subpackage"] = 1
  568. }
  569. }
  570. }
  571. }
  572. }
  573. }
  574. // @Description entidlist
  575. // @Author J 2022/6/7 2:36 PM
  576. func companyFun(tmp map[string]interface{}) (cid []string) {
  577. sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
  578. for _, w := range sWinnerarr {
  579. if w != "" {
  580. id := redis.GetStr("qyxy_id", w)
  581. if id == "" {
  582. ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  583. if len(*ents) > 0 {
  584. id = util.ObjToString((*ents)[0]["_id"])
  585. redis.PutCKV("qyxy_id", w, id)
  586. } else {
  587. ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
  588. if len(*ent) > 0 {
  589. id = util.ObjToString((*ent)["company_id"])
  590. redis.PutCKV("qyxy_id", w, id)
  591. }
  592. }
  593. }
  594. if id == "" {
  595. id = "-"
  596. }
  597. cid = append(cid, id)
  598. }
  599. }
  600. return cid
  601. }
  602. // @Description update 修改bidding表,extractM修改抽取表
  603. // @Author J 2022/6/10 10:29 AM
  604. func typeFunc(tmp, update, extractM map[string]interface{}) {
  605. if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
  606. if t := util.ObjToString(jyData["type"]); t != "" {
  607. switch t {
  608. //case "采购信息":
  609. case "招标公告":
  610. if util.ObjToString(tmp["toptype"]) != "招标" {
  611. update["toptype"] = "招标"
  612. extractM["toptype"] = "招标"
  613. delete(update, "subtype")
  614. }
  615. case "采购意向":
  616. if util.ObjToString(tmp["toptype"]) != "采购意向" {
  617. update["toptype"] = "采购意向"
  618. update["subtype"] = "采购意向"
  619. extractM["toptype"] = "采购意向"
  620. extractM["subtype"] = "采购意向"
  621. }
  622. case "招标预告":
  623. if util.ObjToString(tmp["toptype"]) != "预告" {
  624. update["toptype"] = "预告"
  625. extractM["toptype"] = "预告"
  626. delete(update, "subtype")
  627. }
  628. case "招标结果":
  629. if util.ObjToString(tmp["toptype"]) != "结果" {
  630. update["toptype"] = "结果"
  631. extractM["toptype"] = "结果"
  632. delete(update, "subtype")
  633. }
  634. }
  635. }
  636. }
  637. }
  // validFile computes the attachment validity flag (isValidFile) for a bidding
  // document.
  //
  // Returns:
  //   - 0: projectinfo.attachments is missing or no attachment carries a fid
  //     (callers leave isValidFile untouched)
  //   - 1: an attachment was judged valid (callers set isValidFile = true)
  //   - -1: attachments with a fid exist but none was judged valid
  //     (callers set isValidFile = false)
  //
  // @Author J 2022/7/8 14:41
  func validFile(tmp map[string]interface{}) int {
  	// isContinue becomes true once any attachment with a non-empty fid is seen.
  	isContinue := false
  	if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  		if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  			for _, att := range atts {
  				if att == nil {
  					continue
  				}
  				// Plain string entries are not attachment records.
  				if reflect.TypeOf(att).String() == "string" {
  					continue
  				}
  				// Unchecked assertion: any other non-map value panics here.
  				att1 := att.(map[string]interface{})
  				if fid := util.ObjToString(att1["fid"]); fid != "" {
  					isContinue = true
  					break
  				}
  			}
  			if isContinue {
  				// First pass: inspect attach_text. Both branches of the inner
  				// if end in break, so only the first entry the map iteration
  				// yields is examined.
  				if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  					if len(attachTxt) > 0 {
  						for _, at := range attachTxt {
  							at1 := at.(map[string]interface{})
  							if len(at1) > 0 {
  								for k, _ := range at1 {
  									// NOTE(review): reflect.TypeOf(nil) is a nil Type, so a nil
  									// value here would panic on .String() — confirm nil cannot occur.
  									if reflect.TypeOf(at1[k]).String() == "string" {
  										continue
  									}
  									at2 := at1[k].(map[string]interface{})
  									s := strings.ToLower(util.ObjToString(at2["file_name"]))
  									// NOTE(review): this parses as
  									//   !jpg || ((!jpeg) != png) || pdf
  									// which is true for every name without "jpg". It looks like
  									// "not an image, or a pdf" was intended — confirm before changing.
  									if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
  										strings.Contains(s, "pdf") {
  										if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  											return -1
  										} else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
  											return 1
  										}
  									}
  								}
  								break
  							} else {
  								break
  							}
  						}
  					}
  				}
  				// Second pass: fall back to checking the stored objects by fid.
  				flag := false
  				for _, att := range atts {
  					if att == nil {
  						continue
  					}
  					if reflect.TypeOf(att).String() == "string" {
  						continue
  					}
  					att1 := att.(map[string]interface{})
  					if fid := util.ObjToString(att1["fid"]); fid != "" {
  						// NOTE(review): ftype is read from the document (tmp), not from
  						// the attachment (att1) — verify this is intended.
  						ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
  						if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
  							return 1
  						} else {
  							flag = true
  						}
  					}
  				}
  				if flag {
  					return -1
  				}
  			}
  		}
  	}
  	return 0
  }
  711. // @Description id不变,内容变化 重新索引数据
  712. // @Author J 2022/8/10 13:29
  713. func taskinfo(id string) {
  714. tmp, _ := MgoB.FindById("bidding", id, nil)
  715. if tmp == nil || len(*tmp) == 0 {
  716. log.Println(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
  717. return
  718. }
  719. extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
  720. if extractM == nil || len(*extractM) == 0 {
  721. extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
  722. if extractM == nil || len(*extractM) == 0 {
  723. log.Println(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
  724. return
  725. }
  726. }
  727. update := map[string]interface{}{} //要更新的mongo数据
  728. //更新bidding表字段
  729. for _, k := range config.Conf.Serve.FieldS {
  730. v1 := (*extractM)[k] //extract
  731. v2 := (*tmp)[k] //bidding
  732. if v2 == nil && v1 != nil {
  733. update[k] = v1
  734. } else if v2 != nil && v1 != nil {
  735. update[k] = v1
  736. } else if v2 != nil && v1 == nil {
  737. if k == "city" || k == "district" {
  738. update[k] = ""
  739. }
  740. }
  741. }
  742. if util.IntAll((*extractM)["repeat"]) == 1 {
  743. update["extracttype"] = -1
  744. update["dataprocess"] = 7
  745. if (*extractM)["repeat_id"] != nil {
  746. update["repeat_id"] = (*extractM)["repeat_id"]
  747. }
  748. } else {
  749. update["extracttype"] = 1
  750. update["dataprocess"] = 8
  751. }
  752. //处理分类
  753. fieldFun(*extractM, update)
  754. extractMap := make(map[string]interface{})
  755. if util.ObjToString((*tmp)["s_winner"]) != "" {
  756. cid := companyFun(*tmp)
  757. if len(cid) > 0 {
  758. update["entidlist"] = cid
  759. extractMap["entidlist"] = cid
  760. }
  761. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
  762. }
  763. // 附件有效字段
  764. if i := validFile(*tmp); i != 0 {
  765. if i == -1 {
  766. update["isValidFile"] = false
  767. } else {
  768. update["isValidFile"] = true
  769. }
  770. }
  771. if len(update) > 0 {
  772. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
  773. }
  774. mapinfo := map[string]interface{}{
  775. "infoid": id,
  776. "stype": "index-by-id",
  777. }
  778. datas, _ := json.Marshal(mapinfo)
  779. var next = &net.UDPAddr{
  780. IP: net.ParseIP(config.Conf.Udp.Next.Addr),
  781. Port: util.IntAll(config.Conf.Udp.Next.Port),
  782. }
  783. log.Println("nsq data over es ", next, " mapinfo ", string(datas))
  784. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
  785. }
  // DateTimeSelect lists, in priority order, the timestamp fields methodPb falls
  // back to when ext_publishtime cannot be used.
  var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
  787. // @Description 发布时间处理
  788. // @Author J 2023/5/23 14:32
  789. func methodPb(tmp map[string]interface{}) int64 {
  790. if tmp["ext_publishtime"] != nil {
  791. if newPb := util.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
  792. return newPb
  793. }
  794. }
  795. for _, d := range DateTimeSelect {
  796. if tmp[d] != nil && util.Int64All(tmp[d]) < time.Now().Unix() {
  797. return util.Int64All(tmp[d])
  798. }
  799. }
  800. return 0
  801. }
  // Duplicate returns the elements of a with runs of adjacent equal elements
  // collapsed to a single element (equality via reflect.DeepEqual). Only
  // neighbouring duplicates are removed, so callers that want full
  // de-duplication must sort a first.
  //
  // a must be a slice, array or string; for nil or any other kind the result is
  // nil (earlier versions panicked). An empty input also yields nil.
  //
  // @Author J 2023/5/24 09:53
  func Duplicate(a interface{}) (ret []interface{}) {
  	va := reflect.ValueOf(a)
  	switch va.Kind() {
  	case reflect.Slice, reflect.Array, reflect.String:
  		// indexable kinds supported by Value.Len and Value.Index
  	default:
  		return nil
  	}
  	for i := 0; i < va.Len(); i++ {
  		cur := va.Index(i).Interface()
  		if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), cur) {
  			continue
  		}
  		ret = append(ret, cur)
  	}
  	return ret
  }
  815. // @Description 获取情报标签
  816. // @Author 徐志恒 2024/2/21 09:53
  817. func getTagSet(tmp, compare map[string]interface{}) map[string]map[string]interface{} {
  818. tagSet := map[string]map[string]interface{}{}
  819. wuye := map[string]interface{}{}
  820. buyer := util.ObjToString(compare["buyer"])
  821. publishtime := util.Int64All(tmp["publishtime"])
  822. bidamount := util.Float64All(compare["bidamount"])
  823. toptype := util.ObjToString(tmp["toptype"])
  824. subtype := util.ObjToString(tmp["subtype"])
  825. if subtype == "合同" {
  826. wuye["isfirsthand"] = 62
  827. if buyer != "" {
  828. sql := `{
  829. "query": {
  830. "bool": {
  831. "must": [
  832. {
  833. "term": {
  834. "buyer": "` + buyer + `"
  835. }
  836. },
  837. {
  838. "term": {
  839. "tag_topinformation": "情报_物业"
  840. }
  841. },
  842. {
  843. "term": {
  844. "subtype": "合同"
  845. }
  846. },
  847. {
  848. "range": {
  849. "publishtime": {
  850. "lte": ` + fmt.Sprint(publishtime) + `
  851. }
  852. }
  853. }
  854. ]
  855. }
  856. },
  857. "sort": {
  858. "publishtime": "asc"
  859. },
  860. "_source": [
  861. "s_winner"
  862. ],
  863. "size": 10000
  864. }`
  865. data := Es.Get("bidding", "bidding", sql)
  866. if data != nil && len(*data) > 0 {
  867. count := 0
  868. first := util.ObjToString((*data)[0]["s_winner"])
  869. for k, v := range *data {
  870. winner := util.ObjToString(v["s_winner"])
  871. if k > 0 && first != winner {
  872. first = winner
  873. count++
  874. }
  875. }
  876. changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
  877. changehands, _ := strconv.ParseFloat(changehand, 64)
  878. wuye["changehand"] = changehands
  879. if changehands > 0.3 {
  880. wuye["changehandindex"] = 61
  881. }
  882. if len(*data) > 1 {
  883. wuye["isfirsthand"] = 0
  884. }
  885. }
  886. }
  887. wuye["period"] = getperiod(compare)
  888. } else if toptype == "招标" || toptype == "采购意向" {
  889. bidamount = util.Float64All(compare["budget"])
  890. }
  891. if tmp["projectinfo"] != nil {
  892. projectInfo := util.ObjToMap(tmp["projectinfo"])
  893. if projectInfo != nil && len(*projectInfo) > 0 {
  894. if (*projectInfo)["attachments"] != nil {
  895. wuye["isfile"] = 63
  896. }
  897. }
  898. }
  899. wuye["scale"] = getBidamountRange(bidamount)
  900. if tmp["property_form"] != nil {
  901. property_form := util.ObjArrToStringArr(tmp["property_form"].([]interface{}))
  902. wuye["property_form"] = getpropertyform(property_form)
  903. }
  904. tagSet["wuye"] = wuye
  905. return tagSet
  906. }
  // getBidamountRange maps an amount onto a scale code from 1 to 5.
//
// The code is the 1-based position of the first exclusive upper bound the
// amount stays below: 1 below 500000, 2 below 1000000, 3 below 2000000,
// 4 below 5000000, and 5 for everything else.
func getBidamountRange(value float64) int {
	upperBounds := [...]float64{500000, 1000000, 2000000, 5000000}
	for idx, bound := range upperBounds {
		if value < bound {
			return idx + 1
		}
	}
	return len(upperBounds) + 1
}
  // getpropertyform translates property-form names into their numeric codes.
//
// Each recognised name contributes its code ("21" through "29") in input
// order; unrecognised names are dropped. The codes are returned joined by
// commas, or as an empty string when nothing matched.
func getpropertyform(value []string) string {
	codes := []string{}
	for _, name := range value {
		code := ""
		switch name {
		case "住宅":
			code = "21"
		case "政府办公楼":
			code = "22"
		case "学校":
			code = "23"
		case "医院":
			code = "24"
		case "产业园区":
			code = "25"
		case "旅游景区":
			code = "26"
		case "交通运输":
			code = "27"
		case "商务办公楼":
			code = "28"
		case "酒店":
			code = "29"
		}
		if code == "" {
			continue
		}
		codes = append(codes, code)
	}
	return strings.Join(codes, ",")
}
  941. func getperiod(data map[string]interface{}) int {
  942. res := 16
  943. signaturedate := util.Int64All(data["signaturedate"]) //合同签订日期
  944. expiredate := util.Int64All(data["expiredate"]) //合同截止日期
  945. // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
  946. project_duration := util.IntAll(data["project_duration"]) //工期时长
  947. project_timeunit := util.ObjToString(data["project_timeunit"]) //工期单位
  948. result := float64(0)
  949. if expiredate > 0 && signaturedate > 0 {
  950. result = calculateYearDifference(signaturedate, expiredate)
  951. } else if project_duration > 0 && project_timeunit != "" {
  952. if strings.Contains(project_timeunit, "年") {
  953. if project_duration == 1 {
  954. res = 12
  955. } else if project_duration == 2 {
  956. res = 13
  957. } else if project_duration == 3 {
  958. res = 14
  959. } else if project_duration == 5 {
  960. res = 15
  961. }
  962. return res
  963. } else if strings.Contains(project_timeunit, "月") {
  964. result = float64(project_duration) / 12
  965. } else if strings.Contains(project_timeunit, "周") {
  966. result = float64(project_duration) * 7 / 365
  967. } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
  968. result = float64(project_duration) / 365
  969. }
  970. }
  971. if result == 0 {
  972. res = 16
  973. } else if result < 1 {
  974. res = 11
  975. } else if result >= 1 && result < 2 {
  976. res = 12
  977. } else if result >= 2 && result < 3 {
  978. res = 13
  979. } else if result >= 3 && result < 4 {
  980. res = 14
  981. } else if result >= 5 {
  982. res = 15
  983. }
  984. return res
  985. }
  // calculateYearDifference returns the span from startTime to endTime in years.
//
// Both arguments are Unix timestamps in seconds. A year is counted as a fixed
// 365 days of 24 hours, so the result is fractional and is negative when
// endTime precedes startTime.
func calculateYearDifference(startTime int64, endTime int64) float64 {
	const (
		hoursPerDay = 24
		daysPerYear = 365
	)
	elapsed := time.Unix(endTime, 0).Sub(time.Unix(startTime, 0))
	return elapsed.Hours() / hoursPerDay / daysPerYear
}
  // getStr maps a category name onto one of five organisation types.
//
// The groups are tried in a fixed order and the first hit wins. Matching is
// substring-based: b matches a group when it occurs anywhere inside that
// group's literal list string. An empty b, or one matching no group, yields
// "其它".
func getStr(b string) string {
	const fallback = "其它"
	if len(b) == 0 {
		return fallback
	}
	groups := []struct {
		members string
		label   string
	}{
		{"(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)", "政府机构"},
		{"(卫健委|医疗)", "医疗单位"},
		{"(教育|学校)", "教育单位"},
		{"(人行|金融业)", "金融企业"},
		{"(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)", "商业公司"},
	}
	for _, g := range groups {
		if strings.Contains(g.members, b) {
			return g.label
		}
	}
	return fallback
}