task.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034
  1. package main
  2. import (
  3. "encoding/json"
  4. "field_sync/config"
  5. "field_sync/oss"
  6. "fmt"
  7. "net"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "log"
  15. "go.mongodb.org/mongo-driver/bson"
  16. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  17. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  18. "jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
  19. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  20. )
  21. var (
  22. regLetter = regexp.MustCompile("[a-z]*")
  23. cityEndReg = regexp.MustCompile("(区|县|市)$")
  24. )
  25. func biddingTask(data []byte, mapInfo map[string]interface{}) {
  26. defer util.Catch()
  27. stype := util.ObjToString(mapInfo["stype"])
  28. q, _ := mapInfo["query"].(map[string]interface{})
  29. bkey, _ := mapInfo["bkey"].(string)
  30. if q == nil {
  31. q = map[string]interface{}{
  32. "_id": map[string]interface{}{
  33. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  34. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  35. },
  36. }
  37. }
  38. //extract库
  39. // extractConn := MgoE.GetMgoConn()
  40. // defer MgoE.DestoryMongoConn(extractConn)
  41. // extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  42. // "field_source": 0,
  43. // "kvtext": 0,
  44. // }).Sort("_id").Iter()
  45. // eMap := map[string]map[string]interface{}{}
  46. // extCount, repeatCount := 0, 0
  47. // for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
  48. // if util.IntAll(tmp["repeat"]) == 1 {
  49. // repeatCount++
  50. // }
  51. // tid := mongodb.BsonIdToSId(tmp["_id"])
  52. // eMap[tid] = tmp
  53. // tmp = make(map[string]interface{})
  54. // }
  55. // log.Println("抽取表 数据量", extCount, "重复数据量", repeatCount)
  56. //bidding库
  57. biddingConn := MgoB.GetMgoConn()
  58. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  59. log.Println("bidding表 同步总数:", count)
  60. c := 0
  61. // if count < 500000 {
  62. // var res []map[string]interface{}
  63. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  64. "contenthtml": 0,
  65. }).Iter()
  66. for tmp := make(map[string]interface{}); result.Next(tmp); {
  67. // res = append(res, tmp)
  68. ec := doIndex(tmp, bkey, stype)
  69. if ec > 0 {
  70. c++
  71. }
  72. tmp = make(map[string]interface{})
  73. }
  74. MgoB.DestoryMongoConn(biddingConn)
  75. // log.Println("查询结果 bidding", count, "抽取:", extCount)
  76. // c = doIndex(res, bkey, stype)
  77. // } else {
  78. // log.Println("查询结果 数据量太大,放弃", count)
  79. // MgoB.DestoryMongoConn(biddingConn)
  80. // }
  81. log.Println("bidding sync...over all", count, "extract sync ", c)
  82. NextNode(mapInfo, stype)
  83. // NextNodePro(mapInfo, stype)
  84. NextNodeTidb(mapInfo, stype)
  85. if stype == "bidding_history" {
  86. NextNodeBidData(mapInfo) // bidding-data数据
  87. NextNodeTidbQyxy(mapInfo) // tidb-企业数据
  88. NextNodeHn(mapInfo)
  89. }
  90. if stype == "bidding" {
  91. uq := map[string]interface{}{
  92. "gtid": map[string]interface{}{
  93. "$gte": util.ObjToString(mapInfo["gtid"]),
  94. },
  95. "lteid": map[string]interface{}{
  96. "$lte": util.ObjToString(mapInfo["lteid"]),
  97. },
  98. }
  99. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 7, "updatetime": time.Now().Unix()}}, false, true)
  100. }
  101. //领域标签处理的数据 id段
  102. if stype == "bidding_history" {
  103. MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
  104. }
  105. }
  106. func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
  107. defer util.Catch()
  108. q, _ := mapInfo["query"].(map[string]interface{})
  109. if q == nil {
  110. q = map[string]interface{}{
  111. "_id": map[string]interface{}{
  112. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  113. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  114. },
  115. }
  116. }
  117. //extract库
  118. extractConn := MgoE.GetMgoConn()
  119. defer MgoE.DestoryMongoConn(extractConn)
  120. extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  121. "field_source": 0,
  122. "kvtext": 0,
  123. }).Sort("-_id").Iter()
  124. //bidding库
  125. biddingConn := MgoB.GetMgoConn()
  126. defer MgoB.DestoryMongoConn(biddingConn)
  127. count := 0
  128. var compare map[string]interface{}
  129. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  130. "contenthtml": 0,
  131. "field_source": 0,
  132. }).Sort("-_id").Iter()
  133. for tmp := make(map[string]interface{}); result.Next(tmp); count++ {
  134. update := map[string]interface{}{}
  135. del := map[string]interface{}{} //记录extract没有值而bidding中有值的字段
  136. //对比方法----------------
  137. for {
  138. if compare == nil {
  139. compare = make(map[string]interface{})
  140. if !extractResult.Next(compare) {
  141. break
  142. }
  143. }
  144. if compare != nil {
  145. cid := mongodb.BsonIdToSId(compare["_id"])
  146. tid := mongodb.BsonIdToSId(tmp["_id"])
  147. if cid == tid {
  148. //更新bidding表;bidding表modifyinfo中的字段不更新
  149. modifyinfo := make(map[string]bool)
  150. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok && tmpmodifyinfo != nil {
  151. for k := range tmpmodifyinfo {
  152. modifyinfo[k] = true
  153. }
  154. }
  155. for _, k := range config.Conf.Serve.FieldS {
  156. v1 := compare[k] //extract
  157. v2 := tmp[k] //bidding
  158. if v2 == nil && v1 != nil {
  159. update[k] = v1
  160. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  161. update[k] = v1
  162. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  163. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  164. continue
  165. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  166. continue
  167. }
  168. del[k] = 1
  169. //util.Debug("抽取结果没有值,bidding有值:field--", k, del)
  170. }
  171. }
  172. if util.IntAll(compare["repeat"]) == 1 {
  173. update["extracttype"] = -1
  174. update["dataprocess"] = 7
  175. if compare["repeat_id"] != nil {
  176. update["repeat_id"] = compare["repeat_id"]
  177. }
  178. } else {
  179. update["extracttype"] = 1
  180. update["dataprocess"] = 8
  181. }
  182. break
  183. } else {
  184. if cid < tid {
  185. compare = nil
  186. continue
  187. } else {
  188. break
  189. }
  190. }
  191. } else {
  192. break
  193. }
  194. }
  195. //------------------对比结束
  196. //处理分类
  197. if compare != nil { //extract
  198. fieldFun(compare, update)
  199. compare = nil
  200. }
  201. // entidlist
  202. extractMap := make(map[string]interface{})
  203. if update["s_winner"] != "" {
  204. cid := companyFun(update)
  205. if len(cid) > 0 {
  206. update["entidlist"] = cid
  207. extractMap["entidlist"] = cid
  208. }
  209. }
  210. if len(extractMap) > 0 {
  211. updateExtPool <- []map[string]interface{}{
  212. {"_id": tmp["_id"]},
  213. {"$set": extractMap},
  214. }
  215. }
  216. // 附件有效字段
  217. if i := validFile(tmp); i != 0 {
  218. if i == -1 {
  219. update["isValidFile"] = false
  220. } else {
  221. update["isValidFile"] = true
  222. }
  223. }
  224. if len(update) > 0 {
  225. if len(del) > 0 { //删除的字段
  226. updateBidPool <- []map[string]interface{}{{
  227. "_id": tmp["_id"],
  228. },
  229. {"$set": update, "$unset": del},
  230. }
  231. } else {
  232. updateBidPool <- []map[string]interface{}{{
  233. "_id": tmp["_id"],
  234. },
  235. {"$set": update},
  236. }
  237. }
  238. }
  239. if count%50000 == 0 {
  240. log.Println("biddingTask current", count)
  241. }
  242. tmp = make(map[string]interface{})
  243. }
  244. log.Println("biddingAll sync...over all", count)
  245. }
  246. func doIndex(tmp map[string]interface{}, bkey, stype string) int {
  247. syncNo := 0 //抽取表数据同步数量
  248. //对比两张表数据,减少查询次数
  249. var compare map[string]interface{}
  250. var bidUpdate = map[string]interface{}{}
  251. var extUpdate = map[string]interface{}{}
  252. //SaveEsLock := &sync.Mutex{}
  253. ygsiteArr := []map[string]interface{}{}
  254. sitedata, _ := MgoE.Find("bidding_yg_site", map[string]interface{}{}, nil, nil, false, -1, -1)
  255. if sitedata != nil && len(*sitedata) > 0 {
  256. ygsiteArr = *sitedata
  257. }
  258. // log.Println("start ...")
  259. // for n, tmp := range infos {
  260. tid := mongodb.BsonIdToSId(tmp["_id"])
  261. update := map[string]interface{}{} //要更新的mongo数据
  262. del := map[string]interface{}{}
  263. edata, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, tid, nil)
  264. //对比方法----------------
  265. if edata != nil && len(*edata) > 0 {
  266. compare = *edata
  267. if stype == "bidding" {
  268. // 增量id段 正常数据
  269. if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
  270. tmp = make(map[string]interface{})
  271. compare = nil
  272. return 0
  273. }
  274. // delete(eMap, tid)
  275. }
  276. if stype == "bidding_history" {
  277. //增量id段 历史数据
  278. if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
  279. tmp = make(map[string]interface{})
  280. compare = nil
  281. return 0
  282. }
  283. // delete(eMap, tid)
  284. }
  285. syncNo++
  286. log.Println("抽取区域 省", compare["area"], " 市 ", compare["city"], " 区 ", compare["district"], " id ", tid)
  287. modifyinfo := make(map[string]bool)
  288. if tmp["modifyinfo"] != nil {
  289. if tmpmodifyinfo, ok := tmp["modifyinfo"].(map[string]interface{}); ok {
  290. for k := range tmpmodifyinfo {
  291. modifyinfo[k] = true
  292. }
  293. }
  294. }
  295. for _, k := range config.Conf.Serve.FieldS {
  296. v1 := compare[k] //extract
  297. v2 := tmp[k] //bidding
  298. if v2 == nil && v1 != nil {
  299. update[k] = v1
  300. } else if v2 != nil && v1 != nil && !modifyinfo[k] {
  301. update[k] = v1
  302. } else if v2 != nil && v1 == nil && !modifyinfo[k] {
  303. if k == "s_subscopeclass" && del["subscopeclass"] == nil {
  304. continue
  305. } else if k == "s_topscopeclass" && del["topscopeclass"] == nil {
  306. continue
  307. } else if k == "city" || k == "district" {
  308. update[k] = ""
  309. } else {
  310. del[k] = 1
  311. }
  312. }
  313. }
  314. // 附件重采,数据同步时不更新判重标识
  315. if util.IntAll(compare["repeat"]) == 1 {
  316. update["extracttype"] = -1
  317. update["dataprocess"] = 7
  318. if compare["repeat_id"] != nil {
  319. update["repeat_id"] = compare["repeat_id"]
  320. }
  321. } else {
  322. update["extracttype"] = 1
  323. update["dataprocess"] = 8
  324. }
  325. } else {
  326. compare = nil
  327. if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  328. update["dataging"] = 0
  329. }
  330. update["dataprocess"] = 8
  331. }
  332. //下面可以多线程跑的--->
  333. //处理分类
  334. if compare != nil { //extract
  335. fieldFun(compare, update)
  336. // publishtime 20230523
  337. if util.IntAll(tmp["publishtime"]) == -1 {
  338. if pb := methodPb(compare); pb > 0 {
  339. update["publishtime"] = pb
  340. }
  341. }
  342. compare = nil
  343. }
  344. //------------------对比结束
  345. //处理key descript
  346. if bkey == "" {
  347. DealInfo(&tmp, &update)
  348. }
  349. // entidlist
  350. extractMap := make(map[string]interface{})
  351. if update["s_winner"] != "" {
  352. cid := companyFun(update)
  353. if len(cid) > 0 {
  354. tmp["entidlist"] = cid
  355. update["entidlist"] = cid
  356. extractMap["entidlist"] = cid
  357. }
  358. }
  359. //阳光采购
  360. spidercode := util.ObjToString(tmp["spidercode"])
  361. site := util.ObjToString(tmp["site"])
  362. infoAttribute := util.ObjToString(tmp["infoattribute"])
  363. if infoAttribute == "zc_cgxx" {
  364. update["public_type"] = "用户发布"
  365. update["extracttype"] = -1
  366. update["dataprocess"] = 7
  367. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "user"})
  368. log.Println("阳光采购用户发布", tid)
  369. } else {
  370. if util.IntAll(compare["repeat"]) != 1 {
  371. for _, v := range ygsiteArr {
  372. spidercodes := util.ObjToString(v["spidercode"])
  373. sites := util.ObjToString(v["site"])
  374. datatype := util.ObjToString(v["datatype"])
  375. if datatype == "1" && spidercodes == spidercode {
  376. update["infoattribute"] = "zc_cgxx"
  377. update["public_type"] = "平台发布"
  378. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "spidercode"})
  379. log.Println("阳光采购平台发布爬虫", tid)
  380. } else if datatype == "2" && site == sites {
  381. update["infoattribute"] = "zc_cgxx"
  382. update["public_type"] = "平台发布"
  383. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "site"})
  384. log.Println("阳光采购平台发布站点", tid)
  385. } else if datatype == "3" && spidercodes == spidercode {
  386. update["infoattribute"] = "zc_cgxx"
  387. update["public_type"] = "平台发布"
  388. update["is_yg_new"] = 1
  389. update["extracttype"] = -1
  390. update["dataprocess"] = 7
  391. MgoE.Save("bidding_yg", map[string]interface{}{"id": tid, "source": "is_yg_new"})
  392. log.Println("阳光采购平台新采集爬虫", tid)
  393. }
  394. }
  395. }
  396. }
  397. // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
  398. typeFunc(tmp, update, extractMap)
  399. if len(extractMap) > 0 {
  400. if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
  401. // extUpdate = append(extUpdate, []map[string]interface{}{
  402. // {"_id": tmp["_id"]},
  403. // {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  404. // })
  405. extUpdate = map[string]interface{}{"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}}
  406. } else {
  407. extUpdate = map[string]interface{}{"$set": extractMap}
  408. }
  409. // if len(extUpdate) >= MgoBulkSize {
  410. // tmps := extUpdate
  411. // MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  412. // extUpdate = [][]map[string]interface{}{}
  413. // }
  414. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, tid, extUpdate)
  415. }
  416. // 附件有效字段
  417. if i := validFile(tmp); i != 0 {
  418. if i == -1 {
  419. tmp["isValidFile"] = false
  420. update["isValidFile"] = false
  421. } else {
  422. tmp["isValidFile"] = true
  423. update["isValidFile"] = true
  424. }
  425. }
  426. // 2024-02-21 徐志恒 情报标签字段
  427. toptype := util.ObjToString(tmp["toptype"])
  428. subtype := util.ObjToString(tmp["subtype"])
  429. buyerclass := util.ObjToString(update["buyerclass"])
  430. if buyerclass != "" {
  431. update["buyer_type"] = getStr(buyerclass)
  432. }
  433. s_topscopeclass := util.ObjToString(update["s_topscopeclass"])
  434. if (tmp["tag_topinformation"] != nil && (subtype == "合同" || subtype == "中标" || subtype == "成交" || subtype == "采购意向" || toptype == "招标")) || (tmp["tag_topinformation"] == nil && toptype == "拟建" && strings.Contains(s_topscopeclass, "建筑工程")) {
  435. update["tag_set"] = getTagSet(tmp, compare)
  436. }
  437. if len(update) > 0 {
  438. log.Println("保存bidding区域 省", update["area"], " 市 ", update["city"], " 区 ", update["district"], " buyerclass ", update["buyerclass"], update["buyer_type"], " id ", tid)
  439. if len(del) > 0 {
  440. bidUpdate = map[string]interface{}{"$set": update, "$unset": del}
  441. // bidUpdate = append(bidUpdate, []map[string]interface{}{{
  442. // "_id": tmp["_id"],
  443. // },
  444. // {"$set": update, "$unset": del},
  445. // })
  446. } else {
  447. bidUpdate = map[string]interface{}{"$set": update}
  448. }
  449. // if len(bidUpdate) >= MgoBulkSize {
  450. // tmps := bidUpdate
  451. // MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  452. // bidUpdate = [][]map[string]interface{}{}
  453. // }
  454. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, tid, bidUpdate)
  455. }
  456. // if n%500 == 0 {
  457. // log.Println("biddingTask current ", n)
  458. // }
  459. // tmp = make(map[string]interface{})
  460. // }
  461. //SaveEsLock.Lock()
  462. // if len(bidUpdate) > 0 {
  463. // tmps := bidUpdate
  464. // MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  465. // bidUpdate = [][]map[string]interface{}{}
  466. // }
  467. // if len(extUpdate) > 0 {
  468. // tmps := extUpdate
  469. // MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  470. // extUpdate = [][]map[string]interface{}{}
  471. // }
  472. //SaveEsLock.Unlock()
  473. return syncNo
  474. }
  475. // @Description subscopeclass、topscopeclass、package
  476. // 20230523 多包处理 subpackage = 1
  477. // @Author J 2022/6/7 5:54 PM
  478. func fieldFun(compare, update map[string]interface{}) {
  479. subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
  480. if subscopeclass != nil {
  481. m1 := map[string]bool{}
  482. newclass := []string{}
  483. for _, sc := range subscopeclass {
  484. sclass, _ := sc.(string)
  485. if !m1[sclass] {
  486. m1[sclass] = true
  487. newclass = append(newclass, sclass)
  488. }
  489. }
  490. update["s_subscopeclass"] = strings.Join(newclass, ",")
  491. update["subscopeclass"] = newclass
  492. }
  493. topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
  494. if topscopeclass != nil {
  495. m2 := map[string]bool{}
  496. newclass := []string{}
  497. for _, tc := range topscopeclass {
  498. tclass, _ := tc.(string)
  499. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  500. if !m2[tclass] {
  501. m2[tclass] = true
  502. newclass = append(newclass, tclass)
  503. }
  504. }
  505. update["topscopeclass"] = topscopeclass
  506. update["s_topscopeclass"] = strings.Join(newclass, ",")
  507. }
  508. if package1 := compare["package"]; package1 != nil {
  509. packageM, _ := package1.(map[string]interface{})
  510. update["package"] = packageM
  511. for _, p := range packageM {
  512. pm, _ := p.(map[string]interface{})
  513. if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
  514. util.Float64All(pm["bidamount"]) > 0 {
  515. update["multipackage"] = 1
  516. break
  517. }
  518. }
  519. } else {
  520. update["multipackage"] = 0
  521. }
  522. // subpackage
  523. if compare["package"] != nil && compare["s_winner"] != nil && compare["bidamount"] != nil {
  524. pg := compare["package"].(map[string]interface{})
  525. if len(pg) > 1 {
  526. var bmt []float64
  527. var swn []string
  528. for _, p := range pg {
  529. p1 := p.(map[string]interface{})
  530. if p1["bidamount"] != nil {
  531. bmt = append(bmt, util.Float64All(p1["bidamount"]))
  532. }
  533. if w := util.ObjToString(p1["winner"]); w != "" {
  534. swn = append(swn)
  535. }
  536. }
  537. if len(bmt) > 1 && len(swn) > 1 {
  538. sn := strings.Split(util.ObjToString(compare["s_winner"]), ",")
  539. sort.Strings(sn)
  540. sort.Strings(swn)
  541. swn1 := util.ObjArrToStringArr(Duplicate(swn)) // 去重
  542. if strings.Join(swn1, ",") == strings.Join(sn, ",") {
  543. bidamount := 0.0
  544. for _, f := range bmt {
  545. bidamount += f
  546. }
  547. if bidamount == util.Float64All(compare["bidamount"]) {
  548. update["subpackage"] = 1
  549. }
  550. }
  551. }
  552. }
  553. }
  554. }
  555. // @Description entidlist
  556. // @Author J 2022/6/7 2:36 PM
  557. func companyFun(tmp map[string]interface{}) (cid []string) {
  558. sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
  559. for _, w := range sWinnerarr {
  560. if w != "" {
  561. id := redis.GetStr("qyxy_id", w)
  562. if id == "" {
  563. ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  564. if len(*ents) > 0 {
  565. id = util.ObjToString((*ents)[0]["_id"])
  566. redis.PutCKV("qyxy_id", w, id)
  567. } else {
  568. ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
  569. if len(*ent) > 0 {
  570. id = util.ObjToString((*ent)["company_id"])
  571. redis.PutCKV("qyxy_id", w, id)
  572. }
  573. }
  574. }
  575. if id == "" {
  576. id = "-"
  577. }
  578. cid = append(cid, id)
  579. }
  580. }
  581. return cid
  582. }
  583. // @Description update 修改bidding表,extractM修改抽取表
  584. // @Author J 2022/6/10 10:29 AM
  585. func typeFunc(tmp, update, extractM map[string]interface{}) {
  586. if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
  587. if t := util.ObjToString(jyData["type"]); t != "" {
  588. switch t {
  589. //case "采购信息":
  590. case "招标公告":
  591. if util.ObjToString(tmp["toptype"]) != "招标" {
  592. update["toptype"] = "招标"
  593. extractM["toptype"] = "招标"
  594. delete(update, "subtype")
  595. }
  596. case "采购意向":
  597. if util.ObjToString(tmp["toptype"]) != "采购意向" {
  598. update["toptype"] = "采购意向"
  599. update["subtype"] = "采购意向"
  600. extractM["toptype"] = "采购意向"
  601. extractM["subtype"] = "采购意向"
  602. }
  603. case "招标预告":
  604. if util.ObjToString(tmp["toptype"]) != "预告" {
  605. update["toptype"] = "预告"
  606. extractM["toptype"] = "预告"
  607. delete(update, "subtype")
  608. }
  609. case "招标结果":
  610. if util.ObjToString(tmp["toptype"]) != "结果" {
  611. update["toptype"] = "结果"
  612. extractM["toptype"] = "结果"
  613. delete(update, "subtype")
  614. }
  615. }
  616. }
  617. }
  618. }
  619. // @Description 附件有效字段(isValidFile)
  620. // @Author J 2022/7/8 14:41
  621. func validFile(tmp map[string]interface{}) int {
  622. isContinue := false
  623. if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  624. if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  625. for _, att := range atts {
  626. if att == nil {
  627. continue
  628. }
  629. if reflect.TypeOf(att).String() == "string" {
  630. continue
  631. }
  632. att1 := att.(map[string]interface{})
  633. if fid := util.ObjToString(att1["fid"]); fid != "" {
  634. isContinue = true
  635. break
  636. }
  637. }
  638. if isContinue {
  639. if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  640. if len(attachTxt) > 0 {
  641. for _, at := range attachTxt {
  642. at1 := at.(map[string]interface{})
  643. if len(at1) > 0 {
  644. for k, _ := range at1 {
  645. if reflect.TypeOf(at1[k]).String() == "string" {
  646. continue
  647. }
  648. at2 := at1[k].(map[string]interface{})
  649. s := strings.ToLower(util.ObjToString(at2["file_name"]))
  650. if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
  651. strings.Contains(s, "pdf") {
  652. if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  653. return -1
  654. } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
  655. return 1
  656. }
  657. }
  658. }
  659. break
  660. } else {
  661. break
  662. }
  663. }
  664. }
  665. }
  666. flag := false
  667. for _, att := range atts {
  668. if att == nil {
  669. continue
  670. }
  671. if reflect.TypeOf(att).String() == "string" {
  672. continue
  673. }
  674. att1 := att.(map[string]interface{})
  675. if fid := util.ObjToString(att1["fid"]); fid != "" {
  676. ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
  677. if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
  678. return 1
  679. } else {
  680. flag = true
  681. }
  682. }
  683. }
  684. if flag {
  685. return -1
  686. }
  687. }
  688. }
  689. }
  690. return 0
  691. }
  692. // @Description id不变,内容变化 重新索引数据
  693. // @Author J 2022/8/10 13:29
  694. func taskinfo(id string) {
  695. tmp, _ := MgoB.FindById("bidding", id, nil)
  696. if tmp == nil || len(*tmp) == 0 {
  697. log.Println(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
  698. return
  699. }
  700. extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
  701. if extractM == nil || len(*extractM) == 0 {
  702. extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
  703. if extractM == nil || len(*extractM) == 0 {
  704. log.Println(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
  705. return
  706. }
  707. }
  708. update := map[string]interface{}{} //要更新的mongo数据
  709. //更新bidding表字段
  710. for _, k := range config.Conf.Serve.FieldS {
  711. v1 := (*extractM)[k] //extract
  712. v2 := (*tmp)[k] //bidding
  713. if v2 == nil && v1 != nil {
  714. update[k] = v1
  715. } else if v2 != nil && v1 != nil {
  716. update[k] = v1
  717. } else if v2 != nil && v1 == nil {
  718. if k == "city" || k == "district" {
  719. update[k] = ""
  720. }
  721. }
  722. }
  723. if util.IntAll((*extractM)["repeat"]) == 1 {
  724. update["extracttype"] = -1
  725. update["dataprocess"] = 7
  726. if (*extractM)["repeat_id"] != nil {
  727. update["repeat_id"] = (*extractM)["repeat_id"]
  728. }
  729. } else {
  730. update["extracttype"] = 1
  731. update["dataprocess"] = 8
  732. }
  733. //处理分类
  734. fieldFun(*extractM, update)
  735. extractMap := make(map[string]interface{})
  736. if util.ObjToString((*tmp)["s_winner"]) != "" {
  737. cid := companyFun(*tmp)
  738. if len(cid) > 0 {
  739. update["entidlist"] = cid
  740. extractMap["entidlist"] = cid
  741. }
  742. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
  743. }
  744. // 附件有效字段
  745. if i := validFile(*tmp); i != 0 {
  746. if i == -1 {
  747. update["isValidFile"] = false
  748. } else {
  749. update["isValidFile"] = true
  750. }
  751. }
  752. if len(update) > 0 {
  753. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
  754. }
  755. mapinfo := map[string]interface{}{
  756. "infoid": id,
  757. "stype": "index-by-id",
  758. }
  759. datas, _ := json.Marshal(mapinfo)
  760. var next = &net.UDPAddr{
  761. IP: net.ParseIP(config.Conf.Udp.Next.Addr),
  762. Port: util.IntAll(config.Conf.Udp.Next.Port),
  763. }
  764. log.Println("nsq data over es ", next, " mapinfo ", string(datas))
  765. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
  766. }
  767. var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
  768. // @Description 发布时间处理
  769. // @Author J 2023/5/23 14:32
  770. func methodPb(tmp map[string]interface{}) int64 {
  771. if tmp["ext_publishtime"] != nil {
  772. if newPb := util.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
  773. return newPb
  774. }
  775. }
  776. for _, d := range DateTimeSelect {
  777. if tmp[d] != nil && util.Int64All(tmp[d]) < time.Now().Unix() {
  778. return util.Int64All(tmp[d])
  779. }
  780. }
  781. return 0
  782. }
  783. // Duplicate
  784. // @Description 去重
  785. // @Author J 2023/5/24 09:53
  786. func Duplicate(a interface{}) (ret []interface{}) {
  787. va := reflect.ValueOf(a)
  788. for i := 0; i < va.Len(); i++ {
  789. if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), va.Index(i).Interface()) {
  790. continue
  791. }
  792. ret = append(ret, va.Index(i).Interface())
  793. }
  794. return ret
  795. }
  796. // @Description 获取情报标签
  797. // @Author 徐志恒 2024/2/21 09:53
  798. func getTagSet(tmp, compare map[string]interface{}) map[string]map[string]interface{} {
  799. tagSet := map[string]map[string]interface{}{}
  800. wuye := map[string]interface{}{}
  801. buyer := util.ObjToString(compare["buyer"])
  802. publishtime := util.Int64All(tmp["publishtime"])
  803. bidamount := util.Float64All(compare["bidamount"])
  804. toptype := util.ObjToString(tmp["toptype"])
  805. subtype := util.ObjToString(tmp["subtype"])
  806. if subtype == "合同" {
  807. wuye["isfirsthand"] = 62
  808. if buyer != "" {
  809. sql := `{
  810. "query": {
  811. "bool": {
  812. "must": [
  813. {
  814. "term": {
  815. "buyer": "` + buyer + `"
  816. }
  817. },
  818. {
  819. "term": {
  820. "tag_topinformation": "情报_物业"
  821. }
  822. },
  823. {
  824. "term": {
  825. "subtype": "合同"
  826. }
  827. },
  828. {
  829. "range": {
  830. "publishtime": {
  831. "lte": ` + fmt.Sprint(publishtime) + `
  832. }
  833. }
  834. }
  835. ]
  836. }
  837. },
  838. "sort": {
  839. "publishtime": "asc"
  840. },
  841. "_source": [
  842. "s_winner"
  843. ],
  844. "size": 10000
  845. }`
  846. data := Es.Get("bidding", "bidding", sql)
  847. if data != nil && len(*data) > 0 {
  848. count := 0
  849. first := util.ObjToString((*data)[0]["s_winner"])
  850. for k, v := range *data {
  851. winner := util.ObjToString(v["s_winner"])
  852. if k > 0 && first != winner {
  853. first = winner
  854. count++
  855. }
  856. }
  857. changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
  858. changehands, _ := strconv.ParseFloat(changehand, 64)
  859. wuye["changehand"] = changehands
  860. if changehands > 0.3 {
  861. wuye["changehandindex"] = 61
  862. }
  863. if len(*data) > 1 {
  864. wuye["isfirsthand"] = 0
  865. }
  866. }
  867. }
  868. wuye["period"] = getperiod(compare)
  869. } else if toptype == "招标" || toptype == "采购意向" {
  870. bidamount = util.Float64All(compare["budget"])
  871. }
  872. if tmp["projectinfo"] != nil {
  873. projectInfo := util.ObjToMap(tmp["projectinfo"])
  874. if projectInfo != nil && len(*projectInfo) > 0 {
  875. if (*projectInfo)["attachments"] != nil {
  876. wuye["isfile"] = 63
  877. }
  878. }
  879. }
  880. wuye["scale"] = getBidamountRange(bidamount)
  881. if tmp["property_form"] != nil {
  882. property_form := util.ObjArrToStringArr(tmp["property_form"].([]interface{}))
  883. wuye["property_form"] = getpropertyform(property_form)
  884. }
  885. tagSet["wuye"] = wuye
  886. return tagSet
  887. }
  888. func getBidamountRange(value float64) int {
  889. switch {
  890. case value < 500000:
  891. return 1
  892. case value >= 500000 && value < 1000000:
  893. return 2
  894. case value >= 1000000 && value < 2000000:
  895. return 3
  896. case value >= 2000000 && value < 5000000:
  897. return 4
  898. default:
  899. return 5
  900. }
  901. }
  902. func getpropertyform(value []string) string {
  903. arr := []string{}
  904. categories := map[string]string{
  905. "住宅": "21",
  906. "政府办公楼": "22",
  907. "学校": "23",
  908. "医院": "24",
  909. "产业园区": "25",
  910. "旅游景区": "26",
  911. "交通运输": "27",
  912. "商务办公楼": "28",
  913. "酒店": "29",
  914. }
  915. for _, v := range value {
  916. if categories[v] != "" {
  917. arr = append(arr, categories[v])
  918. }
  919. }
  920. return strings.Join(arr, ",")
  921. }
  922. func getperiod(data map[string]interface{}) int {
  923. res := 16
  924. signaturedate := util.Int64All(data["signaturedate"]) //合同签订日期
  925. expiredate := util.Int64All(data["expiredate"]) //合同截止日期
  926. // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
  927. project_duration := util.IntAll(data["project_duration"]) //工期时长
  928. project_timeunit := util.ObjToString(data["project_timeunit"]) //工期单位
  929. result := float64(0)
  930. if expiredate > 0 && signaturedate > 0 {
  931. result = calculateYearDifference(signaturedate, expiredate)
  932. } else if project_duration > 0 && project_timeunit != "" {
  933. if strings.Contains(project_timeunit, "年") {
  934. if project_duration == 1 {
  935. res = 12
  936. } else if project_duration == 2 {
  937. res = 13
  938. } else if project_duration == 3 {
  939. res = 14
  940. } else if project_duration == 5 {
  941. res = 15
  942. }
  943. return res
  944. } else if strings.Contains(project_timeunit, "月") {
  945. result = float64(project_duration) / 12
  946. } else if strings.Contains(project_timeunit, "周") {
  947. result = float64(project_duration) * 7 / 365
  948. } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
  949. result = float64(project_duration) / 365
  950. }
  951. }
  952. if result == 0 {
  953. res = 16
  954. } else if result < 1 {
  955. res = 11
  956. } else if result >= 1 && result < 2 {
  957. res = 12
  958. } else if result >= 2 && result < 3 {
  959. res = 13
  960. } else if result >= 3 && result < 4 {
  961. res = 14
  962. } else if result >= 5 {
  963. res = 15
  964. }
  965. return res
  966. }
  967. func calculateYearDifference(startTime int64, endTime int64) float64 {
  968. start := time.Unix(startTime, 0)
  969. end := time.Unix(endTime, 0)
  970. duration := end.Sub(start)
  971. years := duration.Hours() / 24 / 365
  972. return years
  973. }
  974. func getStr(b string) string {
  975. if b == "" {
  976. return "其它"
  977. }
  978. a1 := "(交通|运输物流|工信|农业|住建|城管|市政|出版广电|检察院|科技|民政|生态环境|市场监管|水利|应急管理|自然资源|财政|档案|党委办|组织|发改|宣传|政府办|政务中心|人大|政协|法院|公安|国资委|海关|机关事务|纪委|军队|人社|商务|审计税务|司法|体育|统计|统战|文旅|民宗|银保监|证监|气象|社会团体|公共资源交易)"
  979. a2 := "(卫健委|医疗)"
  980. a3 := "(教育|学校)"
  981. a4 := "(人行|金融业)"
  982. a5 := "(信息技术|电信行业|农林牧渔|建筑业|传媒|制造业|住宿餐饮|采矿业|能源化工|批发零售)"
  983. if strings.Contains(a1, b) {
  984. return "政府机构"
  985. } else if strings.Contains(a2, b) {
  986. return "医疗单位"
  987. } else if strings.Contains(a3, b) {
  988. return "教育单位"
  989. } else if strings.Contains(a4, b) {
  990. return "金融企业"
  991. } else if strings.Contains(a5, b) {
  992. return "商业公司"
  993. } else {
  994. return "其它"
  995. }
  996. return "其它"
  997. }