task.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. package main
  2. import (
  3. util "app.yhyue.com/data_processing/common_utils"
  4. "app.yhyue.com/data_processing/common_utils/log"
  5. "app.yhyue.com/data_processing/common_utils/mongodb"
  6. "app.yhyue.com/data_processing/common_utils/redis"
  7. "app.yhyue.com/data_processing/common_utils/udp"
  8. "encoding/json"
  9. "field_sync/config"
  10. "field_sync/oss"
  11. "fmt"
  12. "go.uber.org/zap"
  13. "net"
  14. "reflect"
  15. "regexp"
  16. "strings"
  17. )
  18. var (
  19. regLetter = regexp.MustCompile("[a-z]*")
  20. )
  21. func biddingTask(data []byte, mapInfo map[string]interface{}) {
  22. defer util.Catch()
  23. stype := util.ObjToString(mapInfo["stype"])
  24. // 领域标签处理的数据 id段
  25. if stype == "bidding_history" {
  26. MgoB.Save("field_data_record", map[string]interface{}{"gtid": mapInfo["gtid"], "lteid": mapInfo["lteid"], "status": 0})
  27. }
  28. q, _ := mapInfo["query"].(map[string]interface{})
  29. bkey, _ := mapInfo["bkey"].(string)
  30. if q == nil {
  31. q = map[string]interface{}{
  32. "_id": map[string]interface{}{
  33. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  34. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  35. },
  36. }
  37. }
  38. //extract库
  39. extractConn := MgoE.GetMgoConn()
  40. defer MgoE.DestoryMongoConn(extractConn)
  41. extractResult := extractConn.DB(MgoE.DbName).C(config.Conf.DB.MongoE.Coll).Find(q).Select(map[string]interface{}{
  42. "field_source": 0,
  43. "kvtext": 0,
  44. }).Sort("_id").Iter()
  45. eMap := map[string]map[string]interface{}{}
  46. extCount, repeatCount := 0, 0
  47. for tmp := make(map[string]interface{}); extractResult.Next(tmp); extCount++ {
  48. if util.IntAll(tmp["repeat"]) == 1 {
  49. repeatCount++
  50. }
  51. tid := mongodb.BsonIdToSId(tmp["_id"])
  52. eMap[tid] = tmp
  53. tmp = make(map[string]interface{})
  54. }
  55. log.Info("抽取表", zap.Int("数据量", extCount), zap.Int("重复数据量", repeatCount))
  56. //bidding库
  57. biddingConn := MgoB.GetMgoConn()
  58. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  59. log.Info("bidding表", zap.Int64("同步总数:", count))
  60. c := 0
  61. if count < 200000 {
  62. var res []map[string]interface{}
  63. result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
  64. "contenthtml": 0,
  65. }).Iter()
  66. for tmp := make(map[string]interface{}); result.Next(tmp); {
  67. res = append(res, tmp)
  68. tmp = make(map[string]interface{})
  69. }
  70. MgoB.DestoryMongoConn(biddingConn)
  71. log.Info("查询结果", zap.Int64("bidding", count), zap.Int("抽取:", extCount))
  72. c = doIndex(res, eMap, bkey, stype)
  73. } else {
  74. log.Info("查询结果", zap.Int64("数据量太大,放弃", count))
  75. MgoB.DestoryMongoConn(biddingConn)
  76. }
  77. log.Info("bidding sync...over", zap.Int64("all", count), zap.Int("extract sync", c))
  78. NextNode(mapInfo, stype)
  79. NextNodePro(mapInfo, stype)
  80. NextNodeTidb(mapInfo, stype)
  81. if stype == "bidding_history" {
  82. NextNodeBidData(mapInfo) // bidding-data数据
  83. NextNodeTidbQyxy(mapInfo) // tidb-企业数据
  84. NextNodeHn(mapInfo)
  85. }
  86. }
  87. func doIndex(infos []map[string]interface{}, eMap map[string]map[string]interface{}, bkey, stype string) int {
  88. syncNo := 0 //抽取表数据同步数量
  89. //对比两张表数据,减少查询次数
  90. var compare map[string]interface{}
  91. var bidUpdate [][]map[string]interface{}
  92. var extUpdate [][]map[string]interface{}
  93. //SaveEsLock := &sync.Mutex{}
  94. log.Info("start ...")
  95. for n, tmp := range infos {
  96. tid := mongodb.BsonIdToSId(tmp["_id"])
  97. update := map[string]interface{}{} //要更新的mongo数据
  98. //对比方法----------------
  99. if eMap[tid] != nil {
  100. compare = eMap[tid]
  101. if stype == "bidding" {
  102. // 增量id段 正常数据
  103. if dg := util.IntAll(compare["dataging"]); dg == 1 { //extract中dataging=1跳过
  104. tmp = make(map[string]interface{})
  105. compare = nil
  106. continue
  107. }
  108. delete(eMap, tid)
  109. }
  110. if stype == "bidding_history" {
  111. //增量id段 历史数据
  112. if compare["history_updatetime"] == nil { //extract中history_updatetime不存在跳过
  113. tmp = make(map[string]interface{})
  114. compare = nil
  115. continue
  116. }
  117. delete(eMap, tid)
  118. }
  119. syncNo++
  120. for _, k := range config.Conf.Serve.FieldS {
  121. v1 := compare[k] //extract
  122. v2 := tmp[k] //bidding
  123. if v2 == nil && v1 != nil {
  124. update[k] = v1
  125. } else if v2 != nil && v1 != nil {
  126. update[k] = v1
  127. } else if v2 != nil && v1 == nil {
  128. if k == "city" || k == "district" {
  129. update[k] = ""
  130. }
  131. }
  132. }
  133. if util.IntAll(compare["repeat"]) == 1 {
  134. update["extracttype"] = -1
  135. update["dataprocess"] = 7
  136. } else {
  137. update["extracttype"] = 1
  138. update["dataprocess"] = 8
  139. }
  140. } else {
  141. compare = nil
  142. if util.IntAll(tmp["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  143. update["dataging"] = 0
  144. }
  145. update["dataprocess"] = 8
  146. }
  147. //下面可以多线程跑的--->
  148. //处理分类
  149. if compare != nil { //extract
  150. fieldFun(compare, update)
  151. compare = nil
  152. }
  153. //------------------对比结束
  154. //处理key descript
  155. if bkey == "" {
  156. DealInfo(&tmp, &update)
  157. }
  158. // entidlist
  159. extractMap := make(map[string]interface{})
  160. if update["s_winner"] != "" {
  161. cid := companyFun(update)
  162. if len(cid) > 0 {
  163. tmp["entidlist"] = cid
  164. update["entidlist"] = cid
  165. extractMap["entidlist"] = cid
  166. }
  167. }
  168. // 6.10 剑鱼发布信息分类处理, 写在这里是为了修改抽取表
  169. typeFunc(tmp, update, extractMap)
  170. if len(extractMap) > 0 {
  171. if extractMap["toptype"] != nil && extractMap["subtype"] == nil {
  172. //updateExtPool <- []map[string]interface{}{
  173. // {"_id": tmp["_id"]},
  174. // {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  175. //}
  176. extUpdate = append(extUpdate, []map[string]interface{}{
  177. {"_id": tmp["_id"]},
  178. {"$set": extractMap, "$unset": map[string]interface{}{"subtype": ""}},
  179. })
  180. } else {
  181. //updateExtPool <- []map[string]interface{}{
  182. // {"_id": tmp["_id"]},
  183. // {"$set": extractMap},
  184. //}
  185. extUpdate = append(extUpdate, []map[string]interface{}{
  186. {"_id": tmp["_id"]},
  187. {"$set": extractMap},
  188. })
  189. }
  190. if len(extUpdate) >= MgoBulkSize {
  191. tmps := extUpdate
  192. MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  193. extUpdate = [][]map[string]interface{}{}
  194. }
  195. }
  196. // 附件有效字段
  197. if i := validFile(tmp); i != 0 {
  198. if i == -1 {
  199. tmp["isValidFile"] = false
  200. update["isValidFile"] = false
  201. } else {
  202. tmp["isValidFile"] = true
  203. update["isValidFile"] = true
  204. }
  205. }
  206. if len(update) > 0 {
  207. //SaveEsLock.Lock()
  208. bidUpdate = append(bidUpdate, []map[string]interface{}{{
  209. "_id": tmp["_id"],
  210. },
  211. {"$set": update},
  212. })
  213. if len(bidUpdate) >= MgoBulkSize {
  214. tmps := bidUpdate
  215. MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  216. bidUpdate = [][]map[string]interface{}{}
  217. }
  218. //SaveEsLock.Unlock()
  219. //updateBidPool <- []map[string]interface{}{{
  220. // "_id": tmp["_id"],
  221. //},
  222. // {"$set": update},
  223. //}
  224. }
  225. if n%500 == 0 {
  226. log.Info("biddingTask", zap.Int("current", n))
  227. }
  228. tmp = make(map[string]interface{})
  229. }
  230. //SaveEsLock.Lock()
  231. if len(bidUpdate) > 0 {
  232. tmps := bidUpdate
  233. MgoB.UpdateBulk(config.Conf.DB.MongoB.Coll, tmps...)
  234. bidUpdate = [][]map[string]interface{}{}
  235. }
  236. if len(extUpdate) > 0 {
  237. tmps := extUpdate
  238. MgoE.UpdateBulk(config.Conf.DB.MongoE.Coll, tmps...)
  239. extUpdate = [][]map[string]interface{}{}
  240. }
  241. //SaveEsLock.Unlock()
  242. return syncNo
  243. }
  244. // @Description subscopeclass、topscopeclass、package
  245. // @Author J 2022/6/7 5:54 PM
  246. func fieldFun(compare, update map[string]interface{}) {
  247. subscopeclass, _ := compare["subscopeclass"].([]interface{}) //subscopeclass
  248. if subscopeclass != nil {
  249. m1 := map[string]bool{}
  250. newclass := []string{}
  251. for _, sc := range subscopeclass {
  252. sclass, _ := sc.(string)
  253. if !m1[sclass] {
  254. m1[sclass] = true
  255. newclass = append(newclass, sclass)
  256. }
  257. }
  258. update["s_subscopeclass"] = strings.Join(newclass, ",")
  259. update["subscopeclass"] = newclass
  260. }
  261. topscopeclass, _ := compare["topscopeclass"].([]interface{}) //topscopeclass
  262. if topscopeclass != nil {
  263. m2 := map[string]bool{}
  264. newclass := []string{}
  265. for _, tc := range topscopeclass {
  266. tclass, _ := tc.(string)
  267. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  268. if !m2[tclass] {
  269. m2[tclass] = true
  270. newclass = append(newclass, tclass)
  271. }
  272. }
  273. update["topscopeclass"] = topscopeclass
  274. update["s_topscopeclass"] = strings.Join(newclass, ",")
  275. }
  276. if package1 := compare["package"]; package1 != nil {
  277. packageM, _ := package1.(map[string]interface{})
  278. update["package"] = packageM
  279. for _, p := range packageM {
  280. pm, _ := p.(map[string]interface{})
  281. if util.ObjToString(pm["winner"]) != "" || util.Float64All(pm["budget"]) > 0 ||
  282. util.Float64All(pm["bidamount"]) > 0 {
  283. update["multipackage"] = 1
  284. break
  285. }
  286. }
  287. } else {
  288. update["multipackage"] = 0
  289. }
  290. }
  291. // @Description entidlist
  292. // @Author J 2022/6/7 2:36 PM
  293. func companyFun(tmp map[string]interface{}) (cid []string) {
  294. sWinnerarr := strings.Split(util.ObjToString(tmp["s_winner"]), ",")
  295. for _, w := range sWinnerarr {
  296. if w != "" {
  297. id := redis.GetStr("qyxy_id", w)
  298. if id == "" {
  299. ents, _ := MgoQ.Find(config.Conf.DB.MongoQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  300. if len(*ents) > 0 {
  301. id = util.ObjToString((*ents)[0]["_id"])
  302. redis.PutCKV("qyxy_id", w, id)
  303. } else {
  304. ent, _ := MgoP.FindOne(config.Conf.DB.MongoP.Coll, map[string]interface{}{"history_name": w})
  305. if len(*ent) > 0 {
  306. id = util.ObjToString((*ent)["company_id"])
  307. redis.PutCKV("qyxy_id", w, id)
  308. }
  309. }
  310. }
  311. if id == "" {
  312. id = "-"
  313. }
  314. cid = append(cid, id)
  315. }
  316. }
  317. return cid
  318. }
  319. // @Description update 修改bidding表,extractM修改抽取表
  320. // @Author J 2022/6/10 10:29 AM
  321. func typeFunc(tmp, update, extractM map[string]interface{}) {
  322. if jyData, ok := tmp["jyfb_data"].(map[string]interface{}); ok {
  323. if t := util.ObjToString(jyData["type"]); t != "" {
  324. switch t {
  325. //case "采购信息":
  326. case "招标公告":
  327. if util.ObjToString(tmp["toptype"]) != "招标" {
  328. update["toptype"] = "招标"
  329. extractM["toptype"] = "招标"
  330. delete(update, "subtype")
  331. }
  332. case "采购意向":
  333. if util.ObjToString(tmp["toptype"]) != "采购意向" {
  334. update["toptype"] = "采购意向"
  335. update["subtype"] = "采购意向"
  336. extractM["toptype"] = "采购意向"
  337. extractM["subtype"] = "采购意向"
  338. }
  339. case "招标预告":
  340. if util.ObjToString(tmp["toptype"]) != "预告" {
  341. update["toptype"] = "预告"
  342. extractM["toptype"] = "预告"
  343. delete(update, "subtype")
  344. }
  345. case "招标结果":
  346. if util.ObjToString(tmp["toptype"]) != "结果" {
  347. update["toptype"] = "结果"
  348. extractM["toptype"] = "结果"
  349. delete(update, "subtype")
  350. }
  351. }
  352. }
  353. }
  354. }
  355. // @Description 附件有效字段(isValidFile)
  356. // @Author J 2022/7/8 14:41
  357. func validFile(tmp map[string]interface{}) int {
  358. isContinue := false
  359. if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  360. if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  361. for _, att := range atts {
  362. if att == nil {
  363. continue
  364. }
  365. if reflect.TypeOf(att).String() == "string" {
  366. continue
  367. }
  368. att1 := att.(map[string]interface{})
  369. if fid := util.ObjToString(att1["fid"]); fid != "" {
  370. isContinue = true
  371. break
  372. }
  373. }
  374. if isContinue {
  375. if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  376. if len(attachTxt) > 0 {
  377. for _, at := range attachTxt {
  378. at1 := at.(map[string]interface{})
  379. if len(at1) > 0 {
  380. for k, _ := range at1 {
  381. if reflect.TypeOf(at1[k]).String() == "string" {
  382. continue
  383. }
  384. at2 := at1[k].(map[string]interface{})
  385. s := strings.ToLower(util.ObjToString(at2["file_name"]))
  386. if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") ||
  387. strings.Contains(s, "pdf") {
  388. if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  389. return -1
  390. } else if AnalysisFile(oss.OssGetObject(util.ObjToString(at2["attach_url"]))) {
  391. return 1
  392. }
  393. }
  394. }
  395. break
  396. } else {
  397. break
  398. }
  399. }
  400. }
  401. }
  402. flag := false
  403. for _, att := range atts {
  404. if att == nil {
  405. continue
  406. }
  407. if reflect.TypeOf(att).String() == "string" {
  408. continue
  409. }
  410. att1 := att.(map[string]interface{})
  411. if fid := util.ObjToString(att1["fid"]); fid != "" {
  412. ftype := strings.ToLower(util.ObjToString(tmp["ftype"]))
  413. if ftype != "swf" && ftype != "html" && oss.OssObjExists("jy-datafile", fid) {
  414. return 1
  415. } else {
  416. flag = true
  417. }
  418. }
  419. }
  420. if flag {
  421. return -1
  422. }
  423. }
  424. }
  425. }
  426. return 0
  427. }
  428. // @Description id不变,内容变化 重新索引数据
  429. // @Author J 2022/8/10 13:29
  430. func taskinfo(id string) {
  431. tmp, _ := MgoB.FindById("bidding", id, nil)
  432. if tmp == nil || len(*tmp) == 0 {
  433. log.Info(fmt.Sprintf("taskinfo bidding id=%s 未查询到数据", id))
  434. return
  435. }
  436. extractM, _ := MgoE.FindById(config.Conf.DB.MongoE.Coll, id, nil)
  437. if extractM == nil || len(*extractM) == 0 {
  438. extractM, _ = MgoE.FindById(config.Conf.DB.MongoE.Coll1, id, nil)
  439. if extractM == nil || len(*extractM) == 0 {
  440. log.Info(fmt.Sprintf("taskinfo extract id=%s 未查询到数据", id))
  441. return
  442. }
  443. }
  444. update := map[string]interface{}{} //要更新的mongo数据
  445. //更新bidding表字段
  446. for _, k := range config.Conf.Serve.FieldS {
  447. v1 := (*extractM)[k] //extract
  448. v2 := (*tmp)[k] //bidding
  449. if v2 == nil && v1 != nil {
  450. update[k] = v1
  451. } else if v2 != nil && v1 != nil {
  452. update[k] = v1
  453. } else if v2 != nil && v1 == nil {
  454. if k == "city" || k == "district" {
  455. update[k] = ""
  456. }
  457. }
  458. }
  459. if util.IntAll((*extractM)["repeat"]) == 1 {
  460. update["extracttype"] = -1
  461. update["dataprocess"] = 7
  462. } else {
  463. update["extracttype"] = 1
  464. update["dataprocess"] = 8
  465. }
  466. //处理分类
  467. fieldFun(*extractM, update)
  468. extractMap := make(map[string]interface{})
  469. if util.ObjToString((*tmp)["s_winner"]) != "" {
  470. cid := companyFun(*tmp)
  471. if len(cid) > 0 {
  472. update["entidlist"] = cid
  473. extractMap["entidlist"] = cid
  474. }
  475. MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
  476. //updateExtPool <- []map[string]interface{}{
  477. // {"_id": mongodb.StringTOBsonId(id)},
  478. // {"$set": extractMap},
  479. //}
  480. }
  481. // 附件有效字段
  482. if i := validFile(*tmp); i != 0 {
  483. if i == -1 {
  484. update["isValidFile"] = false
  485. } else {
  486. update["isValidFile"] = true
  487. }
  488. }
  489. util.Debug(update)
  490. if len(update) > 0 {
  491. MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
  492. //updateBidPool <- []map[string]interface{}{{
  493. // "_id": mongodb.StringTOBsonId(id),
  494. //},
  495. // {"$set": update},
  496. //}
  497. }
  498. mapinfo := map[string]interface{}{
  499. "infoid": id,
  500. "stype": "index-by-id",
  501. }
  502. datas, _ := json.Marshal(mapinfo)
  503. var next = &net.UDPAddr{
  504. IP: net.ParseIP(config.Conf.Udp.Next.Addr),
  505. Port: util.IntAll(config.Conf.Udp.Next.Port),
  506. }
  507. log.Info("nsq data over", zap.Any("es", next), zap.String("mapinfo", string(datas)))
  508. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, next)
  509. }