bidding_es.go 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. package main
  2. import (
  3. "encoding/json"
  4. "esindex/config"
  5. "esindex/oss"
  6. "fmt"
  7. "github.com/spf13/viper"
  8. "go.mongodb.org/mongo-driver/bson"
  9. "go.uber.org/zap"
  10. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  11. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  12. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  13. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  14. "reflect"
  15. "regexp"
  16. "strconv"
  17. "strings"
  18. "sync"
  19. "time"
  20. "unicode/utf8"
  21. )
  22. var (
  23. TimeV1 = regexp.MustCompile("^(\\d{4})[年.]?$")
  24. TimeV2 = regexp.MustCompile("^(\\d{4})[年./-]?(\\d{1,2})[月./-]?$")
  25. TimeV3 = regexp.MustCompile("^(\\d{4})[年./-]?(\\d{1,2})[月./-]?(\\d{1,2})[日]?$")
  26. TimeClear = regexp.MustCompile("[年|月|日|/|.|-]")
  27. filterSpace = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]")
  28. date1 = regexp.MustCompile("20[0-2][0-9][年|\\-/.][0-9]{1,2}[月|\\-/.][0-9]{1,2}[日]?")
  29. HtmlReg = regexp.MustCompile("<[^>]+>")
  30. )
  31. // biddingDetailTask 针对 detail contenthtml 二个字段的索引;bidding_detail
  32. func biddingDetailTask(mapInfo map[string]interface{}) {
  33. defer util.Catch()
  34. //stype := util.ObjToString(mapInfo["stype"])
  35. q, _ := mapInfo["query"].(map[string]interface{})
  36. if q == nil {
  37. q = map[string]interface{}{
  38. "_id": map[string]interface{}{
  39. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  40. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  41. },
  42. }
  43. } else {
  44. //针对gte/lte,单独转换
  45. q = convertToMongoID(q)
  46. }
  47. ch := make(chan bool, 10)
  48. wg := &sync.WaitGroup{}
  49. //bidding库
  50. biddingConn := MgoB.GetMgoConn()
  51. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  52. log.Info("biddingDetailTask", zap.Int64("同步总数:", count))
  53. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(config.Conf.DB.MongoB.Coll).Find(&q).Select(nil).Iter()
  54. c1, index := 0, 0
  55. var indexLock sync.Mutex
  56. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  57. if c1%1000 == 0 {
  58. log.Info("biddingDetailTask", zap.Int("current:", c1))
  59. log.Info("biddingDetailTask", zap.Any("current:_id =>", tmp["_id"]))
  60. }
  61. ch <- true
  62. wg.Add(1)
  63. // 创建一个新的map用于goroutine,避免重用
  64. docCopy := make(map[string]interface{})
  65. for k, v := range tmp {
  66. docCopy[k] = v
  67. }
  68. go func(tmp map[string]interface{}) {
  69. defer func() {
  70. <-ch
  71. wg.Done()
  72. }()
  73. indexLock.Lock()
  74. index++
  75. indexLock.Unlock()
  76. //
  77. detail, _ := tmp["detail"].(string)
  78. detail = filterSpace.ReplaceAllString(detail, "")
  79. detail_new := ""
  80. if tmp["cleartag"] != nil {
  81. if tmp["cleartag"].(bool) {
  82. text, _ := FilterDetail(detail)
  83. detail_new = util.ObjToString(tmp["title"]) + " " + text
  84. } else {
  85. detail_new = util.ObjToString(tmp["title"]) + " " + detail
  86. }
  87. } else {
  88. text, _ := FilterDetail(detail)
  89. detail_new = util.ObjToString(tmp["title"]) + " " + text
  90. }
  91. //
  92. insert := map[string]interface{}{
  93. "detail": detail_new,
  94. "id": mongodb.BsonIdToSId(tmp["_id"]),
  95. "_id": mongodb.BsonIdToSId(tmp["_id"]),
  96. "contenthtml": tmp["contenthtml"],
  97. }
  98. //
  99. saveDetailEsPool <- insert
  100. }(docCopy)
  101. tmp = map[string]interface{}{}
  102. }
  103. wg.Wait()
  104. log.Info("biddingDetailTask over", zap.Int("count", c1), zap.Int("index", index))
  105. }
  106. func biddingTask(mapInfo map[string]interface{}) {
  107. defer util.Catch()
  108. // 同时处理详情索引
  109. go biddingDetailTask(mapInfo)
  110. stype := util.ObjToString(mapInfo["stype"])
  111. q, _ := mapInfo["query"].(map[string]interface{})
  112. if q == nil {
  113. q = map[string]interface{}{
  114. "_id": map[string]interface{}{
  115. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  116. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  117. },
  118. }
  119. } else {
  120. //针对gte/lte,单独转换
  121. q = convertToMongoID(q)
  122. }
  123. ch := make(chan bool, 10)
  124. wg := &sync.WaitGroup{}
  125. //bidding库
  126. biddingConn := MgoB.GetMgoConn()
  127. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  128. log.Info("bidding表", zap.Int64("同步总数:", count))
  129. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(config.Conf.DB.MongoB.Coll).Find(&q).Select(map[string]interface{}{
  130. "contenthtml": 0,
  131. }).Iter()
  132. c1, index := 0, 0
  133. var indexLock sync.Mutex
  134. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  135. if c1%1000 == 0 {
  136. log.Info("biddingTask", zap.Int("current:", c1))
  137. log.Info("biddingTask", zap.Any("current:_id =>", tmp["_id"]))
  138. }
  139. ch <- true
  140. wg.Add(1)
  141. // 创建一个新的map用于goroutine,避免重用
  142. docCopy := make(map[string]interface{})
  143. for k, v := range tmp {
  144. docCopy[k] = v
  145. }
  146. go func(tmp map[string]interface{}) {
  147. defer func() {
  148. <-ch
  149. wg.Done()
  150. }()
  151. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  152. tmp = make(map[string]interface{})
  153. return
  154. }
  155. //只针对增量数据处理;全量数据 需要用extracttype字段判断
  156. //7: 重复数据
  157. //8: 不重
  158. //if util.IntAll(tmp["dataprocess"]) != 8 {
  159. // return
  160. //}
  161. //// 增量数据使用上面判断;全量数据使用下面配置
  162. //-1:重复 ,1:不重复 ,0:入库 9:分类
  163. if util.IntAll(tmp["extracttype"]) != 1 {
  164. return
  165. }
  166. // 优选字段,和 extracttype 一致,isprefer 1:优选 -1:取消优选
  167. //if util.IntAll(tmp["isprefer"]) != 1 {
  168. // return
  169. //}
  170. //针对产权数据,暂时不入es 索引库
  171. if util.IntAll(tmp["infoformat"]) == 3 {
  172. return
  173. }
  174. /**
  175. 数据抽取时,有的数据的发布时间是之前的,属于增量历史数据,在判重和同步到bidding表是,会添加history_updatetime
  176. 字段,所以下面判断才会处理
  177. */
  178. if stype == "bidding_history" && tmp["history_updatetime"] == nil {
  179. return
  180. }
  181. //开启OSS时,detail需要重新获取
  182. if config.Conf.Env.Oss {
  183. id := mongodb.BsonIdToSId(tmp["_id"])
  184. val := oss.OssGetObject(id, config.Conf.DB.Oss.DetailBucket)
  185. tmp["detail"] = val
  186. }
  187. indexLock.Lock()
  188. index++
  189. indexLock.Unlock()
  190. newTmp, update := GetEsField(tmp, stype)
  191. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  192. newTmp["old_preferid"] = tmp["old_preferid"] //上次优选ID,需要es 删除
  193. //针对中国政府采购网,单独处理
  194. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  195. objectType := MatchService(tmp)
  196. if objectType != "" {
  197. newTmp["object_type"] = objectType
  198. }
  199. }
  200. newTmp["stype"] = stype
  201. pici := util.Int64All(tmp["pici"])
  202. //MongoDB 存在pici 字段
  203. if pici > 0 {
  204. newTmp["pici"] = pici
  205. }
  206. //
  207. //if stype == "bidding" || stype == "bidding_history" || stype == "index-by-id" {
  208. // newTmp["stype"] = stype
  209. // //之前存在pici,就不在添加
  210. // if pici, ok := tmp["pici"]; ok {
  211. // newTmp["pici"] = pici
  212. // //log.Info("dddddddd", zap.Any("bidding_id", tmp["_id"]), zap.Any("pici", pici))
  213. // } else {
  214. // newTmp["pici"] = time.Now().Unix()
  215. // update["pici"] = time.Now().Unix()
  216. // }
  217. //
  218. //}
  219. //todo 处理中国移动定制标签
  220. if len(globalRegs) > 0 && len(MatchArr) > 0 {
  221. gs, _, _ := TaskTags(tmp, globalRegs)
  222. if len(gs) > 0 {
  223. tags, match, add := TaskTags(tmp, MatchArr)
  224. if len(tags) > 0 {
  225. newTmp["mobile_tag"] = tags
  226. update["mobile_tag"] = tags
  227. log.Info("biddingTask", zap.Any(mongodb.BsonIdToSId(tmp["_id"]), match+","+add))
  228. }
  229. }
  230. }
  231. //
  232. saveEsPool <- newTmp
  233. if len(update) > 0 {
  234. updateBiddingPool <- map[string]interface{}{
  235. "_id": tmp["_id"],
  236. "set": update,
  237. }
  238. //if util.ObjToString(newTmp["spidercode"]) == "a_jyxxfbpt_gg" {
  239. // // 剑鱼信息发布数据 通过udp通知信息发布程序
  240. // go UdpMethod(mongodb.BsonIdToSId(newTmp["_id"]))
  241. //}
  242. }
  243. }(docCopy)
  244. tmp = map[string]interface{}{}
  245. }
  246. wg.Wait()
  247. log.Info("biddingTask over", zap.Int("count", c1), zap.Int("index", index))
  248. //更新状态
  249. if stype == "bidding" {
  250. uq := bson.M{"gtid": bson.M{"$gte": util.ObjToString(mapInfo["gtid"])},
  251. "lteid": bson.M{"$lte": util.ObjToString(mapInfo["lteid"])}}
  252. if config.Conf.Env.Ai {
  253. MgoBOld.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess_ai": 8, "updatetime": time.Now().Unix(), "index_num": index}}, false, true)
  254. } else {
  255. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 9, "updatetime": time.Now().Unix(), "index_num": index}}, false, true)
  256. }
  257. }
  258. ////发送udp,附件补采 才需要
  259. //data := map[string]interface{}{
  260. // "stype": "update",
  261. // "gtid": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  262. // "lteid": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  263. //}
  264. //target := &net.UDPAddr{
  265. // Port: 1782,
  266. // IP: net.ParseIP("127.0.0.1"),
  267. //}
  268. //bytes, _ := json.Marshal(data)
  269. //err := UdpClient.WriteUdp(bytes, udp.OP_TYPE_DATA, target)
  270. //if err != nil {
  271. // log.Info("biddingTask ", zap.Any("WriteUdp err", err), zap.Any("target", target))
  272. //}
  273. //
  274. //log.Info("biddingTask ", zap.Any("target", target), zap.Any("data", data))
  275. //
  276. //
  277. //重采平台需要
  278. //mapInfo["stype"] = ""
  279. //datas, _ := json.Marshal(mapInfo)
  280. //var next = &net.UDPAddr{
  281. // IP: net.ParseIP("127.0.0.1"),
  282. // Port: 1910,
  283. //}
  284. //log.Info("bidding index es over", zap.Any("es", next), zap.String("mapinfo", string(datas)))
  285. //}
  286. }
  287. // biddingAllTask 补充存量数据
  288. func biddingAllTask(mapInfo map[string]interface{}) {
  289. defer util.Catch()
  290. stype := util.ObjToString(mapInfo["stype"])
  291. q, _ := mapInfo["query"].(map[string]interface{})
  292. if q == nil {
  293. q = map[string]interface{}{
  294. "_id": map[string]interface{}{
  295. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  296. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  297. },
  298. }
  299. } else {
  300. //针对gte/lte,单独转换
  301. q = convertToMongoID(q)
  302. }
  303. ch := make(chan bool, 50)
  304. wg := &sync.WaitGroup{}
  305. //bidding库
  306. biddingConn := MgoB.GetMgoConn()
  307. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(config.Conf.DB.MongoB.Coll).Find(&q).Select(map[string]interface{}{
  308. "contenthtml": 0,
  309. }).Iter()
  310. c1, index := 0, 0
  311. var indexLock sync.Mutex
  312. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  313. if c1%20000 == 0 {
  314. log.Info("biddingAllTask", zap.Int("current:", c1))
  315. log.Info("biddingAllTask", zap.Any("current:_id =>", tmp["_id"]))
  316. }
  317. ch <- true
  318. wg.Add(1)
  319. go func(tmp map[string]interface{}) {
  320. defer func() {
  321. <-ch
  322. wg.Done()
  323. }()
  324. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  325. tmp = make(map[string]interface{})
  326. return
  327. }
  328. // 针对存量数据,重复数据不进索引
  329. if util.IntAll(tmp["extracttype"]) == -1 {
  330. return
  331. }
  332. //针对产权数据,暂时不入es 索引库
  333. if util.IntAll(tmp["infoformat"]) == 3 {
  334. return
  335. }
  336. //开启OSS时,detail需要重新获取
  337. if config.Conf.Env.Oss {
  338. id := mongodb.BsonIdToSId(tmp["_id"])
  339. val := oss.OssGetObject(id, config.Conf.DB.Oss.DetailBucket)
  340. tmp["detail"] = val
  341. }
  342. indexLock.Lock()
  343. index++
  344. indexLock.Unlock()
  345. newTmp, update := GetEsField(tmp, stype)
  346. newTmp["old_preferid"] = tmp["old_preferid"] //上次优选ID,需要es 删除
  347. //针对中国政府采购网,单独处理
  348. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  349. objectType := MatchService(tmp)
  350. if objectType != "" {
  351. newTmp["object_type"] = objectType
  352. }
  353. }
  354. //todo 处理中国移动定制标签
  355. if len(globalRegs) > 0 && len(MatchArr) > 0 {
  356. gs, _, _ := TaskTags(tmp, globalRegs)
  357. if len(gs) > 0 {
  358. tags, match, add := TaskTags(tmp, MatchArr)
  359. if len(tags) > 0 {
  360. newTmp["mobile_tag"] = tags
  361. update["mobile_tag"] = tags
  362. log.Info("biddingTask", zap.Any(mongodb.BsonIdToSId(tmp["_id"]), match+","+add))
  363. }
  364. }
  365. }
  366. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  367. if len(update) > 0 {
  368. updateBiddingPool <- map[string]interface{}{
  369. "_id": tmp["_id"],
  370. "set": update,
  371. }
  372. }
  373. saveEsPool <- newTmp
  374. }(tmp)
  375. tmp = map[string]interface{}{}
  376. }
  377. wg.Wait()
  378. log.Info("biddingAllTask over", zap.Int("count", c1), zap.Int("index", index))
  379. }
  380. // biddingAllDataTask 处理配置文件的存量数据
  381. func biddingAllDataTask() {
  382. type Biddingall struct {
  383. Coll string
  384. Gtid string
  385. Lteid string
  386. }
  387. type RoutinesConf struct {
  388. Num int
  389. }
  390. type AllConf struct {
  391. All map[string]Biddingall
  392. Routines RoutinesConf
  393. }
  394. var all AllConf
  395. viper.SetConfigFile("biddingall.toml")
  396. viper.SetConfigName("biddingall") // 配置文件名称(无扩展名)
  397. viper.SetConfigType("toml") // 如果配置文件的名称中没有扩展名,则需要配置此项
  398. viper.AddConfigPath("./")
  399. err := viper.ReadInConfig() // 查找并读取配置文件
  400. if err != nil { // 处理读取配置文件的错误
  401. fmt.Println("ReadInConfig err =>", err)
  402. return
  403. }
  404. err = viper.Unmarshal(&all)
  405. if err != nil {
  406. fmt.Println("biddingAllDataTask Unmarshal err =>", err)
  407. return
  408. }
  409. for k, conf := range all.All {
  410. go dealData(conf.Coll, conf.Gtid, conf.Lteid, k, all.Routines.Num)
  411. }
  412. }
  413. func dealData(coll, gtid, lteid, kword string, routines int) {
  414. ch := make(chan bool, routines)
  415. wg := &sync.WaitGroup{}
  416. q := map[string]interface{}{
  417. "_id": map[string]interface{}{
  418. "$gt": mongodb.StringTOBsonId(gtid),
  419. "$lte": mongodb.StringTOBsonId(lteid),
  420. },
  421. }
  422. log.Info("dealData", zap.Any(kword, q))
  423. biddingConn := MgoB.GetMgoConn()
  424. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(coll).Find(&q).Select(map[string]interface{}{
  425. "contenthtml": 0,
  426. }).Iter()
  427. c1, index := 0, 0
  428. var indexLock sync.Mutex
  429. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  430. if c1%20000 == 0 {
  431. log.Info(kword, zap.Int("current:", c1))
  432. log.Info(kword, zap.Any("current:_id =>", tmp["_id"]))
  433. }
  434. ch <- true
  435. wg.Add(1)
  436. go func(tmp map[string]interface{}) {
  437. defer func() {
  438. <-ch
  439. wg.Done()
  440. }()
  441. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  442. tmp = make(map[string]interface{})
  443. return
  444. }
  445. // 针对存量数据,重复数据不进索引
  446. if util.IntAll(tmp["extracttype"]) == -1 {
  447. return
  448. }
  449. //针对产权数据,暂时不入es 索引库
  450. if util.IntAll(tmp["infoformat"]) == 3 {
  451. return
  452. }
  453. indexLock.Lock()
  454. index++
  455. indexLock.Unlock()
  456. newTmp, update := GetEsField(tmp, "biddingall")
  457. //针对中国政府采购网,单独处理
  458. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  459. objectType := MatchService(tmp)
  460. if objectType != "" {
  461. newTmp["object_type"] = objectType
  462. }
  463. }
  464. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  465. if len(update) > 0 {
  466. updateBiddingPool <- map[string]interface{}{
  467. "_id": tmp["_id"],
  468. "set": update,
  469. }
  470. }
  471. //saveEsPool <- newTmp
  472. saveBiddingAllPool <- newTmp
  473. }(tmp)
  474. tmp = map[string]interface{}{}
  475. }
  476. wg.Wait()
  477. log.Info(fmt.Sprintf("%s over", kword), zap.Int("count", c1), zap.Int("index", index))
  478. }
  479. func biddingTaskById(mapInfo map[string]interface{}) {
  480. defer util.Catch()
  481. stype := util.ObjToString(mapInfo["stype"])
  482. infoid := util.ObjToString(mapInfo["infoid"])
  483. tmp, _ := MgoB.FindById(config.Conf.DB.MongoB.Coll, infoid, map[string]interface{}{"contenthtml": 0})
  484. ////判断是否是预处理数据;pre_id 是标识
  485. //if config.Conf.Env.OpenPre {
  486. // if pre_id, ok := (*tmp)["pre_id"]; ok {
  487. // preID := util.ObjToString(pre_id)
  488. // if preID != "" {
  489. // deletePreEsData(preID)
  490. // }
  491. // }
  492. //}
  493. if sensitive := util.ObjToString((*tmp)["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  494. return
  495. }
  496. //开启OSS时,detail需要重新获取
  497. if config.Conf.Env.Oss {
  498. id := mongodb.BsonIdToSId((*tmp)["_id"])
  499. val := oss.OssGetObject(id, config.Conf.DB.Oss.DetailBucket)
  500. (*tmp)["detail"] = val
  501. }
  502. if util.IntAll((*tmp)["extracttype"]) == 1 {
  503. newTmp, update := GetEsField(*tmp, stype)
  504. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  505. newTmp["old_preferid"] = (*tmp)["old_preferid"] //上次优选ID,需要es 删除
  506. if len(update) > 0 {
  507. //updateBiddingPool <- []map[string]interface{}{{
  508. // "_id": mongodb.StringTOBsonId(infoid),
  509. //},
  510. // {"$set": update},
  511. //}
  512. }
  513. saveEsPool <- newTmp
  514. }
  515. log.Info("biddingTaskById over", zap.Any("mapInfo", mapInfo))
  516. }
  517. // GetEsField @Description ES字段
  518. // @Author J 2022/6/7 11:34 AM
  519. func GetEsField(tmp map[string]interface{}, stype string) (map[string]interface{}, map[string]interface{}) {
  520. newTmp := make(map[string]interface{})
  521. update := make(map[string]interface{}) // bidding 修改字段
  522. saveErr := make(map[string]interface{})
  523. //for field, ftype := range config.Conf.DB.Es.FieldEs {
  524. for field, ftype := range BiddingField {
  525. if tmp[field] != nil { //
  526. if field == "purchasinglist" { //标的物处理
  527. purchasinglist_new := []map[string]interface{}{}
  528. if pcl, _ := tmp[field].([]interface{}); len(pcl) > 0 {
  529. for _, ls := range pcl {
  530. lsm_new := make(map[string]interface{})
  531. lsm := ls.(map[string]interface{})
  532. for pf, pftype := range BiddingLevelField[field] {
  533. lsmv := lsm[pf]
  534. if lsmv != nil && reflect.TypeOf(lsmv).String() == pftype {
  535. lsm_new[pf] = lsm[pf]
  536. }
  537. }
  538. if lsm_new != nil && len(lsm_new) > 0 {
  539. purchasinglist_new = append(purchasinglist_new, lsm_new)
  540. }
  541. }
  542. }
  543. if len(purchasinglist_new) > 0 {
  544. newTmp[field] = purchasinglist_new
  545. }
  546. } else if field == "procurementlist" {
  547. if tmp["procurementlist"] != nil {
  548. var arr []interface{}
  549. plist := tmp["procurementlist"].([]interface{})
  550. for _, p := range plist {
  551. p1 := p.(map[string]interface{})
  552. p2 := make(map[string]interface{})
  553. for k, v := range BiddingLevelField[field] {
  554. if k == "projectname" && util.ObjToString(p1[k]) == "" {
  555. p2[k] = util.ObjToString(tmp["projectname"])
  556. } else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" {
  557. p2[k] = util.ObjToString(tmp["buyer"])
  558. } else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" {
  559. res := getMethod(util.ObjToString(p1[k]))
  560. if res != 0 {
  561. p2[k] = res
  562. }
  563. } else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v {
  564. p2[k] = p1[k]
  565. }
  566. }
  567. arr = append(arr, p2)
  568. }
  569. if len(arr) > 0 {
  570. newTmp[field] = arr
  571. }
  572. }
  573. } else if field == "projectscope" {
  574. ps, _ := tmp["projectscope"].(string)
  575. newTmp["projectscope"] = ps
  576. //新版本已无需记录长度
  577. //if len(ps) > pscopeLength {
  578. // saveErr["projectscope"] = ps
  579. // saveErr["projectscope_length"] = len(ps)
  580. //}
  581. } else if field == "winnerorder" { //中标候选
  582. winnerorder_new := []map[string]interface{}{}
  583. if winnerorder, _ := tmp[field].([]interface{}); len(winnerorder) > 0 {
  584. for _, win := range winnerorder {
  585. winMap_new := make(map[string]interface{})
  586. winMap := win.(map[string]interface{})
  587. for wf, wftype := range BiddingLevelField[field] {
  588. wfv := winMap[wf]
  589. if wfv != nil && reflect.TypeOf(wfv).String() == wftype {
  590. if wf == "sort" && util.Int64All(wfv) > 100 {
  591. continue
  592. }
  593. winMap_new[wf] = winMap[wf]
  594. }
  595. }
  596. if winMap_new != nil && len(winMap_new) > 0 {
  597. winnerorder_new = append(winnerorder_new, winMap_new)
  598. }
  599. }
  600. }
  601. if len(winnerorder_new) > 0 {
  602. newTmp[field] = winnerorder_new
  603. }
  604. } else if field == "qualifies" {
  605. //项目资质
  606. qs := []string{}
  607. if q, _ := tmp[field].([]interface{}); len(q) > 0 {
  608. for _, v := range q {
  609. v1 := v.(map[string]interface{})
  610. qs = append(qs, util.ObjToString(v1["key"]))
  611. }
  612. }
  613. if len(qs) > 0 {
  614. newTmp[field] = strings.Join(qs, ",")
  615. }
  616. } else if field == "bidopentime" {
  617. if tmp[field] != nil && tmp["bidendtime"] == nil {
  618. newTmp["bidendtime"] = tmp[field]
  619. newTmp[field] = tmp[field]
  620. } else if tmp[field] == nil && tmp["bidendtime"] != nil {
  621. newTmp["bidendtime"] = tmp[field]
  622. newTmp[field] = tmp["bidendtime"]
  623. } else {
  624. if tmp["bidopentime"] != nil {
  625. newTmp[field] = tmp["bidopentime"]
  626. }
  627. }
  628. } else if field == "detail" { //过滤
  629. detail, _ := tmp[field].(string)
  630. detail = filterSpace.ReplaceAllString(detail, "")
  631. // 不需要再保存记录长度
  632. //if len(detail) > pscopeLength {
  633. // saveErr["detail"] = detail
  634. // saveErr["detail_length"] = len(detail)
  635. //}
  636. if tmp["cleartag"] != nil {
  637. if tmp["cleartag"].(bool) {
  638. text, _ := FilterDetail(detail)
  639. newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
  640. } else {
  641. newTmp[field] = util.ObjToString(tmp["title"]) + " " + detail
  642. }
  643. } else {
  644. text, b := FilterDetail(detail)
  645. newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
  646. update["cleartag"] = b
  647. }
  648. } else if field == "topscopeclass" || field == "entidlist" {
  649. newTmp[field] = tmp[field]
  650. } else if field == "_id" {
  651. newTmp["_id"] = mongodb.BsonIdToSId(tmp["_id"])
  652. newTmp["id"] = mongodb.BsonIdToSId(tmp["_id"])
  653. } else if field == "publishtime" || field == "comeintime" {
  654. //字段类型不正确,特别处理
  655. if tmp[field] != nil && util.Int64All(tmp[field]) > 0 {
  656. newTmp[field] = util.Int64All(tmp[field])
  657. }
  658. } else if field == "package" {
  659. //分包信息处理
  660. packages := dealPackage(tmp)
  661. if len(packages) > 0 {
  662. newTmp["package"] = packages
  663. newTmp["subpackage"] = 1
  664. }
  665. } else if field == "infoformat" {
  666. newTmp[field] = tmp[field]
  667. } else { //其它字段判断数据类型,不正确舍弃
  668. if fieldval := tmp[field]; reflect.TypeOf(fieldval).String() != ftype && ftype != "" {
  669. continue
  670. } else {
  671. if fieldval != "" {
  672. newTmp[field] = fieldval
  673. }
  674. }
  675. }
  676. }
  677. }
  678. // 附件内容长度不做限制,大于长度限制做记录
  679. filetext := getFileText(tmp)
  680. if len([]rune(filetext)) > 10 {
  681. //去除 空格
  682. newTmp["filetext"] = strings.Replace(filetext, " ", "", -1)
  683. if len([]rune(filetext)) > fileLength {
  684. //saveErr["filetext"] = filetext
  685. saveErr["filetext_length"] = len([]rune(filetext))
  686. }
  687. }
  688. YuceEndtime(newTmp) // 预测结果时间
  689. //if stype == "bidding" || stype == "bidding_history" || stype == "index-by-id" {
  690. // newTmp["createtime"] = time.Now().Unix() // es库数据创建时间,只有增量数据有
  691. // newTmp["pici"] = time.Now().Unix() //createtime跟pici一样,为了剑鱼功能需要,并行存在一段时间,之后可以删掉createtime
  692. // update["pici"] = time.Now().Unix()
  693. //}
  694. if len(saveErr) > 0 {
  695. saveErr["infoid"] = mongodb.BsonIdToSId(tmp["_id"])
  696. saveErr["time"] = time.Now().Unix()
  697. saveErrBidPool <- saveErr
  698. }
  699. return newTmp, update
  700. }
  701. // @Description 采购意向 预计采购时间处理
  702. // @Author J 2022/6/7 8:04 PM
  703. func getMethod(str string) int64 {
  704. // Handle "YYYY" format
  705. if TimeV1.MatchString(str) {
  706. arr := TimeV1.FindStringSubmatch(str)
  707. st := arr[1] + "0000"
  708. parseInt, err := strconv.ParseInt(st, 10, 64)
  709. if err == nil {
  710. return parseInt
  711. }
  712. }
  713. // Handle "YYYYMM" or "YYYY/MM" or "YYYY-MM" or "YYYY.MM" format
  714. if TimeV2.MatchString(str) {
  715. arr := TimeV2.FindStringSubmatch(str)
  716. year := arr[1]
  717. month := arr[2]
  718. if len(month) == 1 {
  719. month = "0" + month
  720. }
  721. str2 := year + month + "00"
  722. parseInt, err := strconv.ParseInt(str2, 10, 64)
  723. if err == nil {
  724. return parseInt
  725. }
  726. }
  727. // Handle "YYYYMMDD" or "YYYY/MM/DD" or "YYYY-MM-DD" or "YYYY.MM.DD" format
  728. if TimeV3.MatchString(str) {
  729. match := TimeV3.FindStringSubmatch(str)
  730. if len(match) >= 4 {
  731. year := match[1]
  732. month := match[2]
  733. day := match[3]
  734. if len(month) == 1 {
  735. month = "0" + month
  736. }
  737. if len(day) == 1 {
  738. day = "0" + day
  739. }
  740. dateStr := year + month + day
  741. parseInt, err := strconv.ParseInt(dateStr, 10, 64)
  742. if err == nil {
  743. return parseInt
  744. }
  745. }
  746. }
  747. return 0
  748. }
  749. func FilterDetail(text string) (string, bool) {
  750. b := false // 清理标记
  751. for _, s := range config.Conf.DB.Es.DetailFilter {
  752. reg := regexp.MustCompile(s)
  753. if reg.MatchString(text) {
  754. text = reg.ReplaceAllString(text, "")
  755. if !b {
  756. b = true
  757. }
  758. }
  759. }
  760. return text, b
  761. }
  762. // @Description 附件内容
  763. // @Author J 2022/6/7 1:54 PM
  764. func getFileText(tmp map[string]interface{}) (filetext string) {
  765. if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok {
  766. for _, tmpData1 := range attchMap {
  767. if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok {
  768. for _, result := range tmpData2 {
  769. if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
  770. if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
  771. bs := oss.OssGetObject(attach_url, mongodb.BsonIdToSId(tmp["_id"])) //oss读数据
  772. //附件总长度限制550000,其中最后一个文件长度限制50000
  773. size := config.Conf.DB.Oss.Filesize
  774. if size <= 0 {
  775. size = 500000
  776. }
  777. if utf8.RuneCountInString(filetext+bs) < 50000+size {
  778. filetext += bs + "\n"
  779. } else {
  780. if len(bs) > 50000 {
  781. filetext += bs[0:50000]
  782. } else {
  783. filetext += bs
  784. }
  785. }
  786. //附件总长度限制550000
  787. if utf8.RuneCountInString(filetext) >= 50000+size {
  788. return
  789. }
  790. //正式环境
  791. //if utf8.RuneCountInString(filetext+bs) < fileLength {
  792. // filetext += bs + "\n"
  793. //} else {
  794. // if utf8.RuneCountInString(bs) > fileLength {
  795. // filetext = bs[0:fileLength]
  796. // } else {
  797. // filetext = bs
  798. // }
  799. // break
  800. //}
  801. }
  802. }
  803. }
  804. }
  805. }
  806. }
  807. return
  808. }
  809. // 预测结果时间
  810. func YuceEndtime(tmp map[string]interface{}) {
  811. flag := false
  812. flag2 := false
  813. falseOld := false
  814. scopeOld := []string{"服务采购_法律咨询", "服务采购_会计", "服务采购_物业", "服务采购_审计", "服务采购_安保", "服务采购_仓储物流",
  815. "服务采购_广告宣传印刷"}
  816. scope := []string{"信息技术_运维服务", "信息技术_软件开发", "信息技术_系统集成及安全", "信息技术_其他"}
  817. titles := []string{"短信服务", "短信发送服务"}
  818. details := []string{"短信发送服务", "短信服务平台", "短信服务项目"}
  819. subscopeclass := util.ObjToString(tmp["s_subscopeclass"])
  820. //1.先判断老的
  821. for _, v := range scopeOld {
  822. if strings.Contains(subscopeclass, v) {
  823. falseOld = true
  824. break
  825. }
  826. }
  827. //2.判断满足 s_subscopeclass 条件
  828. for _, v := range scope {
  829. if strings.Contains(subscopeclass, v) {
  830. flag = true
  831. break
  832. }
  833. }
  834. //不满足旧的,判断新的规则
  835. if !falseOld {
  836. //满足 s_subscopeclass ,再去判断title detail
  837. if flag {
  838. title := util.ObjToString(tmp["title"])
  839. for _, v := range titles {
  840. if strings.Contains(title, v) {
  841. flag2 = true
  842. }
  843. }
  844. if !flag2 {
  845. detail := util.ObjToString(tmp["detail"])
  846. for _, v := range details {
  847. if strings.Contains(detail, v) {
  848. flag2 = true
  849. }
  850. }
  851. }
  852. }
  853. if !flag2 {
  854. return
  855. }
  856. }
  857. subtype := util.ObjToString(tmp["subtype"])
  858. if subtype == "成交" || subtype == "合同" {
  859. // yucestarttime、yuceendtime
  860. yucestarttime, yuceendtime := int64(0), int64(0)
  861. // 项目周期中
  862. if util.ObjToString(tmp["projectperiod"]) != "" {
  863. dateStr := date1.FindAllString(util.ObjToString(tmp["projectperiod"]), -1)
  864. if len(dateStr) == 2 {
  865. sdate := FormatDateStr(dateStr[0])
  866. edate := FormatDateStr(dateStr[1])
  867. if sdate < edate && sdate != 0 && edate != 0 {
  868. yucestarttime = sdate
  869. yuceendtime = edate
  870. }
  871. }
  872. }
  873. if yucestarttime > 0 && yuceendtime > yucestarttime {
  874. tmp["yuceendtime"] = yuceendtime
  875. return
  876. }
  877. // 预测开始时间 合同签订日期
  878. if yucestarttime == 0 {
  879. if util.IntAll(tmp["signaturedate"]) <= 0 {
  880. if util.IntAll(tmp["publishtime"]) <= 0 {
  881. return
  882. } else {
  883. yucestarttime = util.Int64All(tmp["publishtime"])
  884. }
  885. } else {
  886. yucestarttime = util.Int64All(tmp["signaturedate"])
  887. }
  888. }
  889. // 预测结束时间
  890. if yucestarttime > 0 && yuceendtime == 0 {
  891. if util.IntAll(tmp["project_duration"]) > 0 && util.ObjToString(tmp["project_timeunit"]) != "" {
  892. yuceendtime = YcEndTime(yucestarttime, util.IntAll(tmp["project_duration"]), util.ObjToString(tmp["project_timeunit"]))
  893. tmp["yuceendtime"] = yuceendtime
  894. }
  895. }
  896. }
  897. }
  898. func FormatDateStr(ds string) int64 {
  899. ds = strings.Replace(ds, "年", "-", -1)
  900. ds = strings.Replace(ds, "月", "-", -1)
  901. ds = strings.Replace(ds, "日", "", -1)
  902. ds = strings.Replace(ds, "/", "-", -1)
  903. ds = strings.Replace(ds, ".", "-", -1)
  904. layout1 := "2006-1-2"
  905. location, err := time.ParseInLocation(util.Date_Short_Layout, ds, time.Local)
  906. if err != nil {
  907. location, err := time.ParseInLocation(layout1, ds, time.Local)
  908. if err != nil {
  909. log.Error("FormatDateStr", zap.Error(err))
  910. return 0
  911. } else {
  912. return location.Unix()
  913. }
  914. } else {
  915. return location.Unix()
  916. }
  917. }
  918. func YcEndTime(starttime int64, num int, unit string) int64 {
  919. yuceendtime := int64(0)
  920. if unit == "日历天" || unit == "天" || unit == "日" {
  921. yuceendtime = starttime + int64(num*86400)
  922. } else if unit == "周" {
  923. yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num*7).Unix()
  924. } else if unit == "月" {
  925. yuceendtime = time.Unix(starttime, 0).AddDate(0, num, 0).Unix()
  926. } else if unit == "年" {
  927. yuceendtime = time.Unix(starttime, 0).AddDate(num, 0, 0).Unix()
  928. } else if unit == "工作日" {
  929. n := num / 7 * 2
  930. yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num+n).Unix()
  931. }
  932. return yuceendtime
  933. }
  934. // UdpMethod @Description rpc调用信息发布程序接口
  935. // @Author J 2022/4/13 9:13 AM
  936. func UdpMethod(id string) {
  937. mapinfo := map[string]interface{}{
  938. "infoid": id,
  939. "stype": "jyfb_data_over",
  940. }
  941. datas, _ := json.Marshal(mapinfo)
  942. log.Info("UdpMethod", zap.Any("JyUdpAddr", JyUdpAddr), zap.String("mapinfo", string(datas)))
  943. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, JyUdpAddr)
  944. }
  945. // MatchService 针对中国招标网,匹配关键词打标签,object_type,货物、服务、工程,jsondata.item
  946. func MatchService(tmp map[string]interface{}) (res string) {
  947. if jsondata, ok := tmp["jsondata"]; ok {
  948. if da, ok := jsondata.(map[string]interface{}); ok {
  949. if item, ok := da["item"]; ok {
  950. services := []string{"货物", "服务", "工程"}
  951. for _, v := range services {
  952. if strings.Contains(util.ObjToString(item), v) {
  953. return v
  954. }
  955. }
  956. }
  957. }
  958. }
  959. return
  960. }
  961. // dealPackage 处理package 字段
  962. func dealPackage(tmp map[string]interface{}) (newpackages []map[string]interface{}) {
  963. package1, ok1 := tmp["package"]
  964. s_winner, ok2 := tmp["s_winner"]
  965. bidamount, ok3 := tmp["bidamount"]
  966. var innerWinners = make([]string, 0)
  967. var biaoAmounts = make([]float64, 0)
  968. // 三个字段都存在
  969. if ok3 && ok2 && ok1 {
  970. packageMap, ok := package1.(map[string]interface{})
  971. if ok {
  972. if len(packageMap) >= 2 {
  973. var packages = make([]map[string]interface{}, 0)
  974. //var newTmp = make(map[string]interface{})
  975. winner_amount_count := 0
  976. for _, pack := range packageMap {
  977. var newPackage = make(map[string]interface{})
  978. pac, okk := pack.(map[string]interface{})
  979. if okk {
  980. _, okk1 := pac["winner"]
  981. _, okk2 := pac["bidamount"]
  982. _, okk3 := pac["name"]
  983. if okk1 {
  984. innerWinners = append(innerWinners, util.ObjToString(pac["winner"]))
  985. }
  986. if okk2 {
  987. biaoAmounts = append(biaoAmounts, util.Float64All(pac["bidamount"]))
  988. }
  989. //winner bidamount 二个字段都存在
  990. if okk1 && okk2 {
  991. winner_amount_count++
  992. newPackage["winner"] = pac["winner"]
  993. newPackage["bidamount"] = pac["bidamount"]
  994. if okk3 {
  995. newPackage["name"] = pac["name"]
  996. }
  997. packages = append(packages, newPackage)
  998. }
  999. }
  1000. }
  1001. //出现次数大于1
  1002. if winner_amount_count > 1 {
  1003. swinner := util.ObjToString(s_winner)
  1004. swinners := strings.Split(swinner, ",")
  1005. //判断里外 winner 是否相等
  1006. eq := StringSliceValuesEqual(swinners, innerWinners)
  1007. if eq {
  1008. //判断金额相等
  1009. if Float64Equal1Precision(Float64SliceSum(biaoAmounts), util.Float64All(bidamount)) {
  1010. newpackages = packages
  1011. }
  1012. }
  1013. }
  1014. }
  1015. }
  1016. }
  1017. return
  1018. }