bidding_es.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922
  1. package main
  2. import (
  3. "encoding/json"
  4. "esindex/config"
  5. "esindex/oss"
  6. "fmt"
  7. "github.com/spf13/viper"
  8. "go.mongodb.org/mongo-driver/bson"
  9. "go.uber.org/zap"
  10. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  11. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  12. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  13. "jygit.jydev.jianyu360.cn/data_processing/common_utils/udp"
  14. "reflect"
  15. "regexp"
  16. "strconv"
  17. "strings"
  18. "sync"
  19. "time"
  20. "unicode/utf8"
  21. )
  22. var (
  23. TimeV1 = regexp.MustCompile("(\\d{4})[年.]?$")
  24. TimeV2 = regexp.MustCompile("(\\d{4}[年.\\-/]?)(\\d{1,2}[月.\\-/]?$)")
  25. TimeV3 = regexp.MustCompile("(\\d{4}[年.\\-/]?)(\\d{1,2}[月.\\-/]?)(\\d{1,2}日?$)")
  26. TimeClear = regexp.MustCompile("[年|月|日|/|.|-]")
  27. filterSpace = regexp.MustCompile("<[^>]*?>|[\\s\u3000\u2003\u00a0]")
  28. date1 = regexp.MustCompile("20[0-2][0-9][年|\\-/.][0-9]{1,2}[月|\\-/.][0-9]{1,2}[日]?")
  29. HtmlReg = regexp.MustCompile("<[^>]+>")
  30. )
  31. func biddingTask(mapInfo map[string]interface{}) {
  32. defer util.Catch()
  33. stype := util.ObjToString(mapInfo["stype"])
  34. q, _ := mapInfo["query"].(map[string]interface{})
  35. if q == nil {
  36. q = map[string]interface{}{
  37. "_id": map[string]interface{}{
  38. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  39. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  40. },
  41. }
  42. } else {
  43. //针对gte/lte,单独转换
  44. q = convertToMongoID(q)
  45. }
  46. ch := make(chan bool, 10)
  47. wg := &sync.WaitGroup{}
  48. //bidding库
  49. biddingConn := MgoB.GetMgoConn()
  50. count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
  51. log.Info("bidding表", zap.Int64("同步总数:", count))
  52. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(config.Conf.DB.MongoB.Coll).Find(&q).Select(map[string]interface{}{
  53. "contenthtml": 0,
  54. }).Iter()
  55. c1, index := 0, 0
  56. var indexLock sync.Mutex
  57. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  58. if c1%1000 == 0 {
  59. log.Info("biddingTask", zap.Int("current:", c1))
  60. log.Info("biddingTask", zap.Any("current:_id =>", tmp["_id"]))
  61. }
  62. ch <- true
  63. wg.Add(1)
  64. go func(tmp map[string]interface{}) {
  65. defer func() {
  66. <-ch
  67. wg.Done()
  68. }()
  69. //判断是否是预处理数据;pre_id 是标识
  70. //if config.Conf.Env.OpenPre {
  71. // if pre_id, ok := tmp["pre_id"]; ok {
  72. // preID := util.ObjToString(pre_id)
  73. // if preID != "" {
  74. // deletePreEsData(preID)
  75. // }
  76. // }
  77. //}
  78. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  79. tmp = make(map[string]interface{})
  80. return
  81. }
  82. //只针对增量数据处理;全量数据 需要用extracttype字段判断
  83. //7: 重复数据
  84. //8: 不重复
  85. if util.IntAll(tmp["dataprocess"]) != 8 {
  86. return
  87. }
  88. //// 增量数据使用上面判断;全量数据使用下面配置
  89. //-1:重复 ,1:不重复 ,0:入库 9:分类
  90. //if util.IntAll(tmp["extracttype"]) != 1 {
  91. // return
  92. //}
  93. //针对产权数据,暂时不入es 索引库
  94. if util.IntAll(tmp["infoformat"]) == 3 {
  95. return
  96. }
  97. /**
  98. 数据抽取时,有的数据的发布时间是之前的,属于增量历史数据,在判重和同步到bidding表是,会添加history_updatetime
  99. 字段,所以下面判断才会处理
  100. */
  101. if stype == "bidding_history" && tmp["history_updatetime"] == nil {
  102. return
  103. }
  104. indexLock.Lock()
  105. index++
  106. indexLock.Unlock()
  107. newTmp, update := GetEsField(tmp, stype)
  108. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  109. //针对中国政府采购网,单独处理
  110. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  111. objectType := MatchService(tmp)
  112. if objectType != "" {
  113. newTmp["object_type"] = objectType
  114. }
  115. }
  116. if len(update) > 0 {
  117. updateBiddingPool <- []map[string]interface{}{{
  118. "_id": tmp["_id"],
  119. },
  120. {"$set": update},
  121. }
  122. }
  123. if util.ObjToString(newTmp["spidercode"]) == "a_jyxxfbpt_gg" {
  124. // 剑鱼信息发布数据 通过udp通知信息发布程序
  125. go UdpMethod(mongodb.BsonIdToSId(newTmp["_id"]))
  126. }
  127. saveEsPool <- newTmp
  128. }(tmp)
  129. tmp = map[string]interface{}{}
  130. }
  131. wg.Wait()
  132. log.Info("biddingTask over", zap.Int("count", c1), zap.Int("index", index))
  133. //更新状态
  134. if stype == "bidding" {
  135. uq := bson.M{"gtid": bson.M{"$gte": util.ObjToString(mapInfo["gtid"])},
  136. "lteid": bson.M{"$lte": util.ObjToString(mapInfo["lteid"])}}
  137. MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 8, "updatetime": time.Now().Unix(), "index_num": index}}, false, true)
  138. }
  139. ////发送udp,附件补采 才需要
  140. //data := map[string]interface{}{
  141. // "stype": "update",
  142. // "gtid": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  143. // "lteid": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  144. //}
  145. //target := &net.UDPAddr{
  146. // Port: 1782,
  147. // IP: net.ParseIP("127.0.0.1"),
  148. //}
  149. //bytes, _ := json.Marshal(data)
  150. //err := UdpClient.WriteUdp(bytes, udp.OP_TYPE_DATA, target)
  151. //if err != nil {
  152. // log.Info("biddingTask ", zap.Any("WriteUdp err", err), zap.Any("target", target))
  153. //}
  154. //
  155. //log.Info("biddingTask ", zap.Any("target", target), zap.Any("data", data))
  156. //
  157. //
  158. //重采平台需要
  159. //mapInfo["stype"] = ""
  160. //datas, _ := json.Marshal(mapInfo)
  161. //var next = &net.UDPAddr{
  162. // IP: net.ParseIP("127.0.0.1"),
  163. // Port: 1910,
  164. //}
  165. //log.Info("bidding index es over", zap.Any("es", next), zap.String("mapinfo", string(datas)))
  166. }
  167. // biddingAllTask 补充存量数据
  168. func biddingAllTask(mapInfo map[string]interface{}) {
  169. defer util.Catch()
  170. stype := util.ObjToString(mapInfo["stype"])
  171. q, _ := mapInfo["query"].(map[string]interface{})
  172. if q == nil {
  173. q = map[string]interface{}{
  174. "_id": map[string]interface{}{
  175. "$gt": mongodb.StringTOBsonId(mapInfo["gtid"].(string)),
  176. "$lte": mongodb.StringTOBsonId(mapInfo["lteid"].(string)),
  177. },
  178. }
  179. } else {
  180. //针对gte/lte,单独转换
  181. q = convertToMongoID(q)
  182. }
  183. ch := make(chan bool, 50)
  184. wg := &sync.WaitGroup{}
  185. //bidding库
  186. biddingConn := MgoB.GetMgoConn()
  187. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(config.Conf.DB.MongoB.Coll).Find(&q).Select(map[string]interface{}{
  188. "contenthtml": 0,
  189. }).Iter()
  190. c1, index := 0, 0
  191. var indexLock sync.Mutex
  192. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  193. if c1%20000 == 0 {
  194. log.Info("biddingAllTask", zap.Int("current:", c1))
  195. log.Info("biddingAllTask", zap.Any("current:_id =>", tmp["_id"]))
  196. }
  197. ch <- true
  198. wg.Add(1)
  199. go func(tmp map[string]interface{}) {
  200. defer func() {
  201. <-ch
  202. wg.Done()
  203. }()
  204. ////判断是否是预处理数据;pre_id 是标识
  205. //if config.Conf.Env.OpenPre {
  206. // if pre_id, ok := tmp["pre_id"]; ok {
  207. // preID := util.ObjToString(pre_id)
  208. // if preID != "" {
  209. // deletePreEsData(preID)
  210. // }
  211. // }
  212. //}
  213. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  214. tmp = make(map[string]interface{})
  215. return
  216. }
  217. // 针对存量数据,重复数据不进索引
  218. if util.IntAll(tmp["extracttype"]) == -1 {
  219. return
  220. }
  221. //针对产权数据,暂时不入es 索引库
  222. if util.IntAll(tmp["infoformat"]) == 3 {
  223. return
  224. }
  225. indexLock.Lock()
  226. index++
  227. indexLock.Unlock()
  228. newTmp, update := GetEsField(tmp, stype)
  229. //针对中国政府采购网,单独处理
  230. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  231. objectType := MatchService(tmp)
  232. if objectType != "" {
  233. newTmp["object_type"] = objectType
  234. }
  235. }
  236. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  237. if len(update) > 0 {
  238. updateBiddingPool <- []map[string]interface{}{{
  239. "_id": tmp["_id"],
  240. },
  241. {"$set": update},
  242. }
  243. }
  244. saveEsPool <- newTmp
  245. }(tmp)
  246. tmp = map[string]interface{}{}
  247. }
  248. wg.Wait()
  249. log.Info("biddingAllTask over", zap.Int("count", c1), zap.Int("index", index))
  250. }
  251. // biddingAllDataTask 处理配置文件的存量数据
  252. func biddingAllDataTask() {
  253. type Biddingall struct {
  254. Coll string
  255. Gtid string
  256. Lteid string
  257. }
  258. type RoutinesConf struct {
  259. Num int
  260. }
  261. type AllConf struct {
  262. All map[string]Biddingall
  263. Routines RoutinesConf
  264. }
  265. var all AllConf
  266. viper.SetConfigFile("biddingall.toml")
  267. viper.SetConfigName("biddingall") // 配置文件名称(无扩展名)
  268. viper.SetConfigType("toml") // 如果配置文件的名称中没有扩展名,则需要配置此项
  269. viper.AddConfigPath("./")
  270. err := viper.ReadInConfig() // 查找并读取配置文件
  271. if err != nil { // 处理读取配置文件的错误
  272. fmt.Println("ReadInConfig err =>", err)
  273. return
  274. }
  275. err = viper.Unmarshal(&all)
  276. if err != nil {
  277. fmt.Println("biddingAllDataTask Unmarshal err =>", err)
  278. return
  279. }
  280. for k, conf := range all.All {
  281. go dealData(conf.Coll, conf.Gtid, conf.Lteid, k, all.Routines.Num)
  282. }
  283. }
  284. func dealData(coll, gtid, lteid, kword string, routines int) {
  285. ch := make(chan bool, routines)
  286. wg := &sync.WaitGroup{}
  287. q := map[string]interface{}{
  288. "_id": map[string]interface{}{
  289. "$gt": mongodb.StringTOBsonId(gtid),
  290. "$lte": mongodb.StringTOBsonId(lteid),
  291. },
  292. }
  293. biddingConn := MgoB.GetMgoConn()
  294. it := biddingConn.DB(config.Conf.DB.MongoB.Dbname).C(coll).Find(&q).Select(map[string]interface{}{
  295. "contenthtml": 0,
  296. }).Iter()
  297. c1, index := 0, 0
  298. var indexLock sync.Mutex
  299. for tmp := make(map[string]interface{}); it.Next(tmp); c1++ {
  300. if c1%20000 == 0 {
  301. log.Info(kword, zap.Int("current:", c1))
  302. log.Info(kword, zap.Any("current:_id =>", tmp["_id"]))
  303. }
  304. ch <- true
  305. wg.Add(1)
  306. go func(tmp map[string]interface{}) {
  307. defer func() {
  308. <-ch
  309. wg.Done()
  310. }()
  311. if sensitive := util.ObjToString(tmp["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  312. tmp = make(map[string]interface{})
  313. return
  314. }
  315. // 针对存量数据,重复数据不进索引
  316. if util.IntAll(tmp["extracttype"]) == -1 {
  317. return
  318. }
  319. //针对产权数据,暂时不入es 索引库
  320. if util.IntAll(tmp["infoformat"]) == 3 {
  321. return
  322. }
  323. indexLock.Lock()
  324. index++
  325. indexLock.Unlock()
  326. newTmp, update := GetEsField(tmp, "biddingall")
  327. //针对中国政府采购网,单独处理
  328. if util.ObjToString(tmp["site"]) == "中国政府采购网" {
  329. objectType := MatchService(tmp)
  330. if objectType != "" {
  331. newTmp["object_type"] = objectType
  332. }
  333. }
  334. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  335. if len(update) > 0 {
  336. updateBiddingPool <- []map[string]interface{}{{
  337. "_id": tmp["_id"],
  338. },
  339. {"$set": update},
  340. }
  341. }
  342. saveEsPool <- newTmp
  343. }(tmp)
  344. tmp = map[string]interface{}{}
  345. }
  346. wg.Wait()
  347. log.Info(fmt.Sprintf("%s over", kword), zap.Int("count", c1), zap.Int("index", index))
  348. }
  349. func biddingTaskById(mapInfo map[string]interface{}) {
  350. defer util.Catch()
  351. stype := util.ObjToString(mapInfo["stype"])
  352. infoid := util.ObjToString(mapInfo["infoid"])
  353. tmp, _ := MgoB.FindById(config.Conf.DB.MongoB.Coll, infoid, map[string]interface{}{"contenthtml": 0})
  354. ////判断是否是预处理数据;pre_id 是标识
  355. //if config.Conf.Env.OpenPre {
  356. // if pre_id, ok := (*tmp)["pre_id"]; ok {
  357. // preID := util.ObjToString(pre_id)
  358. // if preID != "" {
  359. // deletePreEsData(preID)
  360. // }
  361. // }
  362. //}
  363. if sensitive := util.ObjToString((*tmp)["sensitive"]); sensitive == "测试" { //bidding中有敏感词,不生索引
  364. return
  365. }
  366. if util.IntAll((*tmp)["extracttype"]) == 1 {
  367. newTmp, update := GetEsField(*tmp, stype)
  368. newTmp["dataweight"] = 0 //索引数据新增 jy置顶字段
  369. if len(update) > 0 {
  370. //updateBiddingPool <- []map[string]interface{}{{
  371. // "_id": mongodb.StringTOBsonId(infoid),
  372. //},
  373. // {"$set": update},
  374. //}
  375. }
  376. saveEsPool <- newTmp
  377. }
  378. log.Info("biddingTaskById over", zap.Any("mapInfo", mapInfo))
  379. }
  380. // GetEsField @Description ES字段
  381. // @Author J 2022/6/7 11:34 AM
  382. func GetEsField(tmp map[string]interface{}, stype string) (map[string]interface{}, map[string]interface{}) {
  383. newTmp := make(map[string]interface{})
  384. update := make(map[string]interface{}) // bidding 修改字段
  385. saveErr := make(map[string]interface{})
  386. //for field, ftype := range config.Conf.DB.Es.FieldEs {
  387. for field, ftype := range BiddingField {
  388. if tmp[field] != nil { //
  389. if field == "purchasinglist" { //标的物处理
  390. purchasinglist_new := []map[string]interface{}{}
  391. if pcl, _ := tmp[field].([]interface{}); len(pcl) > 0 {
  392. for _, ls := range pcl {
  393. lsm_new := make(map[string]interface{})
  394. lsm := ls.(map[string]interface{})
  395. for pf, pftype := range BiddingLevelField[field] {
  396. lsmv := lsm[pf]
  397. if lsmv != nil && reflect.TypeOf(lsmv).String() == pftype {
  398. lsm_new[pf] = lsm[pf]
  399. }
  400. }
  401. if lsm_new != nil && len(lsm_new) > 0 {
  402. purchasinglist_new = append(purchasinglist_new, lsm_new)
  403. }
  404. }
  405. }
  406. if len(purchasinglist_new) > 0 {
  407. newTmp[field] = purchasinglist_new
  408. }
  409. } else if field == "procurementlist" {
  410. if tmp["procurementlist"] != nil {
  411. var arr []interface{}
  412. plist := tmp["procurementlist"].([]interface{})
  413. for _, p := range plist {
  414. p1 := p.(map[string]interface{})
  415. p2 := make(map[string]interface{})
  416. for k, v := range BiddingLevelField[field] {
  417. if k == "projectname" && util.ObjToString(p1[k]) == "" {
  418. p2[k] = util.ObjToString(tmp["projectname"])
  419. } else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" {
  420. p2[k] = util.ObjToString(tmp["buyer"])
  421. } else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" {
  422. res := getMethod(util.ObjToString(p1[k]))
  423. if res != 0 {
  424. p2[k] = res
  425. }
  426. } else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v {
  427. p2[k] = p1[k]
  428. }
  429. }
  430. arr = append(arr, p2)
  431. }
  432. if len(arr) > 0 {
  433. newTmp[field] = arr
  434. }
  435. }
  436. } else if field == "projectscope" {
  437. ps, _ := tmp["projectscope"].(string)
  438. newTmp["projectscope"] = ps
  439. //新版本已无需记录长度
  440. //if len(ps) > pscopeLength {
  441. // saveErr["projectscope"] = ps
  442. // saveErr["projectscope_length"] = len(ps)
  443. //}
  444. } else if field == "winnerorder" { //中标候选
  445. winnerorder_new := []map[string]interface{}{}
  446. if winnerorder, _ := tmp[field].([]interface{}); len(winnerorder) > 0 {
  447. for _, win := range winnerorder {
  448. winMap_new := make(map[string]interface{})
  449. winMap := win.(map[string]interface{})
  450. for wf, wftype := range BiddingLevelField[field] {
  451. wfv := winMap[wf]
  452. if wfv != nil && reflect.TypeOf(wfv).String() == wftype {
  453. if wf == "sort" && util.Int64All(wfv) > 100 {
  454. continue
  455. }
  456. winMap_new[wf] = winMap[wf]
  457. }
  458. }
  459. if winMap_new != nil && len(winMap_new) > 0 {
  460. winnerorder_new = append(winnerorder_new, winMap_new)
  461. }
  462. }
  463. }
  464. if len(winnerorder_new) > 0 {
  465. newTmp[field] = winnerorder_new
  466. }
  467. } else if field == "qualifies" {
  468. //项目资质
  469. qs := []string{}
  470. if q, _ := tmp[field].([]interface{}); len(q) > 0 {
  471. for _, v := range q {
  472. v1 := v.(map[string]interface{})
  473. qs = append(qs, util.ObjToString(v1["key"]))
  474. }
  475. }
  476. if len(qs) > 0 {
  477. newTmp[field] = strings.Join(qs, ",")
  478. }
  479. } else if field == "bidopentime" {
  480. if tmp[field] != nil && tmp["bidendtime"] == nil {
  481. newTmp["bidendtime"] = tmp[field]
  482. newTmp[field] = tmp[field]
  483. } else if tmp[field] == nil && tmp["bidendtime"] != nil {
  484. newTmp["bidendtime"] = tmp[field]
  485. newTmp[field] = tmp["bidendtime"]
  486. } else {
  487. if tmp["bidopentime"] != nil {
  488. newTmp[field] = tmp["bidopentime"]
  489. }
  490. }
  491. } else if field == "detail" { //过滤
  492. detail, _ := tmp[field].(string)
  493. detail = filterSpace.ReplaceAllString(detail, "")
  494. // 不需要再保存记录长度
  495. //if len(detail) > pscopeLength {
  496. // saveErr["detail"] = detail
  497. // saveErr["detail_length"] = len(detail)
  498. //}
  499. if tmp["cleartag"] != nil {
  500. if tmp["cleartag"].(bool) {
  501. text, _ := FilterDetail(detail)
  502. newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
  503. } else {
  504. newTmp[field] = util.ObjToString(tmp["title"]) + " " + detail
  505. }
  506. } else {
  507. text, b := FilterDetail(detail)
  508. newTmp[field] = util.ObjToString(tmp["title"]) + " " + text
  509. update["cleartag"] = b
  510. }
  511. } else if field == "topscopeclass" || field == "entidlist" {
  512. newTmp[field] = tmp[field]
  513. } else if field == "_id" {
  514. newTmp["_id"] = mongodb.BsonIdToSId(tmp["_id"])
  515. newTmp["id"] = mongodb.BsonIdToSId(tmp["_id"])
  516. } else if field == "publishtime" || field == "comeintime" {
  517. //字段类型不正确,特别处理
  518. if tmp[field] != nil && util.Int64All(tmp[field]) > 0 {
  519. newTmp[field] = util.Int64All(tmp[field])
  520. }
  521. } else if field == "package" {
  522. //分包信息处理
  523. packages := dealPackage(tmp)
  524. if len(packages) > 0 {
  525. newTmp["package"] = packages
  526. newTmp["subpackage"] = 1
  527. }
  528. } else if field == "infoformat" {
  529. newTmp[field] = tmp[field]
  530. } else { //其它字段判断数据类型,不正确舍弃
  531. if fieldval := tmp[field]; reflect.TypeOf(fieldval).String() != ftype {
  532. continue
  533. } else {
  534. if fieldval != "" {
  535. newTmp[field] = fieldval
  536. }
  537. }
  538. }
  539. }
  540. }
  541. // 附件内容长度不做限制,大于长度限制做记录
  542. filetext := getFileText(tmp)
  543. if len([]rune(filetext)) > 10 {
  544. //去除 空格
  545. newTmp["filetext"] = strings.Replace(filetext, " ", "", -1)
  546. if len([]rune(filetext)) > fileLength {
  547. //saveErr["filetext"] = filetext
  548. saveErr["filetext_length"] = len([]rune(filetext))
  549. }
  550. }
  551. YuceEndtime(newTmp) // 预测结果时间
  552. if stype == "bidding" || stype == "bidding_history" || stype == "index-by-id" {
  553. newTmp["createtime"] = time.Now().Unix() // es库数据创建时间,只有增量数据有
  554. newTmp["pici"] = time.Now().Unix() //createtime跟pici一样,为了剑鱼功能需要,并行存在一段时间,之后可以删掉createtime
  555. update["pici"] = time.Now().Unix()
  556. }
  557. if len(saveErr) > 0 {
  558. saveErr["infoid"] = mongodb.BsonIdToSId(tmp["_id"])
  559. saveErr["time"] = time.Now().Unix()
  560. saveErrBidPool <- saveErr
  561. }
  562. return newTmp, update
  563. }
  564. // @Description 采购意向 预计采购时间处理
  565. // @Author J 2022/6/7 8:04 PM
  566. func getMethod(str string) int64 {
  567. if TimeV1.MatchString(str) {
  568. arr := TimeV1.FindStringSubmatch(str)
  569. st := arr[1] + "0000"
  570. parseInt, err := strconv.ParseInt(st, 10, 64)
  571. if err == nil {
  572. return parseInt
  573. }
  574. } else if TimeV2.MatchString(str) {
  575. arr := TimeV2.FindStringSubmatch(str)
  576. str1 := arr[2]
  577. if len(str1) == 1 {
  578. str1 = "0" + str1
  579. }
  580. str2 := TimeClear.ReplaceAllString(arr[1], "") + TimeClear.ReplaceAllString(str1, "") + "00"
  581. parseInt, err := strconv.ParseInt(str2, 10, 64)
  582. if err == nil {
  583. return parseInt
  584. }
  585. } else if TimeV3.MatchString(str) {
  586. match := TimeV3.FindStringSubmatch(str)
  587. if len(match) >= 4 {
  588. year, _ := strconv.Atoi(match[1])
  589. month, _ := strconv.Atoi(match[2])
  590. day, _ := strconv.Atoi(match[3])
  591. dateInt64 := int64(year*10000 + month*100 + day)
  592. return dateInt64
  593. }
  594. }
  595. return 0
  596. }
  597. func FilterDetail(text string) (string, bool) {
  598. b := false // 清理标记
  599. for _, s := range config.Conf.DB.Es.DetailFilter {
  600. reg := regexp.MustCompile(s)
  601. if reg.MatchString(text) {
  602. text = reg.ReplaceAllString(text, "")
  603. if !b {
  604. b = true
  605. }
  606. }
  607. }
  608. return text, b
  609. }
  610. // @Description 附件内容
  611. // @Author J 2022/6/7 1:54 PM
  612. func getFileText(tmp map[string]interface{}) (filetext string) {
  613. if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok {
  614. for _, tmpData1 := range attchMap {
  615. if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok {
  616. for _, result := range tmpData2 {
  617. if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
  618. if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
  619. bs := oss.OssGetObject(attach_url, mongodb.BsonIdToSId(tmp["_id"])) //oss读数据
  620. //附件总长度限制550000,其中最后一个文件长度限制50000
  621. size := config.Conf.DB.Oss.Filesize
  622. if size <= 0 {
  623. size = 500000
  624. }
  625. if utf8.RuneCountInString(filetext+bs) < 50000+size {
  626. filetext += bs + "\n"
  627. } else {
  628. if len(bs) > 50000 {
  629. filetext += bs[0:50000]
  630. } else {
  631. filetext += bs
  632. }
  633. }
  634. //附件总长度限制550000
  635. if utf8.RuneCountInString(filetext) >= 50000+size {
  636. return
  637. }
  638. //正式环境
  639. //if utf8.RuneCountInString(filetext+bs) < fileLength {
  640. // filetext += bs + "\n"
  641. //} else {
  642. // if utf8.RuneCountInString(bs) > fileLength {
  643. // filetext = bs[0:fileLength]
  644. // } else {
  645. // filetext = bs
  646. // }
  647. // break
  648. //}
  649. }
  650. }
  651. }
  652. }
  653. }
  654. }
  655. return
  656. }
  657. // 预测结果时间
  658. func YuceEndtime(tmp map[string]interface{}) {
  659. flag := false
  660. flag2 := false
  661. falseOld := false
  662. scopeOld := []string{"服务采购_法律咨询", "服务采购_会计", "服务采购_物业", "服务采购_审计", "服务采购_安保", "服务采购_仓储物流",
  663. "服务采购_广告宣传印刷"}
  664. scope := []string{"信息技术_运维服务", "信息技术_软件开发", "信息技术_系统集成及安全", "信息技术_其他"}
  665. titles := []string{"短信服务", "短信发送服务"}
  666. details := []string{"短信发送服务", "短信服务平台", "短信服务项目"}
  667. subscopeclass := util.ObjToString(tmp["s_subscopeclass"])
  668. //1.先判断老的
  669. for _, v := range scopeOld {
  670. if strings.Contains(subscopeclass, v) {
  671. falseOld = true
  672. break
  673. }
  674. }
  675. //2.判断满足 s_subscopeclass 条件
  676. for _, v := range scope {
  677. if strings.Contains(subscopeclass, v) {
  678. flag = true
  679. break
  680. }
  681. }
  682. //不满足旧的,判断新的规则
  683. if !falseOld {
  684. //满足 s_subscopeclass ,再去判断title detail
  685. if flag {
  686. title := util.ObjToString(tmp["title"])
  687. for _, v := range titles {
  688. if strings.Contains(title, v) {
  689. flag2 = true
  690. }
  691. }
  692. if !flag2 {
  693. detail := util.ObjToString(tmp["detail"])
  694. for _, v := range details {
  695. if strings.Contains(detail, v) {
  696. flag2 = true
  697. }
  698. }
  699. }
  700. }
  701. if !flag2 {
  702. return
  703. }
  704. }
  705. subtype := util.ObjToString(tmp["subtype"])
  706. if subtype == "成交" || subtype == "合同" {
  707. // yucestarttime、yuceendtime
  708. yucestarttime, yuceendtime := int64(0), int64(0)
  709. // 项目周期中
  710. if util.ObjToString(tmp["projectperiod"]) != "" {
  711. dateStr := date1.FindAllString(util.ObjToString(tmp["projectperiod"]), -1)
  712. if len(dateStr) == 2 {
  713. sdate := FormatDateStr(dateStr[0])
  714. edate := FormatDateStr(dateStr[1])
  715. if sdate < edate && sdate != 0 && edate != 0 {
  716. yucestarttime = sdate
  717. yuceendtime = edate
  718. }
  719. }
  720. }
  721. if yucestarttime > 0 && yuceendtime > yucestarttime {
  722. tmp["yuceendtime"] = yuceendtime
  723. return
  724. }
  725. // 预测开始时间 合同签订日期
  726. if yucestarttime == 0 {
  727. if util.IntAll(tmp["signaturedate"]) <= 0 {
  728. if util.IntAll(tmp["publishtime"]) <= 0 {
  729. return
  730. } else {
  731. yucestarttime = util.Int64All(tmp["publishtime"])
  732. }
  733. } else {
  734. yucestarttime = util.Int64All(tmp["signaturedate"])
  735. }
  736. }
  737. // 预测结束时间
  738. if yucestarttime > 0 && yuceendtime == 0 {
  739. if util.IntAll(tmp["project_duration"]) > 0 && util.ObjToString(tmp["project_timeunit"]) != "" {
  740. yuceendtime = YcEndTime(yucestarttime, util.IntAll(tmp["project_duration"]), util.ObjToString(tmp["project_timeunit"]))
  741. tmp["yuceendtime"] = yuceendtime
  742. }
  743. }
  744. }
  745. }
  746. func FormatDateStr(ds string) int64 {
  747. ds = strings.Replace(ds, "年", "-", -1)
  748. ds = strings.Replace(ds, "月", "-", -1)
  749. ds = strings.Replace(ds, "日", "", -1)
  750. ds = strings.Replace(ds, "/", "-", -1)
  751. ds = strings.Replace(ds, ".", "-", -1)
  752. layout1 := "2006-1-2"
  753. location, err := time.ParseInLocation(util.Date_Short_Layout, ds, time.Local)
  754. if err != nil {
  755. location, err := time.ParseInLocation(layout1, ds, time.Local)
  756. if err != nil {
  757. log.Error("FormatDateStr", zap.Error(err))
  758. return 0
  759. } else {
  760. return location.Unix()
  761. }
  762. } else {
  763. return location.Unix()
  764. }
  765. }
  766. func YcEndTime(starttime int64, num int, unit string) int64 {
  767. yuceendtime := int64(0)
  768. if unit == "日历天" || unit == "天" || unit == "日" {
  769. yuceendtime = starttime + int64(num*86400)
  770. } else if unit == "周" {
  771. yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num*7).Unix()
  772. } else if unit == "月" {
  773. yuceendtime = time.Unix(starttime, 0).AddDate(0, num, 0).Unix()
  774. } else if unit == "年" {
  775. yuceendtime = time.Unix(starttime, 0).AddDate(num, 0, 0).Unix()
  776. } else if unit == "工作日" {
  777. n := num / 7 * 2
  778. yuceendtime = time.Unix(starttime, 0).AddDate(0, 0, num+n).Unix()
  779. }
  780. return yuceendtime
  781. }
  782. // UdpMethod @Description rpc调用信息发布程序接口
  783. // @Author J 2022/4/13 9:13 AM
  784. func UdpMethod(id string) {
  785. mapinfo := map[string]interface{}{
  786. "infoid": id,
  787. "stype": "jyfb_data_over",
  788. }
  789. datas, _ := json.Marshal(mapinfo)
  790. log.Info("UdpMethod", zap.Any("JyUdpAddr", JyUdpAddr), zap.String("mapinfo", string(datas)))
  791. _ = UdpClient.WriteUdp(datas, udp.OP_TYPE_DATA, JyUdpAddr)
  792. }
  793. // MatchService 针对中国招标网,匹配关键词打标签,object_type,货物、服务、工程,jsondata.item
  794. func MatchService(tmp map[string]interface{}) (res string) {
  795. if jsondata, ok := tmp["jsondata"]; ok {
  796. if da, ok := jsondata.(map[string]interface{}); ok {
  797. if item, ok := da["item"]; ok {
  798. services := []string{"货物", "服务", "工程"}
  799. for _, v := range services {
  800. if strings.Contains(util.ObjToString(item), v) {
  801. return v
  802. }
  803. }
  804. }
  805. }
  806. }
  807. return
  808. }
  809. // dealPackage 处理package 字段
  810. func dealPackage(tmp map[string]interface{}) (newpackages []map[string]interface{}) {
  811. package1, ok1 := tmp["package"]
  812. s_winner, ok2 := tmp["s_winner"]
  813. bidamount, ok3 := tmp["bidamount"]
  814. var innerWinners = make([]string, 0)
  815. var biaoAmounts = make([]float64, 0)
  816. // 三个字段都存在
  817. if ok3 && ok2 && ok1 {
  818. packageMap, ok := package1.(map[string]interface{})
  819. if ok {
  820. if len(packageMap) >= 2 {
  821. var packages = make([]map[string]interface{}, 0)
  822. //var newTmp = make(map[string]interface{})
  823. winner_amount_count := 0
  824. for _, pack := range packageMap {
  825. var newPackage = make(map[string]interface{})
  826. pac, okk := pack.(map[string]interface{})
  827. if okk {
  828. _, okk1 := pac["winner"]
  829. _, okk2 := pac["bidamount"]
  830. _, okk3 := pac["name"]
  831. if okk1 {
  832. innerWinners = append(innerWinners, util.ObjToString(pac["winner"]))
  833. }
  834. if okk2 {
  835. biaoAmounts = append(biaoAmounts, util.Float64All(pac["bidamount"]))
  836. }
  837. //winner bidamount 二个字段都存在
  838. if okk1 && okk2 {
  839. winner_amount_count++
  840. newPackage["winner"] = pac["winner"]
  841. newPackage["bidamount"] = pac["bidamount"]
  842. if okk3 {
  843. newPackage["name"] = pac["name"]
  844. }
  845. packages = append(packages, newPackage)
  846. }
  847. }
  848. }
  849. //出现次数大于1
  850. if winner_amount_count > 1 {
  851. swinner := util.ObjToString(s_winner)
  852. swinners := strings.Split(swinner, ",")
  853. //判断里外 winner 是否相等
  854. eq := StringSliceValuesEqual(swinners, innerWinners)
  855. if eq {
  856. //判断金额相等
  857. if Float64Equal1Precision(Float64SliceSum(biaoAmounts), util.Float64All(bidamount)) {
  858. newpackages = packages
  859. }
  860. }
  861. }
  862. }
  863. }
  864. }
  865. return
  866. }