main.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package main
  2. import (
  3. "go.uber.org/zap"
  4. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  6. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  7. "regexp"
  8. "sync"
  9. )
  10. var (
  11. MgoB *mongodb.MongodbSim
  12. //标题匹配关键词
  13. //titleMatchRegStr = `(开业|新员工|员工培训|正式投用|成立新公司|图书馆正式开馆|项目顺利竣工|学校新建|投产运营|启动运营|竣工验收|正式投用|建成|竣工交付|项目竣工|工程开工|正式开工|全面完工|项目投产|工程竣工|揭牌成立|正式投用|新签合同|施工完成|封顶|正式竣工|新员工|项目开工|建设总包项目|建设项目|办公大楼搬迁|选址|设立分支机构|开新店|门店拓展|项目规划启动会|获批变更|项目顺利推进)`
  14. //titleMatchRegStr2 = `(重点项目|新动作|项目签约|助力企业新发展|捐赠|项目移交|投资|打造|落户|收购|合并|签约|合作|数字经济|数字化|人员调整|签约.+项目)`
  15. //titleMatchRegStr3 = `(培训)`
  16. //titleOutRegStr = `(习近平|十八大|十九大|二十大|端午节|中秋节|建军节|建党节|诈骗|主题教育|招标|中标|成交|询价|磋商|竞价|招租|采购|马拉松|救人义举|安全生产月|防汛抗旱|社区|爱心|台风|比选公告|教育活动|消费|老党员|开展.*活动|举办.*活动|整改整治|新华|会见|等奖|获奖|进校园|反诈|百姓|专项整治工作|保障|日报|成功举办|新征程|获评|审查|可行性研究|接受.+查|一带一路|故事|荣获|候选人|宣传|专题|慰问|圆满完成|竞争性磋商|政治|思想|人民网|央广网|系统升级|网站维护|比选|入围.+名单|获.+奖|演习|新纪录|刷新.+记录|创新高|巡视|表彰|培训班|考察|涉嫌|减持|卖出|流出|致辞|买入.+股|粮食安全)`
  17. //titleOutRegStr2 = `(新加披|美国|英国|波黑|竞赛|领导拜访|会议|会谈|进博会|工作座谈)`
  18. //titleOutRegStr3 = `(技能|专项|业务)`
  19. //
  20. //detailOutRegStr = `(责任编辑|会议指出|通讯员|报道|通讯.+报道|文章来源|.+会在.+成功举办|理财|端午节|中秋节|建军节|建党节|大会期间|致辞)`
  21. titleMatchRegStr string
  22. titleOutRegStr string
  23. detailOutRegStr string
  24. )
  25. func main() {
  26. titleMatchRegStr = GF.Cron.TitleMatch
  27. titleOutRegStr = GF.Cron.TitleOut
  28. detailOutRegStr = GF.Cron.DetailOut
  29. dealBidding()
  30. select {}
  31. }
  32. //dealBidding 处理标讯数据
  33. func dealBidding() {
  34. sess := MgoB.GetMgoConn()
  35. defer MgoB.DestoryMongoConn(sess)
  36. // 指定对应的时间格式
  37. //layout := "2006-01-02 15:04:05"
  38. // 获取当前时间
  39. //now := time.Now()
  40. var q interface{}
  41. //targetTime := time.Date(now.Year(), now.Month(), now.Day()+GF.Cron.Start, 04, 20, 0, 0, now.Location())
  42. //targetTime := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
  43. //todayTime := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
  44. if GF.Cron.Start != "" {
  45. if GF.Cron.End != "" {
  46. q = map[string]interface{}{
  47. "createtime": map[string]interface{}{
  48. "$gt": GF.Cron.Start,
  49. "$lte": GF.Cron.End,
  50. },
  51. }
  52. } else {
  53. q = map[string]interface{}{
  54. "createtime": map[string]interface{}{
  55. "$gt": GF.Cron.Start,
  56. },
  57. }
  58. }
  59. } else {
  60. q = nil
  61. }
  62. log.Info("dealBidding", zap.Any("q", q))
  63. query := sess.DB(GF.MongoB.DB).C(GF.MongoB.Coll).Find(q).Select(map[string]interface{}{
  64. "title": 1, "detail": 1}).Iter()
  65. count := 0
  66. ch := make(chan bool, 10)
  67. wg := &sync.WaitGroup{}
  68. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  69. if count%100 == 0 {
  70. log.Info("dealBidding", zap.Int("current", count))
  71. }
  72. ch <- true
  73. wg.Add(1)
  74. go func(tmp map[string]interface{}) {
  75. defer func() {
  76. <-ch
  77. wg.Done()
  78. }()
  79. tagData(tmp)
  80. }(tmp)
  81. tmp = map[string]interface{}{}
  82. }
  83. wg.Wait()
  84. log.Info("dealBidding", zap.Int("over ", count))
  85. }
  86. func tagData(tmp map[string]interface{}) {
  87. var label int
  88. var labelField string
  89. var labelWords []string
  90. var update = make(map[string]interface{}, 0)
  91. title := util.ObjToString(tmp["title"])
  92. detail := util.ObjToString(tmp["detail"])
  93. titleOutReg := regexp.MustCompile(titleOutRegStr)
  94. titleOutMatchs := titleOutReg.FindAllString(title, -1)
  95. //标题排除词
  96. if len(titleOutMatchs) > 0 {
  97. label = 2
  98. labelWords = titleOutMatchs
  99. labelField = "title_out"
  100. } else {
  101. //内容排除词
  102. detailOutReg := regexp.MustCompile(detailOutRegStr)
  103. detailOutMatchs := detailOutReg.FindAllString(detail, -1)
  104. if len(detailOutMatchs) > 0 {
  105. label = 2
  106. labelWords = detailOutMatchs
  107. labelField = "detail_out"
  108. } else {
  109. titleMatchReg := regexp.MustCompile(titleMatchRegStr)
  110. titleMatchs := titleMatchReg.FindAllString(title, -1)
  111. if len(titleMatchs) > 0 {
  112. label = 1
  113. labelWords = titleMatchs
  114. labelField = "title_match"
  115. } else {
  116. label = 3
  117. }
  118. }
  119. }
  120. update["label"] = label
  121. update["label_field"] = labelField
  122. update["label_words"] = labelWords
  123. where := map[string]interface{}{
  124. "_id": tmp["_id"],
  125. }
  126. MgoB.Update(GF.MongoB.Coll, where, map[string]interface{}{"$set": update}, true, false)
  127. }
// tagData2 labels data; it is an earlier version of tagData, kept commented out for reference.
  129. //func tagData2(tmp map[string]interface{}) {
  130. // var label int
  131. // var labelField string
  132. // var labelWords []string
  133. // var update = make(map[string]interface{}, 0)
  134. // title := util.ObjToString(tmp["title"])
  135. // detail := util.ObjToString(tmp["detail"])
  136. //
  137. // titleMatchReg := regexp.MustCompile(titleMatchRegStr)
  138. // titleMatchReg2 := regexp.MustCompile(titleMatchRegStr2)
  139. // titleMatchReg3 := regexp.MustCompile(titleMatchRegStr3)
  140. // titleOutReg := regexp.MustCompile(titleOutRegStr)
  141. // titleOutReg2 := regexp.MustCompile(titleOutRegStr2)
  142. // titleOutReg3 := regexp.MustCompile(titleOutRegStr3)
  143. // detailOutReg := regexp.MustCompile(detailOutRegStr)
  144. // //
  145. // titleMatchs := titleMatchReg.FindAllString(title, -1)
  146. // titleMatchs2 := titleMatchReg2.FindAllString(title, -1)
  147. // titleMatchs3 := titleMatchReg3.FindAllString(title, -1)
  148. // //
  149. // titleOutMatchs := titleOutReg.FindAllString(title, -1)
  150. // titleOutMatchs2 := titleOutReg2.FindAllString(title, -1)
  151. // titleOutMatchs3 := titleOutReg3.FindAllString(title, -1)
  152. //
  153. // if len(titleMatchs) > 0 {
  154. // label = 1
  155. // labelWords = titleMatchs
  156. // labelField = "title_match"
  157. // } else if len(titleMatchs2) > 0 {
  158. // //标题规则2 匹配
  159. // if len(titleOutMatchs2) == 0 {
  160. // label = 1
  161. // labelWords = titleMatchs2
  162. // labelField = "title_match"
  163. // } else {
  164. // label = 2
  165. // labelField = "title_out2"
  166. // labelWords = titleOutMatchs2
  167. // }
  168. // } else if len(titleMatchs3) > 0 {
  169. // if len(titleOutMatchs3) == 0 {
  170. // label = 1
  171. // labelWords = titleMatchs2
  172. // labelField = "title_match3"
  173. // } else {
  174. // label = 2
  175. // labelField = "title_out3"
  176. // labelWords = titleOutMatchs3
  177. // }
  178. // } else {
  179. // //标题排除词
  180. // if len(titleOutMatchs) > 0 {
  181. // label = 2
  182. // labelField = "title_out"
  183. // labelWords = titleOutMatchs
  184. // } else {
  185. // //内容匹配
  186. // detailOutMatchs := detailOutReg.FindAllString(detail, -1)
  187. // if len(detailOutMatchs) > 0 {
  188. // label = 2
  189. // labelField = "detail_out"
  190. // labelWords = detailOutMatchs
  191. // } else {
  192. // label = 3
  193. // }
  194. // }
  195. // }
  196. //
  197. // update["label"] = label
  198. // update["label_field"] = labelField
  199. // update["label_words"] = labelWords
  200. // where := map[string]interface{}{
  201. // "_id": tmp["_id"],
  202. // }
  203. //
  204. // MgoB.Update(GF.MongoB.Coll, where, map[string]interface{}{"$set": update}, true, false)
  205. //
  206. //}