tagTask.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. package main
  2. import (
  3. "fmt"
  4. "go.uber.org/zap"
  5. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  6. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  7. "proposed_project/config"
  8. "strings"
  9. "sync"
  10. "time"
  11. )
  // TagField pairs each tag name with a field-name stem. Both category levels
  // share the "category" stem.
  // NOTE(review): nothing in this part of the file reads TagField; the values
  // look like the prefixes of the "*_code" fields written by taskRun — confirm
  // against the caller before relying on that.
  var TagField = map[string]string{
  	"nature":        "nature",
  	"owner":         "ownerclass",
  	"project_stage": "project_stage",
  	"sub_category":  "category",
  	"top_category":  "category",
  	// "projecttype": "", // disabled entry, kept for reference
  }
  20. var (
  21. //MatchArr []TagMatching
  22. //不同标签 规则
  23. ruleMatch = make(map[string][]TagMatching, 0)
  24. SelectF = make(map[string]int)
  25. )
  26. func InitRule() {
  27. info, _ := MgoBid.Find(config.Conf.Serve.TagRule, nil, `{"_id": 1}`, nil, false, -1, -1)
  28. for _, m := range *info {
  29. tag := TagMatching{}
  30. tag.tagName = util.ObjToString(m["label_name"])
  31. tag.tagCode = util.ObjToString(m["code"])
  32. // 关键词
  33. if f := util.ObjToString(m["match_keyword"]); f != "" {
  34. tag.matchField = strings.Split(f, ",")
  35. for _, s := range tag.matchField {
  36. SelectF[s] = 1
  37. }
  38. if v := util.ObjToString(m["keyword"]); v != "" {
  39. tag.matchKey = util.ObjToString(m["keyword"])
  40. tag.matchKeyReg = GetRegex(util.ObjToString(m["keyword"]))
  41. }
  42. }
  43. // 附件词
  44. if f := util.ObjToString(m["match_fjword"]); f != "" {
  45. tag.addField = strings.Split(f, ",")
  46. for _, s := range tag.addField {
  47. SelectF[s] = 1
  48. }
  49. if v := util.ObjToString(m["fjword"]); v != "" {
  50. tag.addKey = util.ObjToString(m["fjword"])
  51. tag.addKeyReg = GetRegex(util.ObjToString(m["fjword"]))
  52. }
  53. }
  54. // 排除词
  55. if f := util.ObjToString(m["match_pcword"]); f != "" {
  56. tag.excludeField = strings.Split(f, ",")
  57. for _, s := range tag.excludeField {
  58. SelectF[s] = 1
  59. }
  60. if v := util.ObjToString(m["pcword"]); v != "" {
  61. tag.excludeKey = util.ObjToString(m["pcword"])
  62. tag.excludeKeyReg = GetRegex(util.ObjToString(m["pcword"]))
  63. }
  64. }
  65. // 清理词
  66. if v := util.ObjToString(m["qlword"]); v != "" {
  67. tag.clearKey = strings.Split(util.ObjToString(m["qlword"]), ",")
  68. }
  69. rules, _ := ruleMatch[tag.tagName]
  70. rules = append(rules, tag)
  71. ruleMatch[tag.tagName] = rules
  72. }
  73. for K, v := range ruleMatch {
  74. log.Info("InitRule", zap.Int(K, len(v)))
  75. }
  76. }
  77. func taskRun() {
  78. sess := MgoPro.GetMgoConn()
  79. defer MgoPro.DestoryMongoConn(sess)
  80. ch := make(chan bool, config.Conf.Serve.Thread)
  81. wg := &sync.WaitGroup{}
  82. query := sess.DB(config.Conf.DB.MongoP.Dbname).C(config.Conf.DB.MongoP.ProposedColl).Find(nil).Select(SelectF).Iter()
  83. count := 0
  84. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  85. if count%20000 == 0 {
  86. log.Info(fmt.Sprintf("current --- %d", count))
  87. }
  88. ch <- true
  89. wg.Add(1)
  90. go func(tmp map[string]interface{}) {
  91. defer func() {
  92. <-ch
  93. wg.Done()
  94. }()
  95. tag := taskFuc(tmp)
  96. update := make(map[string]interface{})
  97. //项目性质
  98. if tag["nature"] != "" {
  99. update["nature_code"] = tag["nature"]
  100. tmp["nature_code"] = tag["nature"]
  101. } else {
  102. update["nature_code"] = "00"
  103. tmp["nature_code"] = "00"
  104. }
  105. //项目阶段
  106. if tag["project_stage"] != "" {
  107. update["project_stage_code"] = tag["project_stage"]
  108. tmp["project_stage_code"] = tag["project_stage"]
  109. } else {
  110. update["project_stage_code"] = "00"
  111. tmp["project_stage_code"] = "00"
  112. }
  113. //业主类型
  114. if tag["owner"] != "" {
  115. update["ownerclass_code"] = tag["owner"]
  116. tmp["ownerclass_code"] = tag["owner"]
  117. } else {
  118. update["ownerclass_code"] = "00"
  119. tmp["ownerclass_code"] = "00"
  120. }
  121. //项目类别
  122. if tag["sub_category"] != "" {
  123. update["category_code"] = tag["sub_category"]
  124. tmp["category_code"] = tag["sub_category"]
  125. } else {
  126. if tag["top_category"] != "" {
  127. update["category_code"] = tag["top_category"]
  128. tmp["category_code"] = tag["top_category"]
  129. }
  130. }
  131. if util.ObjToString(update["category_code"]) == "" {
  132. update["category_code"] = "04"
  133. tmp["category_code"] = "04"
  134. }
  135. //updatePool <- []map[string]interface{}{
  136. // {"_id": tmp["_id"]},
  137. // {"$set": update},
  138. //}
  139. savePool <- tmp
  140. }(tmp)
  141. tmp = make(map[string]interface{})
  142. }
  143. wg.Wait()
  144. log.Info(fmt.Sprintf("over --- %d", count))
  145. }
  146. func taskFuc(tmp map[string]interface{}) map[string]string {
  147. tag := make(map[string]string) // 打上的标签
  148. for _, MatchArr := range ruleMatch {
  149. for _, v := range MatchArr {
  150. // 同个类型的标签如果存在,就不需要再打
  151. if tag[v.tagName] != "" {
  152. continue
  153. }
  154. // 排除词
  155. if len(v.excludeField) > 0 && len(v.excludeKeyReg) > 0 {
  156. for _, f := range v.excludeField {
  157. if val := util.ObjToString(tmp[f]); val != "" {
  158. for _, e1 := range v.excludeKeyReg {
  159. flag := false
  160. if e1.regs != nil && e1.regs.MatchString(val) {
  161. flag = true
  162. } else {
  163. // && 特殊处理
  164. if strings.Contains(e1.keyStr, "&&") {
  165. for _, s := range strings.Split(e1.keyStr, "&&") {
  166. if strings.Contains(val, s) {
  167. flag = true
  168. break
  169. }
  170. }
  171. }
  172. }
  173. if flag {
  174. goto L
  175. }
  176. }
  177. }
  178. }
  179. }
  180. // 清理词
  181. if len(v.clearKey) > 0 && len(v.matchField) > 0 {
  182. for _, s := range v.clearKey {
  183. for _, f := range v.matchField {
  184. if val := util.ObjToString(tmp[f]); val != "" {
  185. tmp[f] = strings.ReplaceAll(val, s, "")
  186. }
  187. }
  188. }
  189. }
  190. // 关键词
  191. if len(v.matchField) > 0 && len(v.matchKeyReg) > 0 {
  192. for _, f := range v.matchField {
  193. if val := util.ObjToString(tmp[f]); val != "" {
  194. for _, r1 := range v.matchKeyReg {
  195. if r1.regs.MatchString(val) {
  196. if len(v.addField) > 0 && len(v.addKeyReg) > 0 {
  197. // 匹配附加词
  198. isCt := false
  199. for _, f1 := range v.addField {
  200. if v1 := util.ObjToString(tmp[f1]); v1 != "" {
  201. for _, r2 := range v.addKeyReg {
  202. if r2.regs != nil && r2.regs.MatchString(v1) {
  203. isCt = true
  204. } else {
  205. // && 特殊处理
  206. if strings.Contains(r2.keyStr, "&&") {
  207. flag := true
  208. for _, s := range strings.Split(r2.keyStr, "&&") {
  209. if !strings.Contains(v1, s) {
  210. flag = false
  211. break
  212. }
  213. }
  214. if flag {
  215. isCt = true
  216. }
  217. }
  218. }
  219. }
  220. }
  221. }
  222. if isCt {
  223. tag[v.tagName] = v.tagCode
  224. }
  225. } else {
  226. tag[v.tagName] = v.tagCode
  227. }
  228. }
  229. }
  230. }
  231. }
  232. }
  233. L:
  234. }
  235. }
  236. return tag
  237. }
  238. func taskFuc1(tmp map[string]interface{}) map[string]string {
  239. tag := make(map[string]string) // 打上的标签
  240. for _, v := range StageCode {
  241. // 同个类型的标签如果存在,就不需要再打
  242. if tag[v.tagName] != "" {
  243. continue
  244. }
  245. // 排除词
  246. if len(v.excludeField) > 0 && len(v.excludeKeyReg) > 0 {
  247. for _, f := range v.excludeField {
  248. if val := util.ObjToString(tmp[f]); val != "" {
  249. for _, e1 := range v.excludeKeyReg {
  250. flag := false
  251. if e1.regs != nil && e1.regs.MatchString(val) {
  252. flag = true
  253. } else {
  254. // && 特殊处理
  255. if strings.Contains(e1.keyStr, "&&") {
  256. for _, s := range strings.Split(e1.keyStr, "&&") {
  257. if strings.Contains(val, s) {
  258. flag = true
  259. break
  260. }
  261. }
  262. }
  263. }
  264. if flag {
  265. goto L
  266. }
  267. }
  268. }
  269. }
  270. }
  271. // 清理词
  272. if len(v.clearKey) > 0 && len(v.matchField) > 0 {
  273. for _, s := range v.clearKey {
  274. for _, f := range v.matchField {
  275. if val := util.ObjToString(tmp[f]); val != "" {
  276. tmp[f] = strings.ReplaceAll(val, s, "")
  277. }
  278. }
  279. }
  280. }
  281. // 关键词
  282. if len(v.matchField) > 0 && len(v.matchKeyReg) > 0 {
  283. for _, f := range v.matchField {
  284. if val := util.ObjToString(tmp[f]); val != "" {
  285. for _, r1 := range v.matchKeyReg {
  286. if r1.regs.MatchString(val) {
  287. if len(v.addField) > 0 && len(v.addKeyReg) > 0 {
  288. // 匹配附加词
  289. isCt := false
  290. for _, f1 := range v.addField {
  291. if v1 := util.ObjToString(tmp[f1]); v1 != "" {
  292. for _, r2 := range v.addKeyReg {
  293. if r2.regs != nil && r2.regs.MatchString(v1) {
  294. isCt = true
  295. } else {
  296. // && 特殊处理
  297. if strings.Contains(r2.keyStr, "&&") {
  298. flag := true
  299. for _, s := range strings.Split(r2.keyStr, "&&") {
  300. if !strings.Contains(v1, s) {
  301. flag = false
  302. break
  303. }
  304. }
  305. if flag {
  306. isCt = true
  307. }
  308. }
  309. }
  310. }
  311. }
  312. }
  313. if isCt {
  314. tag[v.tagName] = v.tagCode
  315. }
  316. } else {
  317. tag[v.tagName] = v.tagCode
  318. }
  319. }
  320. }
  321. }
  322. }
  323. }
  324. L:
  325. }
  326. return tag
  327. }
  328. func UpdateMethod() {
  329. arru := make([][]map[string]interface{}, saveSize)
  330. indexu := 0
  331. for {
  332. select {
  333. case v := <-updatePool:
  334. arru[indexu] = v
  335. indexu++
  336. if indexu == saveSize {
  337. updateSp <- true
  338. go func(arru [][]map[string]interface{}) {
  339. defer func() {
  340. <-updateSp
  341. }()
  342. MgoPro.UpdateBulk(config.Conf.DB.MongoP.ProposedColl, arru...)
  343. }(arru)
  344. arru = make([][]map[string]interface{}, saveSize)
  345. indexu = 0
  346. }
  347. case <-time.After(1000 * time.Millisecond):
  348. if indexu > 0 {
  349. updateSp <- true
  350. go func(arru [][]map[string]interface{}) {
  351. defer func() {
  352. <-updateSp
  353. }()
  354. MgoPro.UpdateBulk(config.Conf.DB.MongoP.ProposedColl, arru...)
  355. }(arru[:indexu])
  356. arru = make([][]map[string]interface{}, saveSize)
  357. indexu = 0
  358. }
  359. }
  360. }
  361. }