tagTask.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. package main
  2. import (
  3. util "app.yhyue.com/data_processing/common_utils"
  4. "app.yhyue.com/data_processing/common_utils/log"
  5. "fmt"
  6. "go.uber.org/zap"
  7. "proposed_project/config"
  8. "strings"
  9. "sync"
  10. "time"
  11. )
  // TagField maps a tag (rule label) name to a field name. The keys are the
  // same tag names taskRun reads from the result of taskFuc.
  // NOTE(review): nothing in this part of the file reads TagField; the exact
  // meaning of the values should be confirmed against its callers.
  var TagField = map[string]string{
  	// Category tags: both levels point at the same field.
  	"top_category": "category",
  	"sub_category": "category",

  	// Tags whose name equals their field.
  	"nature":        "nature",
  	"project_stage": "project_stage",

  	// Owner type.
  	"owner": "ownerclass",

  	// Disabled entry kept from the original:
  	//"projecttype": "",
  }
  20. var (
  21. //MatchArr []TagMatching
  22. //不同标签 规则
  23. ruleMatch = make(map[string][]TagMatching, 0)
  24. SelectF = make(map[string]int)
  25. )
  26. func InitRule() {
  27. info, _ := MgoBid.Find(config.Conf.Serve.TagRule, nil, `{"_id": 1}`, nil, false, -1, -1)
  28. for _, m := range *info {
  29. tag := TagMatching{}
  30. tag.tagName = util.ObjToString(m["label_name"])
  31. tag.tagCode = util.ObjToString(m["code"])
  32. // 关键词
  33. if f := util.ObjToString(m["match_keyword"]); f != "" {
  34. tag.matchField = strings.Split(f, ",")
  35. for _, s := range tag.matchField {
  36. SelectF[s] = 1
  37. }
  38. if v := util.ObjToString(m["keyword"]); v != "" {
  39. tag.matchKey = util.ObjToString(m["keyword"])
  40. tag.matchKeyReg = GetRegex(util.ObjToString(m["keyword"]))
  41. }
  42. }
  43. // 附件词
  44. if f := util.ObjToString(m["match_fjword"]); f != "" {
  45. tag.addField = strings.Split(f, ",")
  46. for _, s := range tag.addField {
  47. SelectF[s] = 1
  48. }
  49. if v := util.ObjToString(m["fjword"]); v != "" {
  50. tag.addKey = util.ObjToString(m["fjword"])
  51. tag.addKeyReg = GetRegex(util.ObjToString(m["fjword"]))
  52. }
  53. }
  54. // 排除词
  55. if f := util.ObjToString(m["match_pcword"]); f != "" {
  56. tag.excludeField = strings.Split(f, ",")
  57. for _, s := range tag.excludeField {
  58. SelectF[s] = 1
  59. }
  60. if v := util.ObjToString(m["pcword"]); v != "" {
  61. tag.excludeKey = util.ObjToString(m["pcword"])
  62. tag.excludeKeyReg = GetRegex(util.ObjToString(m["pcword"]))
  63. }
  64. }
  65. // 清理词
  66. if v := util.ObjToString(m["qlword"]); v != "" {
  67. tag.clearKey = strings.Split(util.ObjToString(m["qlword"]), ",")
  68. }
  69. rules, _ := ruleMatch[tag.tagName]
  70. rules = append(rules, tag)
  71. ruleMatch[tag.tagName] = rules
  72. }
  73. for K, v := range ruleMatch {
  74. log.Info("InitRule", zap.Int(K, len(v)))
  75. }
  76. }
  77. func taskRun() {
  78. sess := MgoPro.GetMgoConn()
  79. defer MgoPro.DestoryMongoConn(sess)
  80. ch := make(chan bool, config.Conf.Serve.Thread)
  81. wg := &sync.WaitGroup{}
  82. query := sess.DB(config.Conf.DB.MongoP.Dbname).C(config.Conf.DB.MongoP.Coll).Find(nil).Select(SelectF).Iter()
  83. count := 0
  84. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  85. if count%20000 == 0 {
  86. log.Info(fmt.Sprintf("current --- %d", count))
  87. }
  88. ch <- true
  89. wg.Add(1)
  90. go func(tmp map[string]interface{}) {
  91. defer func() {
  92. <-ch
  93. wg.Done()
  94. }()
  95. tag := taskFuc(tmp)
  96. update := make(map[string]interface{})
  97. //项目性质
  98. if tag["nature"] != "" {
  99. update["nature_code"] = tag["nature"]
  100. tmp["nature_code"] = tag["nature"]
  101. } else {
  102. update["nature_code"] = "00"
  103. tmp["nature_code"] = "00"
  104. }
  105. //项目阶段
  106. if tag["project_stage"] != "" {
  107. update["project_stage_code"] = tag["project_stage"]
  108. tmp["project_stage_code"] = tag["project_stage"]
  109. } else {
  110. update["project_stage_code"] = "00"
  111. tmp["project_stage_code"] = "00"
  112. }
  113. //业主类型
  114. if tag["owner"] != "" {
  115. update["ownerclass_code"] = tag["owner"]
  116. tmp["ownerclass_code"] = tag["owner"]
  117. } else {
  118. update["ownerclass_code"] = "00"
  119. tmp["ownerclass_code"] = "00"
  120. }
  121. //项目类别
  122. if tag["sub_category"] != "" {
  123. update["category_code"] = tag["sub_category"]
  124. tmp["category_code"] = tag["sub_category"]
  125. } else {
  126. if tag["top_category"] != "" {
  127. update["category_code"] = tag["top_category"]
  128. tmp["category_code"] = tag["top_category"]
  129. }
  130. }
  131. if util.ObjToString(update["category_code"]) == "" {
  132. update["category_code"] = "04"
  133. tmp["category_code"] = "04"
  134. }
  135. //updatePool <- []map[string]interface{}{
  136. // {"_id": tmp["_id"]},
  137. // {"$set": update},
  138. //}
  139. savePool <- tmp
  140. }(tmp)
  141. tmp = make(map[string]interface{})
  142. }
  143. wg.Wait()
  144. log.Info(fmt.Sprintf("over --- %d", count))
  145. }
  146. func taskFuc(tmp map[string]interface{}) map[string]string {
  147. tag := make(map[string]string) // 打上的标签
  148. for _, MatchArr := range ruleMatch {
  149. for _, v := range MatchArr {
  150. // 同个类型的标签如果存在,就不需要再打
  151. if tag[v.tagName] != "" {
  152. continue
  153. }
  154. // 排除词
  155. if len(v.excludeField) > 0 && len(v.excludeKeyReg) > 0 {
  156. for _, f := range v.excludeField {
  157. if val := util.ObjToString(tmp[f]); val != "" {
  158. for _, e1 := range v.excludeKeyReg {
  159. flag := false
  160. if e1.regs != nil && e1.regs.MatchString(val) {
  161. flag = true
  162. } else {
  163. // && 特殊处理
  164. if strings.Contains(e1.keyStr, "&&") {
  165. for _, s := range strings.Split(e1.keyStr, "&&") {
  166. if strings.Contains(val, s) {
  167. flag = true
  168. break
  169. }
  170. }
  171. }
  172. }
  173. if flag {
  174. goto L
  175. }
  176. }
  177. }
  178. }
  179. }
  180. // 清理词
  181. if len(v.clearKey) > 0 && len(v.matchField) > 0 {
  182. for _, s := range v.clearKey {
  183. for _, f := range v.matchField {
  184. if val := util.ObjToString(tmp[f]); val != "" {
  185. tmp[f] = strings.ReplaceAll(val, s, "")
  186. }
  187. }
  188. }
  189. }
  190. // 关键词
  191. if len(v.matchField) > 0 && len(v.matchKeyReg) > 0 {
  192. for _, f := range v.matchField {
  193. if val := util.ObjToString(tmp[f]); val != "" {
  194. for _, r1 := range v.matchKeyReg {
  195. if r1.regs.MatchString(val) {
  196. if len(v.addField) > 0 && len(v.addKeyReg) > 0 {
  197. // 匹配附加词
  198. isCt := false
  199. for _, f1 := range v.addField {
  200. if v1 := util.ObjToString(tmp[f1]); v1 != "" {
  201. for _, r2 := range v.addKeyReg {
  202. if r2.regs != nil && r2.regs.MatchString(v1) {
  203. isCt = true
  204. } else {
  205. // && 特殊处理
  206. if strings.Contains(r2.keyStr, "&&") {
  207. flag := true
  208. for _, s := range strings.Split(r2.keyStr, "&&") {
  209. if !strings.Contains(v1, s) {
  210. flag = false
  211. break
  212. }
  213. }
  214. if flag {
  215. isCt = true
  216. }
  217. }
  218. }
  219. }
  220. }
  221. }
  222. if isCt {
  223. tag[v.tagName] = v.tagCode
  224. }
  225. } else {
  226. tag[v.tagName] = v.tagCode
  227. }
  228. }
  229. }
  230. }
  231. }
  232. }
  233. L:
  234. }
  235. }
  236. return tag
  237. }
  238. func UpdateMethod() {
  239. arru := make([][]map[string]interface{}, saveSize)
  240. indexu := 0
  241. for {
  242. select {
  243. case v := <-updatePool:
  244. arru[indexu] = v
  245. indexu++
  246. if indexu == saveSize {
  247. updateSp <- true
  248. go func(arru [][]map[string]interface{}) {
  249. defer func() {
  250. <-updateSp
  251. }()
  252. MgoPro.UpdateBulk(config.Conf.DB.MongoP.Coll, arru...)
  253. }(arru)
  254. arru = make([][]map[string]interface{}, saveSize)
  255. indexu = 0
  256. }
  257. case <-time.After(1000 * time.Millisecond):
  258. if indexu > 0 {
  259. updateSp <- true
  260. go func(arru [][]map[string]interface{}) {
  261. defer func() {
  262. <-updateSp
  263. }()
  264. MgoPro.UpdateBulk(config.Conf.DB.MongoP.Coll, arru...)
  265. }(arru[:indexu])
  266. arru = make([][]map[string]interface{}, saveSize)
  267. indexu = 0
  268. }
  269. }
  270. }
  271. }