123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- package main
- import (
- util "app.yhyue.com/data_processing/common_utils"
- "app.yhyue.com/data_processing/common_utils/log"
- "fmt"
- "go.uber.org/zap"
- "proposed_project/config"
- "strings"
- "sync"
- "time"
- )
// TagField maps a tag type name to a field name.
//
// The keys are the tag type names that taskRun looks up in the result of
// taskFuc. NOTE(review): the values look like the stems of the "*_code"
// fields that taskRun writes — confirm against the code that reads this map.
var TagField = map[string]string{
	"nature":        "nature",
	"owner":         "ownerclass",
	"project_stage": "project_stage",
	"sub_category":  "category",
	"top_category":  "category",
	// Disabled entry kept from the original:
	//"projecttype": "",
}
- var (
- //MatchArr []TagMatching
- //不同标签 规则
- ruleMatch = make(map[string][]TagMatching, 0)
- SelectF = make(map[string]int)
- )
- func InitRule() {
- info, _ := MgoBid.Find(config.Conf.Serve.TagRule, nil, `{"_id": 1}`, nil, false, -1, -1)
- for _, m := range *info {
- tag := TagMatching{}
- tag.tagName = util.ObjToString(m["label_name"])
- tag.tagCode = util.ObjToString(m["code"])
- // 关键词
- if f := util.ObjToString(m["match_keyword"]); f != "" {
- tag.matchField = strings.Split(f, ",")
- for _, s := range tag.matchField {
- SelectF[s] = 1
- }
- if v := util.ObjToString(m["keyword"]); v != "" {
- tag.matchKey = util.ObjToString(m["keyword"])
- tag.matchKeyReg = GetRegex(util.ObjToString(m["keyword"]))
- }
- }
- // 附件词
- if f := util.ObjToString(m["match_fjword"]); f != "" {
- tag.addField = strings.Split(f, ",")
- for _, s := range tag.addField {
- SelectF[s] = 1
- }
- if v := util.ObjToString(m["fjword"]); v != "" {
- tag.addKey = util.ObjToString(m["fjword"])
- tag.addKeyReg = GetRegex(util.ObjToString(m["fjword"]))
- }
- }
- // 排除词
- if f := util.ObjToString(m["match_pcword"]); f != "" {
- tag.excludeField = strings.Split(f, ",")
- for _, s := range tag.excludeField {
- SelectF[s] = 1
- }
- if v := util.ObjToString(m["pcword"]); v != "" {
- tag.excludeKey = util.ObjToString(m["pcword"])
- tag.excludeKeyReg = GetRegex(util.ObjToString(m["pcword"]))
- }
- }
- // 清理词
- if v := util.ObjToString(m["qlword"]); v != "" {
- tag.clearKey = strings.Split(util.ObjToString(m["qlword"]), ",")
- }
- rules, _ := ruleMatch[tag.tagName]
- rules = append(rules, tag)
- ruleMatch[tag.tagName] = rules
- }
- for K, v := range ruleMatch {
- log.Info("InitRule", zap.Int(K, len(v)))
- }
- }
- func taskRun() {
- sess := MgoPro.GetMgoConn()
- defer MgoPro.DestoryMongoConn(sess)
- ch := make(chan bool, config.Conf.Serve.Thread)
- wg := &sync.WaitGroup{}
- query := sess.DB(config.Conf.DB.MongoP.Dbname).C(config.Conf.DB.MongoP.ProposedColl).Find(nil).Select(SelectF).Iter()
- count := 0
- for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
- if count%20000 == 0 {
- log.Info(fmt.Sprintf("current --- %d", count))
- }
- ch <- true
- wg.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-ch
- wg.Done()
- }()
- tag := taskFuc(tmp)
- update := make(map[string]interface{})
- //项目性质
- if tag["nature"] != "" {
- update["nature_code"] = tag["nature"]
- tmp["nature_code"] = tag["nature"]
- } else {
- update["nature_code"] = "00"
- tmp["nature_code"] = "00"
- }
- //项目阶段
- if tag["project_stage"] != "" {
- update["project_stage_code"] = tag["project_stage"]
- tmp["project_stage_code"] = tag["project_stage"]
- } else {
- update["project_stage_code"] = "00"
- tmp["project_stage_code"] = "00"
- }
- //业主类型
- if tag["owner"] != "" {
- update["ownerclass_code"] = tag["owner"]
- tmp["ownerclass_code"] = tag["owner"]
- } else {
- update["ownerclass_code"] = "00"
- tmp["ownerclass_code"] = "00"
- }
- //项目类别
- if tag["sub_category"] != "" {
- update["category_code"] = tag["sub_category"]
- tmp["category_code"] = tag["sub_category"]
- } else {
- if tag["top_category"] != "" {
- update["category_code"] = tag["top_category"]
- tmp["category_code"] = tag["top_category"]
- }
- }
- if util.ObjToString(update["category_code"]) == "" {
- update["category_code"] = "04"
- tmp["category_code"] = "04"
- }
- //updatePool <- []map[string]interface{}{
- // {"_id": tmp["_id"]},
- // {"$set": update},
- //}
- savePool <- tmp
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg.Wait()
- log.Info(fmt.Sprintf("over --- %d", count))
- }
- func taskFuc(tmp map[string]interface{}) map[string]string {
- tag := make(map[string]string) // 打上的标签
- for _, MatchArr := range ruleMatch {
- for _, v := range MatchArr {
- // 同个类型的标签如果存在,就不需要再打
- if tag[v.tagName] != "" {
- continue
- }
- // 排除词
- if len(v.excludeField) > 0 && len(v.excludeKeyReg) > 0 {
- for _, f := range v.excludeField {
- if val := util.ObjToString(tmp[f]); val != "" {
- for _, e1 := range v.excludeKeyReg {
- flag := false
- if e1.regs != nil && e1.regs.MatchString(val) {
- flag = true
- } else {
- // && 特殊处理
- if strings.Contains(e1.keyStr, "&&") {
- for _, s := range strings.Split(e1.keyStr, "&&") {
- if strings.Contains(val, s) {
- flag = true
- break
- }
- }
- }
- }
- if flag {
- goto L
- }
- }
- }
- }
- }
- // 清理词
- if len(v.clearKey) > 0 && len(v.matchField) > 0 {
- for _, s := range v.clearKey {
- for _, f := range v.matchField {
- if val := util.ObjToString(tmp[f]); val != "" {
- tmp[f] = strings.ReplaceAll(val, s, "")
- }
- }
- }
- }
- // 关键词
- if len(v.matchField) > 0 && len(v.matchKeyReg) > 0 {
- for _, f := range v.matchField {
- if val := util.ObjToString(tmp[f]); val != "" {
- for _, r1 := range v.matchKeyReg {
- if r1.regs.MatchString(val) {
- if len(v.addField) > 0 && len(v.addKeyReg) > 0 {
- // 匹配附加词
- isCt := false
- for _, f1 := range v.addField {
- if v1 := util.ObjToString(tmp[f1]); v1 != "" {
- for _, r2 := range v.addKeyReg {
- if r2.regs != nil && r2.regs.MatchString(v1) {
- isCt = true
- } else {
- // && 特殊处理
- if strings.Contains(r2.keyStr, "&&") {
- flag := true
- for _, s := range strings.Split(r2.keyStr, "&&") {
- if !strings.Contains(v1, s) {
- flag = false
- break
- }
- }
- if flag {
- isCt = true
- }
- }
- }
- }
- }
- }
- if isCt {
- tag[v.tagName] = v.tagCode
- }
- } else {
- tag[v.tagName] = v.tagCode
- }
- }
- }
- }
- }
- }
- L:
- }
- }
- return tag
- }
- func taskFuc1(tmp map[string]interface{}) map[string]string {
- tag := make(map[string]string) // 打上的标签
- for _, v := range StageCode {
- // 同个类型的标签如果存在,就不需要再打
- if tag[v.tagName] != "" {
- continue
- }
- // 排除词
- if len(v.excludeField) > 0 && len(v.excludeKeyReg) > 0 {
- for _, f := range v.excludeField {
- if val := util.ObjToString(tmp[f]); val != "" {
- for _, e1 := range v.excludeKeyReg {
- flag := false
- if e1.regs != nil && e1.regs.MatchString(val) {
- flag = true
- } else {
- // && 特殊处理
- if strings.Contains(e1.keyStr, "&&") {
- for _, s := range strings.Split(e1.keyStr, "&&") {
- if strings.Contains(val, s) {
- flag = true
- break
- }
- }
- }
- }
- if flag {
- goto L
- }
- }
- }
- }
- }
- // 清理词
- if len(v.clearKey) > 0 && len(v.matchField) > 0 {
- for _, s := range v.clearKey {
- for _, f := range v.matchField {
- if val := util.ObjToString(tmp[f]); val != "" {
- tmp[f] = strings.ReplaceAll(val, s, "")
- }
- }
- }
- }
- // 关键词
- if len(v.matchField) > 0 && len(v.matchKeyReg) > 0 {
- for _, f := range v.matchField {
- if val := util.ObjToString(tmp[f]); val != "" {
- for _, r1 := range v.matchKeyReg {
- if r1.regs.MatchString(val) {
- if len(v.addField) > 0 && len(v.addKeyReg) > 0 {
- // 匹配附加词
- isCt := false
- for _, f1 := range v.addField {
- if v1 := util.ObjToString(tmp[f1]); v1 != "" {
- for _, r2 := range v.addKeyReg {
- if r2.regs != nil && r2.regs.MatchString(v1) {
- isCt = true
- } else {
- // && 特殊处理
- if strings.Contains(r2.keyStr, "&&") {
- flag := true
- for _, s := range strings.Split(r2.keyStr, "&&") {
- if !strings.Contains(v1, s) {
- flag = false
- break
- }
- }
- if flag {
- isCt = true
- }
- }
- }
- }
- }
- }
- if isCt {
- tag[v.tagName] = v.tagCode
- }
- } else {
- tag[v.tagName] = v.tagCode
- }
- }
- }
- }
- }
- }
- L:
- }
- return tag
- }
- func UpdateMethod() {
- arru := make([][]map[string]interface{}, saveSize)
- indexu := 0
- for {
- select {
- case v := <-updatePool:
- arru[indexu] = v
- indexu++
- if indexu == saveSize {
- updateSp <- true
- go func(arru [][]map[string]interface{}) {
- defer func() {
- <-updateSp
- }()
- MgoPro.UpdateBulk(config.Conf.DB.MongoP.ProposedColl, arru...)
- }(arru)
- arru = make([][]map[string]interface{}, saveSize)
- indexu = 0
- }
- case <-time.After(1000 * time.Millisecond):
- if indexu > 0 {
- updateSp <- true
- go func(arru [][]map[string]interface{}) {
- defer func() {
- <-updateSp
- }()
- MgoPro.UpdateBulk(config.Conf.DB.MongoP.ProposedColl, arru...)
- }(arru[:indexu])
- arru = make([][]map[string]interface{}, saveSize)
- indexu = 0
- }
- }
- }
- }
|