Browse Source

修改-敏感词项目-相关增量条件

apple 4 years ago
parent
commit
b4c5b03c58

+ 0 - 2
udpdataclear/udpSensitiveWords/main.go

@@ -5,11 +5,9 @@ import (
 )
 
 func init() {
-	
 	util.InitC()
 }
 func main() {
-
 	go util.AddTaskSensitiveWordsData() //增量
 	// 主函数中添加
 	util.ExtractUdp() //udp通知抽取

+ 122 - 0
udpdataclear/udpSensitiveWords/util/hisdata.go

@@ -0,0 +1,122 @@
+package util
+
+import (
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"log"
+	"strings"
+	"sync"
+)
+
+// HistoryReadyData performs a one-off backfill of the mixdata.unique_qyxy
+// collection. It scans mixdata.qyxy_std and then mixdata.qyxy_historyname in
+// _id order and, for every document that passes the name and company-type
+// filters, inserts {"qy_name": company_name} into unique_qyxy.
+//
+// Inserts run on at most 10 concurrent goroutines. Each pass waits for all of
+// its workers before the "<collection> ok" line is logged, so the function
+// only returns once every insert has completed. Connection or query setup
+// failures terminate the process via log.Fatalln.
+func HistoryReadyData() {
+	// NOTE(review): the connection string and credentials are hard-coded in
+	// source; they should come from configuration and the password should be
+	// rotated.
+	mgo, err := InitMgoEn("mongodb://172.17.4.187:27082,172.17.145.163:27083", 10, "fengweiqiang", "fwq@123123")
+	if err != nil {
+		log.Fatalln(err)
+	}
+
+	con := mgo.GetCon()
+	if con == nil {
+		log.Fatalln("mgo con err")
+	}
+	db := con.Database("mixdata")
+	target := db.Collection("unique_qyxy")
+
+	// wanted reports whether a document's company name should be stored:
+	// the name must not match reglen, must match unstart_strReg, must not
+	// match con_strReg, and the company type must not mention "个人" or "个体".
+	wanted := func(name string, doc map[string]interface{}) bool {
+		if reglen.MatchString(name) || !unstart_strReg.MatchString(name) ||
+			con_strReg.MatchString(name) {
+			return false
+		}
+		companyType := ObjToString(doc["company_type"])
+		return !strings.Contains(companyType, "个人") && !strings.Contains(companyType, "个体")
+	}
+
+	// slots caps the number of in-flight insert goroutines. A slot is taken
+	// BEFORE the goroutine is spawned so the scan loop itself blocks when all
+	// workers are busy, instead of creating one goroutine per document.
+	slots := make(chan struct{}, 10)
+
+	// backfill copies the accepted company names of one source collection
+	// into unique_qyxy and blocks until all of its inserts have finished.
+	backfill := func(source string) {
+		// NOTE(review): nil contexts are kept from the original code;
+		// presumably the driver substitutes a background context — confirm,
+		// and prefer passing a real context.Context.
+		cursor, err := db.Collection(source).Find(nil, bson.M{}, options.Find().SetProjection(
+			bson.M{
+				"company_name": 1,
+				"company_type": 1,
+			}).SetSort(bson.M{"_id": 1}))
+		if err != nil {
+			log.Fatalln(err)
+		}
+		// Closed when this pass ends, not when the whole function returns.
+		defer cursor.Close(nil)
+
+		var (
+			wg     sync.WaitGroup
+			mu     sync.Mutex // guards failed
+			failed uint64     // inserts that returned an error
+			num    uint64     // documents read from the cursor
+		)
+		for cursor.Next(nil) {
+			num++
+			doc := map[string]interface{}{}
+			if err := cursor.Decode(&doc); err != nil {
+				log.Println(err)
+				continue
+			}
+
+			slots <- struct{}{}
+			wg.Add(1)
+			go func(t map[string]interface{}) {
+				defer func() {
+					<-slots
+					wg.Done()
+				}()
+				name, ok := t["company_name"].(string)
+				if !ok || !wanted(name, t) {
+					return
+				}
+				// Store the company name. Failures are counted rather than
+				// logged one by one so a long run does not flood the log.
+				if _, err := target.InsertOne(nil, bson.M{
+					"qy_name": name,
+				}); err != nil {
+					mu.Lock()
+					failed++
+					mu.Unlock()
+				}
+			}(doc)
+
+			if num%1000000 == 0 {
+				log.Println(source+":", num)
+			}
+		}
+		// Next returning false can mean "exhausted" or "failed"; tell them apart.
+		if err := cursor.Err(); err != nil {
+			log.Println(source, "cursor err:", err)
+		}
+
+		// Wait for every worker of this pass before reporting completion.
+		wg.Wait()
+		if failed > 0 {
+			log.Println(source, "insert errors:", failed)
+		}
+		log.Println(source + " ok")
+	}
+
+	backfill("qyxy_std")
+	backfill("qyxy_historyname")
+}
+

+ 5 - 3
udpdataclear/udpSensitiveWords/util/udpdata.go

@@ -274,10 +274,14 @@ func AddTaskSensitiveWordsData() {
 			if err == nil {
 				if company_name, ok := tmp["company_name"].(string); ok {
 
-					if strReg.MatchString(company_name) || !unstart_strReg.MatchString(company_name) ||
+					if reglen.MatchString(company_name) || !unstart_strReg.MatchString(company_name) ||
 						con_strReg.MatchString(company_name) {
 						continue
 					}
+					company_type:= ObjToString(tmp["company_type"])
+					if strings.Contains(company_type,"个人")||strings.Contains(company_type,"个体"){
+						continue
+					}
 					//存mgo
 					new_tmp ,err:= con.Database("mixdata").Collection("unique_qyxy").InsertOne(nil, bson.M{
 						"qy_name": company_name,
@@ -285,8 +289,6 @@ func AddTaskSensitiveWordsData() {
 					if err==nil {
 						dealWithEsData(company_name, BsonTOStringId(new_tmp.InsertedID))
 					}
-
-
 				}
 			}
 		}