apple 4 жил өмнө
parent
commit
1d30230260

+ 0 - 130
data_monitoring/words_vaild/src/main.go

@@ -1,130 +0,0 @@
-package main
-
-import (
-	"log"
-	qu "qfw/util"
-	"qfw/util/elastic"
-	"strings"
-	"sync"
-	"unicode/utf8"
-)
-
-var (
-	save_mgo        	*MongodbSim
-)
-
-// init wires the package-level MongoDB handle and the Elasticsearch client
-// before main runs.
-//
-// NOTE(review): the MongoDB address, user name, password and the
-// Elasticsearch URL are hard-coded here and therefore live in version
-// control; they should presumably come from configuration or the
-// environment — confirm with the owners before reusing this code.
-func init()  {
-	save_mgo = &MongodbSim{
-		MongodbAddr: "172.17.4.187:27082,172.17.145.163:27083",
-		DbName:      "mixdata",
-		Size:        20,
-		UserName: "fengweiqiang",
-		Password: "fwq@123123",
-	}
-	// InitPool only logs a connection failure; save_mgo.C stays nil in that case.
-	save_mgo.InitPool()
-
-	elastic.InitElasticSize("http://172.17.145.170:9800",20)
-
-
-}
-
-func main()  {
-
-	defer qu.Catch()
-	log.Println("处理 ... 指定企业名称 ...")
-
-
-	//生索引
-	readyDataEs()
-}
-
-
-// dealWithScoreRules looks up the single best Elasticsearch match for name in
-// the "azktest" index (field azktest.name_2) and decides whether it is close
-// enough to accept.
-//
-// It returns the matched name ("" when nothing was found) and true when
-// hit/total >= 0.8, where dealWithWordsRules supplies hit (two-rune windows of
-// name that occur in the match) and total (two-rune windows of the match).
-func dealWithScoreRules(name string) (string, bool) {
-	// name is spliced into a JSON string literal: escape backslashes and
-	// double quotes, otherwise the request body is no longer valid JSON.
-	escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(name)
-	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"` + escaped + `"}}],"must_not":[],"should":[]}},"from":0,"size":1,"sort":[],"facets":{}}`
-	// Only the highest-scoring hit is requested (size 1).
-	res := elastic.Get("azktest", "azktest", query)
-	// Guard the pointer before dereferencing it; a nil result means no data.
-	if res == nil || len(*res) == 0 {
-		return "", false
-	}
-	new_name := qu.ObjToString((*res)[0]["name"])
-	if new_name == "" {
-		return "", false
-	}
-	total, hit := dealWithWordsRules(name, new_name)
-	if total <= 0 {
-		// A match shorter than two runes has no windows; avoid dividing by zero.
-		return new_name, false
-	}
-	return new_name, float64(hit)/float64(total) >= 0.8
-}
-
-// dealWithWordsRules compares two names by their two-rune windows.
-//
-// It returns (total, hit): total is the number of windows calculateWordCount
-// produces for source_name, and hit is how many windows of info_name occur as
-// substrings of source_name.
-func dealWithWordsRules(info_name string, source_name string) (int, int) {
-	windows, _ := calculateWordCount(info_name)
-	_, windowTotal := calculateWordCount(source_name)
-	matched := 0
-	for idx := 0; idx < len(windows); idx++ {
-		if !strings.Contains(source_name, windows[idx]) {
-			continue
-		}
-		matched++
-	}
-	return windowTotal, matched
-}
-
-//分词结果
-func calculateWordCount(name string) ([]string,int) {
-
-	arr ,space:= make([]string,0),2
-	total := utf8.RuneCountInString(name)-(space-1)
-	if name == "" || total<=0  {
-		return arr,0
-	}
-	nameRune := []rune(name)
-	for i:=0;i<total ;i++  {
-		new_str := string(nameRune[i:space+i])
-		arr = append(arr,new_str)
-	}
-	return arr,len(arr)
-}
-
-
-
-// readyDataEs copies every document of the MongoDB collection "unique_qyxy"
-// into the Elasticsearch index/type "unique_qy".
-//
-// For each document it saves _id plus the company_name value under both
-// "name" and "name_word". Saves run in goroutines; the buffered channel
-// pool_es caps them at 20 in flight, and wg_es is awaited before returning.
-func readyDataEs()  {
-
-	q := map[string]interface{}{}
-	sess := save_mgo.GetMgoConn()
-	defer save_mgo.DestoryMongoConn(sess)
-	// Build the index with multiple goroutines.
-	pool_es := make(chan bool, 20)
-	wg_es := &sync.WaitGroup{}
-	// Walk the whole collection (empty query).
-	it := sess.DB(save_mgo.DbName).C("unique_qyxy").Find(&q).Iter()
-	total:=0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		// Progress log every 100000 documents.
-		if total % 100000 == 0 {
-			log.Println("current index",total,tmp["_id"])
-		}
-		savetmp := make(map[string]interface{}, 0)
-		savetmp["_id"] = tmp["_id"]
-		savetmp["name"] = qu.ObjToString(tmp["company_name"])
-		savetmp["name_word"] = qu.ObjToString(tmp["company_name"])
-		// Acquire a slot; blocks while 20 saves are already running.
-		pool_es <- true
-		wg_es.Add(1)
-		go func(savetmp map[string]interface{}) {
-			defer func() {
-				<-pool_es
-				wg_es.Done()
-			}()
-			elastic.Save("unique_qy","unique_qy", savetmp)
-		}(savetmp)
-		// Fresh map per iteration so the next decode does not reuse the old one.
-		tmp = make(map[string]interface{})
-	}
-	wg_es.Wait()
-
-	log.Println("is over",total)
-}
-
-
-
-
-
-
-

+ 0 - 328
data_monitoring/words_vaild/src/mgo.go

@@ -1,328 +0,0 @@
-package main
-
-import (
-	"context"
-	"log"
-	"strings"
-	"time"
-
-	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-)
-
-type MgoSess struct {
-	Db     string
-	Coll   string
-	Query  interface{}
-	Sorts  []string
-	fields interface{}
-	limit  int64
-	skip   int64
-	M      *MongodbSim
-}
-
-type MgoIter struct {
-	Cursor *mongo.Cursor
-}
-
-// Next advances the cursor and decodes the current document into result.
-//
-// It returns true when a document was decoded. It returns false, closing the
-// cursor, when the cursor is exhausted, when the cursor reports an error, or
-// when decoding fails; errors are logged. An iterator without a cursor (the
-// find failed) always returns false.
-func (mt *MgoIter) Next(result interface{}) bool {
-	if mt.Cursor == nil {
-		return false
-	}
-	// Pass a real context instead of nil, as the context package requires.
-	ctx := context.Background()
-	if !mt.Cursor.Next(ctx) {
-		// Distinguish "no more documents" from a failed fetch.
-		if err := mt.Cursor.Err(); err != nil {
-			log.Println("mgo cur err", err.Error())
-		}
-		mt.Cursor.Close(ctx)
-		return false
-	}
-	if err := mt.Cursor.Decode(result); err != nil {
-		log.Println("mgo cur err", err.Error())
-		mt.Cursor.Close(ctx)
-		return false
-	}
-	return true
-}
-
-func (ms *MgoSess) DB(name string) *MgoSess {
-	ms.Db = name
-	return ms
-}
-
-func (ms *MgoSess) C(name string) *MgoSess {
-	ms.Coll = name
-	return ms
-}
-
-func (ms *MgoSess) Find(q interface{}) *MgoSess {
-	ms.Query = q
-	return ms
-}
-
-func (ms *MgoSess) Select(fields interface{}) *MgoSess {
-	ms.fields = fields
-	return ms
-}
-
-func (ms *MgoSess) Limit(limit int64) *MgoSess {
-	ms.limit = limit
-	return ms
-}
-func (ms *MgoSess) Skip(skip int64) *MgoSess {
-	ms.skip = skip
-	return ms
-}
-
-func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
-	ms.Sorts = sorts
-	return ms
-}
-
-// Iter runs the query described by the session (Db, Coll, Query, sort,
-// projection, skip, limit) and returns an iterator over the results.
-//
-// Sort keys may be prefixed with "-" (descending) or "+" (ascending); an
-// unprefixed key sorts ascending. When the find fails, or the session has no
-// connected client, the error is logged and the returned iterator has no
-// cursor, so its Next reports false immediately.
-func (ms *MgoSess) Iter() *MgoIter {
-	it := &MgoIter{}
-	// InitPool leaves C nil when the connection failed; do not dereference it.
-	if ms.M == nil || ms.M.C == nil {
-		log.Println("mgo find err", "no mongodb client")
-		return it
-	}
-	find := options.Find()
-	if ms.skip > 0 {
-		find.SetSkip(ms.skip)
-	}
-	if ms.limit > 0 {
-		find.SetLimit(ms.limit)
-	}
-	find.SetBatchSize(100)
-	if len(ms.Sorts) > 0 {
-		// bson.D keeps the keys in the order given; a bson.M would hand the
-		// server a multi-key sort in random map order.
-		sort := make(bson.D, 0, len(ms.Sorts))
-		for _, k := range ms.Sorts {
-			dir := 1
-			switch {
-			case strings.HasPrefix(k, "-"):
-				dir, k = -1, k[1:]
-			case strings.HasPrefix(k, "+"):
-				k = k[1:]
-			}
-			if k == "" {
-				// An empty key (or a bare sign) is not a valid sort field.
-				continue
-			}
-			sort = append(sort, bson.E{Key: k, Value: dir})
-		}
-		if len(sort) > 0 {
-			find.SetSort(sort)
-		}
-	}
-	if ms.fields != nil {
-		find.SetProjection(ms.fields)
-	}
-	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, ms.Query, find)
-	if err != nil {
-		log.Println("mgo find err", err.Error())
-		return it
-	}
-	it.Cursor = cur
-	return it
-}
-
-type MongodbSim struct {
-	MongodbAddr string
-	Size        int
-	//	MinSize     int
-	DbName   string
-	C        *mongo.Client
-	Ctx      context.Context
-	ShortCtx context.Context
-	pool     chan bool
-	UserName string
-	Password string
-}
-
-func (m *MongodbSim) GetMgoConn() *MgoSess {
-	//m.Open()
-	ms := &MgoSess{}
-	ms.M = m
-	return ms
-}
-
-// DestoryMongoConn releases a session obtained from GetMgoConn by detaching it
-// from its MongodbSim. A nil session is ignored.
-func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
-	if ms == nil {
-		return
-	}
-	// Reassigning the ms parameter itself would not affect the caller, so only
-	// the back-reference is cleared.
-	ms.M = nil
-}
-
-// InitPool builds the client options from the struct fields and connects.
-//
-// It applies a 3s connect timeout, the "mongodb://"+MongodbAddr URI, a maximum
-// pool size of Size, a 2h maximum connection idle time and, when both UserName
-// and Password are non-empty, credential authentication. On success the client
-// is stored in m.C; on failure the error is only logged and m.C stays nil.
-func (m *MongodbSim) InitPool() {
-	opts := options.Client()
-	opts.SetConnectTimeout(3 * time.Second)
-	opts.ApplyURI("mongodb://" + m.MongodbAddr)
-	opts.SetMaxPoolSize(uint64(m.Size))
-	// pool is the semaphore used by Open/Close to cap concurrent operations.
-	m.pool = make(chan bool, m.Size)
-
-	if m.UserName !="" && m.Password !="" {
-		cre := options.Credential{
-			Username:m.UserName,
-			Password:m.Password,
-		}
-		opts.SetAuth(cre)
-	}
-
-
-
-	opts.SetMaxConnIdleTime(2 * time.Hour)
-	// NOTE(review): both cancel functions are discarded, which go vet reports
-	// as lostcancel; the timers are never released early.
-	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
-	// NOTE(review): ShortCtx expires one minute after InitPool runs, so any
-	// later use of it would fail with a deadline error — confirm intent.
-	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
-	client, err := mongo.Connect(m.ShortCtx, opts)
-	if err != nil {
-		log.Println("mgo init error:", err.Error())
-	} else {
-		m.C = client
-		log.Println("init success")
-	}
-}
-
-func (m *MongodbSim) Open() {
-	m.pool <- true
-}
-func (m *MongodbSim) Close() {
-	<-m.pool
-}
-
-//批量插入
-func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewUpdateOneModel()
-		write.SetFilter(d[0])
-		write.SetUpdate(d[1])
-		write.SetUpsert(true)
-		writes = append(writes, write)
-	}
-	r, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo upsert error:", e.Error())
-		return nil, false
-	}
-	//	else {
-	//		if r.UpsertedCount != int64(len(doc)) {
-	//			log.Println("mgo upsert uncomplete:uc/dc", r.UpsertedCount, len(doc))
-	//		}
-	//		return true
-	//	}
-	return r.UpsertedIDs, true
-}
-
-//批量插入
-func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewInsertOneModel()
-		write.SetDocument(d)
-		writes = append(writes, write)
-	}
-	_, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo savebulk error:", e.Error())
-		return false
-	}
-	return true
-}
-
-//保存
-func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.InsertOne(m.Ctx, doc)
-	if err != nil {
-		return nil
-	}
-	return r.InsertedID
-}
-
-//更新by Id
-func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
-	if err != nil {
-		return false
-	}
-	return true
-}
-
-//删除by id
-func (m *MongodbSim) DeleteById(c, id string) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//通过条件删除
-func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteMany(m.Ctx, query)
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//findbyid
-func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-//findone
-func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, query)
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-// Find returns every document of collection c that matches query.
-//
-// sort and fields are passed to the driver as sort and projection documents;
-// nil leaves the respective option unset. On failure the error is logged and
-// returned to the caller rather than terminating the process.
-func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	op := options.Find()
-	if sort != nil {
-		op.SetSort(sort)
-	}
-	if fields != nil {
-		op.SetProjection(fields)
-	}
-	r, err := coll.Find(m.Ctx, query, op)
-	if err != nil {
-		// log.Fatal here would exit the program and make this return dead code.
-		log.Println("mgo find err", err.Error())
-		return nil, err
-	}
-	var results []map[string]interface{}
-	if err = r.All(m.Ctx, &results); err != nil {
-		log.Println("mgo find err", err.Error())
-		return nil, err
-	}
-	return results, nil
-}
-
-//创建_id
-func NewObjectId() primitive.ObjectID {
-	return primitive.NewObjectID()
-}
-
-func StringTOBsonId(id string) primitive.ObjectID {
-	objectId, _ := primitive.ObjectIDFromHex(id)
-	return objectId
-}
-
-// BsonTOStringId returns the hex form of the ObjectID held in id. It returns
-// "" when id does not hold a primitive.ObjectID instead of panicking on the
-// type assertion.
-func BsonTOStringId(id interface{}) string {
-	oid, ok := id.(primitive.ObjectID)
-	if !ok {
-		return ""
-	}
-	return oid.Hex()
-}

+ 0 - 304
data_monitoring/words_vaild/src1/main.go

@@ -1,304 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"github.com/tealeg/xlsx"
-	"log"
-	"os"
-	qu "qfw/util"
-	"qfw/util/elastic"
-	"strings"
-	"sync"
-	"unicode/utf8"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-
-)
-var (
-	sysconfig			map[string]interface{} //配置文件
-	save_mgo        	*MongodbSim
-)
-
-func init()  {
-	save_mgo = &MongodbSim{
-		MongodbAddr: "192.168.3.207:27092",
-		DbName:      "zhengkun",
-		Size:        5,
-	}
-	save_mgo.InitPool()
-
-	elastic.InitElasticSize("http://192.168.3.11:9800",20)
-}
-
-// dealWithDataXlsx samples every 30th document of the "zk_test_words"
-// collection and exports the sample to words.xlsx, one sheet per
-// word_0..word_4 field. Each row holds the source name and the first
-// candidate stored under that field; for fields other than word_0 it also
-// holds that candidate's all_words and hit_words counts.
-func dealWithDataXlsx() {
-	q := map[string]interface{}{}
-	sess := save_mgo.GetMgoConn()
-	defer save_mgo.DestoryMongoConn(sess)
-	it := sess.DB(save_mgo.DbName).C("zk_test_words").Find(&q).Iter()
-	total := 0
-	saveArr := make([]map[string]string, 0)
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
-			log.Println("current index", total, tmp["_id"])
-		}
-		if total%30 == 0 {
-			saveArr = append(saveArr, buildWordsRow(tmp))
-		}
-		tmp = make(map[string]interface{})
-	}
-
-	// Best effort: a missing previous export is not an error.
-	os.Remove("words.xlsx")
-	f := xlsx.NewFile()
-	for i := 0; i < 5; i++ {
-		key := fmt.Sprintf("word_%d", i)
-		sheet, err := f.AddSheet("统计" + key)
-		if err != nil {
-			// Without a sheet there is nothing to write rows into.
-			log.Println("保存xlsx失败:", err)
-			return
-		}
-		row := sheet.AddRow()
-		row.AddCell().Value = "name"
-		row.AddCell().Value = key
-		if i != 0 {
-			row.AddCell().Value = "total"
-			row.AddCell().Value = "hit"
-		}
-		totalKey, hitKey := fmt.Sprintf("total%d", i), fmt.Sprintf("hit%d", i)
-		for _, rec := range saveArr {
-			row = sheet.AddRow()
-			row.AddCell().SetString(rec["name"])
-			row.AddCell().SetString(rec[key])
-			row.AddCell().SetString(rec[totalKey])
-			row.AddCell().SetString(rec[hitKey])
-		}
-	}
-
-	if err := f.Save("words.xlsx"); err != nil {
-		log.Println("保存xlsx失败:", err)
-		return
-	}
-	log.Println("保存xlsx成功:")
-}
-
-// buildWordsRow flattens one "zk_test_words" document into the string columns
-// written to the spreadsheet: name, word_i, total_i and hit_i for i in 0..4.
-// Fields that are missing, empty or of an unexpected type yield "".
-func buildWordsRow(doc map[string]interface{}) map[string]string {
-	rec := make(map[string]string)
-	rec["name"] = qu.ObjToString(doc["name"])
-	for i := 0; i < 5; i++ {
-		key := fmt.Sprintf("word_%d", i)
-		value, allWords, hitWords := "", "", ""
-		if arr, ok := doc[key].(primitive.A); ok {
-			// Only the first candidate is exported; guard against an empty array.
-			if dataArr := qu.ObjArrToMapArr(arr); len(dataArr) > 0 {
-				value = qu.ObjToString(dataArr[0]["name"])
-				if i != 0 {
-					if v := dataArr[0]["all_words"]; v != nil {
-						allWords = fmt.Sprintf("%d", v)
-					}
-					if v := dataArr[0]["hit_words"]; v != nil {
-						hitWords = fmt.Sprintf("%d", v)
-					}
-				}
-			}
-		}
-		rec[key] = value
-		rec[fmt.Sprintf("total%d", i)] = allWords
-		rec[fmt.Sprintf("hit%d", i)] = hitWords
-	}
-	return rec
-}
-
-func main()  {
-
-	//导出xlsx
-	dealWithDataXlsx()
-	return
-
-
-
-
-	defer qu.Catch()
-	log.Println("处理 ... 指定企业名称 ...")
-
-	//分析错误数据
-	//
-	q := map[string]interface{}{}
-	sess := save_mgo.GetMgoConn()
-	defer save_mgo.DestoryMongoConn(sess)
-	//细节才需要遍历
-	it := sess.DB(save_mgo.DbName).C("zk_company_test").Find(&q).Iter()
-	total:=0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total % 10000 == 0 {
-			log.Println("current index",total,tmp["_id"])
-		}
-
-		name:=qu.ObjToString(tmp["name"])
-		save_dict := make(map[string]interface{},0)
-		for i:=0; i<5;i++ {
-			key := "word_"+fmt.Sprintf("%d",i)
-			dataArr :=dealWithScoreRules(name,i)
-			if dataArr ==nil || len(dataArr)<1 {
-				//无数据
-			}else {
-				save_dict[key] = dealWithWordsRules(name,dataArr,i)
-			}
-
-		}
-
-		if len(save_dict)>0 {
-			save_dict["name"]  = name
-			save_mgo.Save("zk_test_words",save_dict)
-		}
-
-		tmp = make(map[string]interface{})
-	}
-
-}
-
-//分数维度
-func dealWithScoreRules(name string,space int) []map[string]interface{} {
-	key := ""
-	if space>0&&space<5{
-		key = fmt.Sprintf("%d",space)
-	}
-	query:= `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_`+key+`","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}`
-
-	if key=="" {
-		query = `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}`
-	}
-	client := elastic.GetEsConn()
-	defer elastic.DestoryEsConn(client)
-	searchResult, err := client.Search().Index("azktest").Type("azktest").Source(query).Do()
-	if err != nil {
-		log.Println("从ES查询出错", err.Error())
-		return nil
-	}
-	resNum := len(searchResult.Hits.Hits)
-	res := make([]map[string]interface{}, resNum)
-	if searchResult.Hits != nil {
-		if resNum < 5000 {
-			for i, hit := range searchResult.Hits.Hits {
-				data := make(map[string]interface{},0)
-				json.Unmarshal(*hit.Source, &data)
-				res[i] = map[string]interface{}{
-					"name":data["name"],
-					"score":*hit.Score,
-				}
-			}
-		} else {
-			log.Println("查询结果太多,查询到:", resNum, "条")
-		}
-
-	}
-	return res
-}
-
-
-
-//击中数量以及比例
-func dealWithWordsRules(name string ,source []map[string]interface{},space int) []map[string]interface{} {
-
-	nameArr,_ := calculateWordCount(name,space)
-	newArr := make([]map[string]interface{},0)
-	for _,v := range source {
-		total,hit :=0,0
-		source_name :=qu.ObjToString(v["name"])
-		_,total = calculateWordCount(source_name,space)
-		for _,v1 := range nameArr {
-			if strings.Contains(source_name,v1) {
-				hit++
-			}
-		}
-
-
-		if space==0 {
-			newArr = append(newArr, map[string]interface{}{
-				"name":source_name,
-				"score":qu.Float64All(v["score"]),
-			})
-		}else {
-			newArr = append(newArr, map[string]interface{}{
-				"name":source_name,
-				"score":qu.Float64All(v["score"]),
-				"all_words" : total,
-				"hit_words" : hit,
-			})
-		}
-	}
-	return newArr
-}
-
-// calculateWordCount splits name into overlapping windows of space runes.
-//
-// It returns the windows in order together with their count. An empty name, a
-// non-positive space, or a name shorter than space runes yields an empty
-// (non-nil) slice and 0.
-func calculateWordCount(name string, space int) ([]string, int) {
-	windowCount := utf8.RuneCountInString(name) - (space - 1)
-	if name == "" || space <= 0 || windowCount <= 0 {
-		return make([]string, 0), 0
-	}
-	runes := []rune(name)
-	windows := make([]string, 0, windowCount)
-	for start := 0; start < windowCount; start++ {
-		windows = append(windows, string(runes[start:start+space]))
-	}
-	return windows, len(windows)
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-func readyDataEs()  {
-
-	q := map[string]interface{}{}
-	sess := save_mgo.GetMgoConn()
-	defer save_mgo.DestoryMongoConn(sess)
-	//多线程升索引
-	pool_es := make(chan bool, 10)
-	wg_es := &sync.WaitGroup{}
-	//细节才需要遍历
-	it := sess.DB(save_mgo.DbName).C("zk_company_name").Find(&q).Iter()
-	total:=0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total % 10000 == 0 {
-			log.Println("current index",total,tmp["_id"])
-		}
-		savetmp := make(map[string]interface{}, 0)
-		savetmp["_id"] = tmp["_id"]
-		savetmp["name"] = qu.ObjToString(tmp["company_name"])
-		savetmp["name_1"] = qu.ObjToString(tmp["company_name"])
-		savetmp["name_2"] = qu.ObjToString(tmp["company_name"])
-		savetmp["name_3"] = qu.ObjToString(tmp["company_name"])
-		savetmp["name_4"] = qu.ObjToString(tmp["company_name"])
-		pool_es <- true
-		wg_es.Add(1)
-		go func(savetmp map[string]interface{}) {
-			defer func() {
-				<-pool_es
-				wg_es.Done()
-			}()
-			elastic.Save("azktest","azktest", savetmp)
-		}(savetmp)
-		tmp = make(map[string]interface{})
-	}
-	wg_es.Wait()
-
-
-	log.Println("is over",total)
-}
-

+ 0 - 328
data_monitoring/words_vaild/src1/mgo.go

@@ -1,328 +0,0 @@
-package main
-
-import (
-	"context"
-	"log"
-	"time"
-
-	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-)
-
-type MgoSess struct {
-	Db     string
-	Coll   string
-	Query  interface{}
-	Sorts  []string
-	fields interface{}
-	limit  int64
-	skip   int64
-	M      *MongodbSim
-}
-
-type MgoIter struct {
-	Cursor *mongo.Cursor
-}
-
-// Next advances the cursor and decodes the current document into result.
-//
-// It returns true when a document was decoded. It returns false, closing the
-// cursor, when the cursor is exhausted, when the cursor reports an error, or
-// when decoding fails; errors are logged. An iterator without a cursor (the
-// find failed) always returns false.
-func (mt *MgoIter) Next(result interface{}) bool {
-	if mt.Cursor == nil {
-		return false
-	}
-	// Pass a real context instead of nil, as the context package requires.
-	ctx := context.Background()
-	if !mt.Cursor.Next(ctx) {
-		// Distinguish "no more documents" from a failed fetch.
-		if err := mt.Cursor.Err(); err != nil {
-			log.Println("mgo cur err", err.Error())
-		}
-		mt.Cursor.Close(ctx)
-		return false
-	}
-	if err := mt.Cursor.Decode(result); err != nil {
-		log.Println("mgo cur err", err.Error())
-		mt.Cursor.Close(ctx)
-		return false
-	}
-	return true
-}
-
-func (ms *MgoSess) DB(name string) *MgoSess {
-	ms.Db = name
-	return ms
-}
-
-func (ms *MgoSess) C(name string) *MgoSess {
-	ms.Coll = name
-	return ms
-}
-
-func (ms *MgoSess) Find(q interface{}) *MgoSess {
-	ms.Query = q
-	return ms
-}
-
-func (ms *MgoSess) Select(fields interface{}) *MgoSess {
-	ms.fields = fields
-	return ms
-}
-
-func (ms *MgoSess) Limit(limit int64) *MgoSess {
-	ms.limit = limit
-	return ms
-}
-func (ms *MgoSess) Skip(skip int64) *MgoSess {
-	ms.skip = skip
-	return ms
-}
-
-func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
-	ms.Sorts = sorts
-	return ms
-}
-
-// Iter runs the query described by the session (Db, Coll, Query, sort,
-// projection, skip, limit) and returns an iterator over the results.
-//
-// Sort keys may be prefixed with "-" (descending) or "+" (ascending); an
-// unprefixed key sorts ascending. When the find fails, or the session has no
-// connected client, the error is logged and the returned iterator has no
-// cursor, so its Next reports false immediately.
-func (ms *MgoSess) Iter() *MgoIter {
-	it := &MgoIter{}
-	// InitPool leaves C nil when the connection failed; do not dereference it.
-	if ms.M == nil || ms.M.C == nil {
-		log.Println("mgo find err", "no mongodb client")
-		return it
-	}
-	find := options.Find()
-	if ms.skip > 0 {
-		find.SetSkip(ms.skip)
-	}
-	if ms.limit > 0 {
-		find.SetLimit(ms.limit)
-	}
-	find.SetBatchSize(100)
-	if len(ms.Sorts) > 0 {
-		// bson.D keeps the keys in the order given; a bson.M would hand the
-		// server a multi-key sort in random map order.
-		sort := make(bson.D, 0, len(ms.Sorts))
-		for _, k := range ms.Sorts {
-			if k == "" {
-				// k[0] below would panic on an empty key.
-				continue
-			}
-			dir := 1
-			switch k[0] {
-			case '-':
-				dir, k = -1, k[1:]
-			case '+':
-				k = k[1:]
-			}
-			if k == "" {
-				// A bare sign names no field.
-				continue
-			}
-			sort = append(sort, bson.E{Key: k, Value: dir})
-		}
-		if len(sort) > 0 {
-			find.SetSort(sort)
-		}
-	}
-	if ms.fields != nil {
-		find.SetProjection(ms.fields)
-	}
-	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, ms.Query, find)
-	if err != nil {
-		log.Println("mgo find err", err.Error())
-		return it
-	}
-	it.Cursor = cur
-	return it
-}
-
-type MongodbSim struct {
-	MongodbAddr string
-	Size        int
-	//	MinSize     int
-	DbName   string
-	C        *mongo.Client
-	Ctx      context.Context
-	ShortCtx context.Context
-	pool     chan bool
-	UserName string
-	Password string
-}
-
-func (m *MongodbSim) GetMgoConn() *MgoSess {
-	//m.Open()
-	ms := &MgoSess{}
-	ms.M = m
-	return ms
-}
-
-// DestoryMongoConn releases a session obtained from GetMgoConn by detaching it
-// from its MongodbSim. A nil session is ignored.
-func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
-	if ms == nil {
-		return
-	}
-	// Reassigning the ms parameter itself would not affect the caller, so only
-	// the back-reference is cleared.
-	ms.M = nil
-}
-
-func (m *MongodbSim) InitPool() {
-	opts := options.Client()
-	opts.SetConnectTimeout(3 * time.Second)
-	opts.ApplyURI("mongodb://" + m.MongodbAddr)
-	opts.SetMaxPoolSize(uint64(m.Size))
-	m.pool = make(chan bool, m.Size)
-
-	if m.UserName !="" && m.Password !="" {
-		cre := options.Credential{
-			Username:m.UserName,
-			Password:m.Password,
-		}
-		opts.SetAuth(cre)
-	}
-
-
-
-	opts.SetMaxConnIdleTime(2 * time.Hour)
-	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
-	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
-	client, err := mongo.Connect(m.ShortCtx, opts)
-	if err != nil {
-		log.Println("mgo init error:", err.Error())
-	} else {
-		m.C = client
-		log.Println("init success")
-	}
-}
-
-func (m *MongodbSim) Open() {
-	m.pool <- true
-}
-func (m *MongodbSim) Close() {
-	<-m.pool
-}
-
-//批量插入
-func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewUpdateOneModel()
-		write.SetFilter(d[0])
-		write.SetUpdate(d[1])
-		write.SetUpsert(true)
-		writes = append(writes, write)
-	}
-	r, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo upsert error:", e.Error())
-		return nil, false
-	}
-	//	else {
-	//		if r.UpsertedCount != int64(len(doc)) {
-	//			log.Println("mgo upsert uncomplete:uc/dc", r.UpsertedCount, len(doc))
-	//		}
-	//		return true
-	//	}
-	return r.UpsertedIDs, true
-}
-
-//批量插入
-func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewInsertOneModel()
-		write.SetDocument(d)
-		writes = append(writes, write)
-	}
-	_, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo savebulk error:", e.Error())
-		return false
-	}
-	return true
-}
-
-//保存
-func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.InsertOne(m.Ctx, doc)
-	if err != nil {
-		return nil
-	}
-	return r.InsertedID
-}
-
-//更新by Id
-func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
-	if err != nil {
-		return false
-	}
-	return true
-}
-
-//删除by id
-func (m *MongodbSim) DeleteById(c, id string) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//通过条件删除
-func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteMany(m.Ctx, query)
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//findbyid
-func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-//findone
-func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, query)
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-// Find returns every document of collection c that matches query.
-//
-// sort and fields are passed to the driver as sort and projection documents;
-// nil leaves the respective option unset. On failure the error is logged and
-// returned to the caller rather than terminating the process.
-func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	op := options.Find()
-	if sort != nil {
-		op.SetSort(sort)
-	}
-	if fields != nil {
-		op.SetProjection(fields)
-	}
-	r, err := coll.Find(m.Ctx, query, op)
-	if err != nil {
-		// log.Fatal here would exit the program and make this return dead code.
-		log.Println("mgo find err", err.Error())
-		return nil, err
-	}
-	var results []map[string]interface{}
-	if err = r.All(m.Ctx, &results); err != nil {
-		log.Println("mgo find err", err.Error())
-		return nil, err
-	}
-	return results, nil
-}
-
-//创建_id
-func NewObjectId() primitive.ObjectID {
-	return primitive.NewObjectID()
-}
-
-func StringTOBsonId(id string) primitive.ObjectID {
-	objectId, _ := primitive.ObjectIDFromHex(id)
-	return objectId
-}
-
-// BsonTOStringId returns the hex form of the ObjectID held in id. It returns
-// "" when id does not hold a primitive.ObjectID instead of panicking on the
-// type assertion.
-func BsonTOStringId(id interface{}) string {
-	oid, ok := id.(primitive.ObjectID)
-	if !ok {
-		return ""
-	}
-	return oid.Hex()
-}

+ 64 - 37
udpdataclear/udpSensitiveWords/grpc_server/data.go

@@ -1,49 +1,18 @@
 package main
 
 import (
+	"encoding/json"
 	"github.com/importcjj/sensitive"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	"go.mongodb.org/mongo-driver/mongo/options"
 	"gopkg.in/mgo.v2/bson"
 	"log"
-	"regexp"
 	"runtime"
 	"strings"
+	"sync"
 	"time"
-	"encoding/json"
 )
 
-var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
-	"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
-	"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
-	"地方金融监督管理局|住房保障和房屋管理局|质量技术监督局|人力资源与社会保障局|公路管理局|国土资源局|" +
-	"卫生和计划生育局|民事政务局|公众安全局|交通管理局|人力资源和社会保障局|劳动和社会保障局|" +
-	"住房和城乡建设局|就业服务局|文物管理局|环境保护局|粮食和物资储备局|教育体育局|" +
-	"体育局|教育局|招商局|农业局|农机局|水务局|林业局|财政局|审计局|统计局|商务局)$")
-var reglen *regexp.Regexp = regexp.MustCompile("^(.{1,5}|.{40,})$")
-var strReg *regexp.Regexp = regexp.MustCompile("^(.{0,3}工程队|.{0,3}总公司|_+|.{0,2}设备安装公司|.{0,2}装[饰修潢]公司|.{0,2}开发公司|.{0,4}有限公司|.{0,4}有限责任公司|.{0,4}设计院|建筑设计研?究?院|省文物考古研究所|经济开发区|省.*|镇人民政府|.{0,2}服务公司|" +
-	".{0,2}工程质量监督站|.{0,3}经[营销]部|.{0,3}事务所|.{0,4}工程公司|.{0,4}责任公司|.*勘测|.{0,4}研究院|.*能源建|.{0,2}安装工程|.*[市省]{1}|.{0,4}中心|.*区.?|" +
-	".{0,3}税务局|.{0,3}财政局|.{0,3}商行|.{0,2}公安处|.{0,2}测绘院|.{0,3}开发|.{0,2}建设局|.{0,2}经销部|.{0,3}委员会|.{0,2}分公司|.{0,2}管理站|.{0,2}事务管理局|" +
-	".*资料|.{0,2}办公用品.{1,2}|.*唯亭|.*设备|.+安装|.{0,2}技术服务|市.+[台院社局司]|城?区.+[府局室院]|县.+[院台局]|.{0,2}发展公司|经济技术开发|" +
-	"发展和改革局|贵州有色地质|铝塑门窗加工|生产力促进中心|特殊普通合伙|工业集团公司|人民调解协会|人民政府办公厅|机电设备公司|房地产开发有限公司|.{0,4}商店|中等专业学校|" +
-	"农村信用联社|.{0,4}经营部|.{0,4}销售部|驾驶员培训学校|.{2}县.{2}镇|保安服务总公司|住房和城乡建设局|地产评估事务所|生产资料门市部|×+|.{0,3}[0-9]{15}|.*[0-9]+|.*路|.*无字号名称.*|.*车|.*[,,]{1}.*|.*个体工商户|.*运输户)$")
-
-//非中文开头...
-var unstart_strReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
-//开头
-var start_strReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|省|市|县|区|业绩|资格|中标|项目|预算单位)")
-//结尾
-var end_strReg *regexp.Regexp =  regexp.MustCompile("(\\.|\\.\\.|餐馆|店|腻子|肉庄|画社|美发屋|发廊|网吧|网咖|零售点|新街|包子铺|奶茶铺|(株)|先生|女士|小姐|" +
-	"资格|业绩|中标|项目|预算单位|摊位号|号|厅|室|部|点|馆|场|厂|床|所|处|站|行|中心|合作社|ATMS|" +
-	"吧|楼|摊|摊位|廊|茶社|坊|圃|汤锅|园|民宿|美容院|房|排挡|府|庄|栈|队|批发|苑|养殖户|棋牌|农家乐|货运|" +
-	"城|社|基地|会|服务|娱乐|种植|百货|汽修|农家菜|亭|小吃|快餐|粮库|卫生院|书画院|面|门窗|鸡排|屋|橱|堂|肉铺|服务|服饰|/*)$")
-//包含
-var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|?|%|代码标识|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|" +
-	"法人|&nbsp|国家拨入|借款|积累资金|单位自有|认股人|--|、|&|`|美元|[\u4e00-\u9fa5]{2,6}·[\u4e00-\u9fa5]{2,6})|" +
-	"[a-zA-Z]{5,}")
-
-var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
-
 
 //部署-历史-敏感词库
 func initSensitiveWordsData()  {
@@ -81,7 +50,7 @@ func initSensitiveWordsData()  {
 
 
 
-//定时增量数据处理
+//定时增量数据处理---冯
 func addTaskSensitiveWordsData()  {
 	mmmgo, err := InitMgoEn("mongodb://172.17.4.187:27082,172.17.145.163:27083", 20, "fengweiqiang", "fwq@123123")
 	if err != nil {
@@ -108,7 +77,10 @@ func addTaskSensitiveWordsData()  {
 			err := findByupdate.Decode(&tmp)
 			if err == nil {
 				if company_name, ok := tmp["company_name"].(string); ok {
-					if reglen.MatchString(company_name) || strReg.MatchString(company_name) {
+					if reglen.MatchString(company_name) || strReg.MatchString(company_name) ||
+						!uncon_strReg.MatchString(company_name)|| !unstart_strReg.MatchString(company_name)||
+						start_strReg.MatchString(company_name)|| end_strReg.MatchString(company_name)||
+						con_strReg.MatchString(company_name) {
 						continue
 					}
 					if strings.Contains(ObjToString(tmp["company_type"]),"个人")||
@@ -117,6 +89,7 @@ func addTaskSensitiveWordsData()  {
 						strings.Contains(ObjToString(tmp["company_type_old"]),"个体") {
 						continue
 					}
+
 					//存mgo
 					con.Database("mixdata").Collection("unique_qyxy").InsertOne(nil, bson.M{
 						"qy_name": company_name,
@@ -131,6 +104,17 @@ func addTaskSensitiveWordsData()  {
 		log.Println("tick ok", cronData)
 	}
 }
+
+
+
+
+
+
+
+
+
+
+
 //处理是否新增es
 func dealWithEsData(name string,tmpid string)  {
 	query:= `{"query":{"bool":{"must":[{"term":{"`+es_index+`.name":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"facets":{}}`
@@ -165,10 +149,8 @@ func dealWithEsData(name string,tmpid string)  {
 
 
 
-
 //处理内存分段
 func dealWithDataMemory()  {
-
 	iter := MixDataMgo.GetMgoConn().C("unique_qyxy").Find(map[string]interface{}{
 		"_id": map[string]interface{}{
 			"$gte": BsonTOStringId("1fffffffffffffffffffffff"),
@@ -214,3 +196,48 @@ func dealWithDataMemory()  {
 
 	log.Println("memory is ok", initnum)
 }
+
+
+
+
+
+func temporaryTest()  {
+	log.Println("测试......导出数据")
+	q := map[string]interface{}{}
+	sess := data_mgo.GetMgoConn()
+	defer data_mgo.DestoryMongoConn(sess)
+	//多线程升索引
+	pool_es := make(chan bool, 20)
+	wg_es := &sync.WaitGroup{}
+	it := sess.DB(data_mgo.DbName).C("zk_company_test").Find(&q).Iter()
+	total,isOK:=0,0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total % 1000 == 0 {
+			log.Println("current index",total,isOK)
+		}
+		name:=ObjToString(tmp["name"])
+		pool_es <- true
+		wg_es.Add(1)
+
+		go func(name string) {
+			defer func() {
+				<-pool_es
+				wg_es.Done()
+			}()
+
+			new_name,b :=dealWithNameScoreRules(name)
+			if new_name!="" && b {
+				isOK++
+				data_mgo.Save("zk_words_test_test", map[string]interface{}{
+					"name":name,
+					"new_name":new_name,
+				})
+			}
+		}(name)
+		tmp = make(map[string]interface{})
+	}
+
+	wg_es.Wait()
+
+	log.Println("is over",total)
+}

+ 47 - 4
udpdataclear/udpSensitiveWords/grpc_server/main.go

@@ -6,22 +6,54 @@ import (
 	"github.com/importcjj/sensitive"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	"google.golang.org/grpc"
+	"gopkg.in/olivere/elastic.v1"
 	"gopkg.in/yaml.v2"
 	"io/ioutil"
 	"log"
 	"math/big"
 	"net"
 	"net/http"
+	"regexp"
 	"sensitiveWords.udp/proto_grpc"
 	"sensitiveWords.udp/util"
 	"strconv"
 	"strings"
-	"gopkg.in/olivere/elastic.v1"
 )
 
 const (
 	YAMLFILE = "./server.yaml"
 )
+var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
+	"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
+	"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
+	"地方金融监督管理局|住房保障和房屋管理局|质量技术监督局|人力资源与社会保障局|公路管理局|国土资源局|" +
+	"卫生和计划生育局|民事政务局|公众安全局|交通管理局|人力资源和社会保障局|劳动和社会保障局|" +
+	"住房和城乡建设局|就业服务局|文物管理局|环境保护局|粮食和物资储备局|教育体育局|" +
+	"体育局|教育局|招商局|农业局|农机局|水务局|林业局|财政局|审计局|统计局|商务局)$")
+var reglen *regexp.Regexp = regexp.MustCompile("^(.{1,5}|.{40,})$")
+var strReg *regexp.Regexp = regexp.MustCompile("^(.{0,3}工程队|.{0,3}总公司|_+|.{0,2}设备安装公司|.{0,2}装[饰修潢]公司|.{0,2}开发公司|.{0,4}有限公司|.{0,4}有限责任公司|.{0,4}设计院|建筑设计研?究?院|省文物考古研究所|经济开发区|省.*|镇人民政府|.{0,2}服务公司|" +
+	".{0,2}工程质量监督站|.{0,3}经[营销]部|.{0,3}事务所|.{0,4}工程公司|.{0,4}责任公司|.*勘测|.{0,4}研究院|.*能源建|.{0,2}安装工程|.*[市省]{1}|.{0,4}中心|.*区.?|" +
+	".{0,3}税务局|.{0,3}财政局|.{0,3}商行|.{0,2}公安处|.{0,2}测绘院|.{0,3}开发|.{0,2}建设局|.{0,2}经销部|.{0,3}委员会|.{0,2}分公司|.{0,2}管理站|.{0,2}事务管理局|" +
+	".*资料|.{0,2}办公用品.{1,2}|.*唯亭|.*设备|.+安装|.{0,2}技术服务|市.+[台院社局司]|城?区.+[府局室院]|县.+[院台局]|.{0,2}发展公司|经济技术开发|" +
+	"发展和改革局|贵州有色地质|铝塑门窗加工|生产力促进中心|特殊普通合伙|工业集团公司|人民调解协会|人民政府办公厅|机电设备公司|房地产开发有限公司|.{0,4}商店|中等专业学校|" +
+	"农村信用联社|.{0,4}经营部|.{0,4}销售部|驾驶员培训学校|.{2}县.{2}镇|保安服务总公司|住房和城乡建设局|地产评估事务所|生产资料门市部|×+|.{0,3}[0-9]{15}|.*[0-9]+|.*路|.*无字号名称.*|.*车|.*[,,]{1}.*|.*个体工商户|.*运输户)$")
+
+//非中文开头...
+var unstart_strReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
+//开头
+var start_strReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|省|市|县|区|业绩|资格|中标|项目|预算单位)")
+// end_strReg matches names that end with one of the listed suffixes.
+//
+// The last alternative is "/+" rather than "/*": "/*" also matches the empty
+// string, which made the whole group match at the end of every input, so any
+// filter using end_strReg.MatchString rejected every name. "/+" keeps the
+// non-empty matches (one or more trailing slashes) and drops the empty one.
+// NOTE(review): confirm that trailing slashes are what this alternative was meant to catch.
+var end_strReg *regexp.Regexp =  regexp.MustCompile("(\\.|\\.\\.|餐馆|店|腻子|肉庄|画社|美发屋|发廊|网吧|网咖|零售点|新街|包子铺|奶茶铺|(株)|先生|女士|小姐|" +
+	"资格|业绩|中标|项目|预算单位|摊位号|号|厅|室|部|点|馆|场|厂|床|所|处|站|行|中心|合作社|ATMS|" +
+	"吧|楼|摊|摊位|廊|茶社|坊|圃|汤锅|园|民宿|美容院|房|排挡|府|庄|栈|队|批发|苑|养殖户|棋牌|农家乐|货运|" +
+	"城|社|基地|会|服务|娱乐|种植|百货|汽修|农家菜|亭|小吃|快餐|粮库|卫生院|书画院|面|门窗|鸡排|屋|橱|堂|肉铺|服务|服饰|/+)$")
+// con_strReg matches names that contain any of the listed markers, two
+// Chinese runs joined by "·", or a run of five or more ASCII letters.
+//
+// The second alternative is the fullwidth question mark. As rendered it was a
+// bare "?", which has no operand to repeat and makes regexp.MustCompile panic
+// at package init.
+// NOTE(review): the ASCII "?" is already covered by "\\?", so the fullwidth
+// form is presumably what was intended — confirm against the original file.
+var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|？|%|代码标识|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|" +
+	"法人|&nbsp|国家拨入|借款|积累资金|单位自有|认股人|--|、|&|`|美元|[\u4e00-\u9fa5]{2,6}·[\u4e00-\u9fa5]{2,6})|" +
+	"[a-zA-Z]{5,}")
+
+var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
+
 
 var YamlConfig YAMLConfig
 var MixDataMgo *util.MongodbSim
@@ -29,6 +61,8 @@ var Filter *sensitive.Filter
 var es_type, es_index	string
 var Client_Es  *elastic.Client
 
+// data_mgo is an additional MongoDB handle; it is assigned and its pool is
+// initialised inside init().
+var data_mgo  *MongodbSim
+
 func init() {
 	yamlFile, err := ioutil.ReadFile(YAMLFILE)
 	if err != nil {
@@ -48,6 +82,15 @@ func init() {
 	}
 	MixDataMgo.InitPool()
 
+	data_mgo = &MongodbSim{
+		MongodbAddr: "192.168.3.207:27092",
+		DbName:      "zhengkun",
+		Size:        10,
+		UserName:    "",
+		Password:    "",
+	}
+	data_mgo.InitPool()
+
 	Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
 	es_type, es_index = "azktest","azktest"
 
@@ -55,10 +98,10 @@ func init() {
 }
 
 
-
 func main() {
-	
-	//淡赌跑断
+	//测试
+	temporaryTest()
+	return
 
 	if YamlConfig.IsAddTask==0{
 		initSensitiveWordsData() //初始化敏感词数据

+ 322 - 0
udpdataclear/udpSensitiveWords/grpc_server/mgo.go

@@ -2,8 +2,12 @@ package main
 
 import (
 	"context"
+	"go.mongodb.org/mongo-driver/bson/primitive"
 	"go.mongodb.org/mongo-driver/mongo"
 	"go.mongodb.org/mongo-driver/mongo/options"
+	"gopkg.in/mgo.v2/bson"
+	"log"
+	"strings"
+	"time"
 )
 
 type Mgo struct {
@@ -12,6 +16,324 @@ type Mgo struct {
 	mgoEn              *mongo.Client
 }
 
+// MgoSess is a chainable query builder: the DB/C/Find/Select/Limit/Skip/Sort
+// methods record their argument on the struct and Iter executes the query.
+type MgoSess struct {
+	// Db is the database name set by DB.
+	Db     string
+	// Coll is the collection name set by C.
+	Coll   string
+	// Query is the filter set by Find.
+	Query  interface{}
+	// Sorts holds the sort keys set by Sort; a leading "-" means descending.
+	Sorts  []string
+	// fields is the projection set by Select.
+	fields interface{}
+	// limit and skip are applied by Iter only when greater than zero.
+	limit  int64
+	skip   int64
+	// M is the owning handle whose client and context Iter uses.
+	M      *MongodbSim
+}
+
+// MgoIter wraps a driver cursor. Cursor stays nil when the underlying find
+// failed, in which case Next reports false immediately.
+type MgoIter struct {
+	Cursor *mongo.Cursor
+}
+
+// Next advances the cursor and decodes the next document into result.
+//
+// It returns false when the iterator has no cursor, when the cursor is
+// exhausted, or when advancing/decoding fails; in the last two cases the cursor
+// is closed. Failures are logged so that a broken cursor is distinguishable
+// from a normal end of data.
+func (mt *MgoIter) Next(result interface{}) bool {
+	if mt.Cursor == nil {
+		return false
+	}
+	// Use an explicit background context instead of passing nil.
+	ctx := context.Background()
+	if !mt.Cursor.Next(ctx) {
+		if err := mt.Cursor.Err(); err != nil {
+			log.Println("mgo cur err", err.Error())
+		}
+		mt.Cursor.Close(ctx)
+		return false
+	}
+	if err := mt.Cursor.Decode(result); err != nil {
+		log.Println("mgo cur err", err.Error())
+		mt.Cursor.Close(ctx)
+		return false
+	}
+	return true
+}
+
+// DB records the database name to query and returns the receiver for chaining.
+func (ms *MgoSess) DB(name string) *MgoSess {
+	ms.Db = name
+	return ms
+}
+
+// C records the collection name to query and returns the receiver for chaining.
+func (ms *MgoSess) C(name string) *MgoSess {
+	ms.Coll = name
+	return ms
+}
+
+// Find records the query filter; nothing is sent to the server until Iter runs.
+func (ms *MgoSess) Find(q interface{}) *MgoSess {
+	ms.Query = q
+	return ms
+}
+
+// Select records the projection that Iter passes to the find options.
+func (ms *MgoSess) Select(fields interface{}) *MgoSess {
+	ms.fields = fields
+	return ms
+}
+
+// Limit records the maximum number of documents; Iter ignores values <= 0.
+func (ms *MgoSess) Limit(limit int64) *MgoSess {
+	ms.limit = limit
+	return ms
+}
+// Skip records how many documents to skip; Iter ignores values <= 0.
+func (ms *MgoSess) Skip(skip int64) *MgoSess {
+	ms.skip = skip
+	return ms
+}
+
+// Sort records the sort keys. Iter treats a leading "-" as descending and a
+// leading "+" (or no prefix) as ascending.
+func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
+	ms.Sorts = sorts
+	return ms
+}
+
+// Iter runs the query described by the builder and returns an iterator over
+// the results. The batch size is fixed at 100; skip, limit, sort and projection
+// are applied only when they were set.
+//
+// When the find fails the error is logged and the returned iterator has a nil
+// cursor, so its Next reports false immediately.
+func (ms *MgoSess) Iter() *MgoIter {
+	it := &MgoIter{}
+	find := options.Find()
+	if ms.skip > 0 {
+		find.SetSkip(ms.skip)
+	}
+	if ms.limit > 0 {
+		find.SetLimit(ms.limit)
+	}
+	find.SetBatchSize(100)
+	if len(ms.Sorts) > 0 {
+		// The sort specification must be an ordered document: a map loses the
+		// precedence of the keys as soon as more than one is given.
+		sort := make(primitive.D, 0, len(ms.Sorts))
+		for _, k := range ms.Sorts {
+			dir := 1
+			switch {
+			case strings.HasPrefix(k, "-"):
+				dir, k = -1, k[1:]
+			case strings.HasPrefix(k, "+"):
+				k = k[1:]
+			}
+			if k == "" {
+				// An empty key (or a bare sign) names no field; skip it
+				// instead of slicing out of range or sending an invalid sort.
+				continue
+			}
+			sort = append(sort, primitive.E{Key: k, Value: dir})
+		}
+		if len(sort) > 0 {
+			find.SetSort(sort)
+		}
+	}
+	if ms.fields != nil {
+		find.SetProjection(ms.fields)
+	}
+	// A nil filter is rejected by the driver; treat it as "match everything".
+	var query interface{} = ms.Query
+	if query == nil {
+		query = bson.M{}
+	}
+	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, query, find)
+	if err != nil {
+		log.Println("mgo find err", err.Error())
+		return it
+	}
+	it.Cursor = cur
+	return it
+}
+
+// MongodbSim bundles a MongoDB client with its connection settings and a
+// buffered channel that the helper methods use to bound concurrent calls.
+type MongodbSim struct {
+	// MongodbAddr is appended to "mongodb://" to form the connection URI.
+	MongodbAddr string
+	// Size is used both as the driver's max pool size and as the capacity of pool.
+	Size        int
+	//	MinSize     int
+	// DbName is the database the helper methods operate on.
+	DbName   string
+	// C is set by InitPool only when connecting succeeds.
+	C        *mongo.Client
+	// Ctx is the context passed to every database operation.
+	Ctx      context.Context
+	// ShortCtx is the one-minute context InitPool connects with.
+	ShortCtx context.Context
+	// pool is filled by Open and drained by Close.
+	pool     chan bool
+	// UserName and Password are applied as credentials only when both are non-empty.
+	UserName string
+	Password string
+}
+
+// GetMgoConn returns a fresh query builder bound to this handle. It does not
+// take a slot from the pool.
+func (m *MongodbSim) GetMgoConn() *MgoSess {
+	return &MgoSess{M: m}
+}
+
+// DestoryMongoConn detaches the builder from this handle so it can no longer
+// be used to run queries. A nil builder is ignored. It does not release a pool
+// slot, mirroring GetMgoConn which does not take one.
+func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
+	if ms == nil {
+		return
+	}
+	// Reassigning the parameter itself would have no effect on the caller, so
+	// only the back-reference is cleared.
+	ms.M = nil
+}
+
+// InitPool builds the client options from the struct's settings, creates the
+// pool channel and the two contexts, and connects. On failure the error is
+// logged and C stays nil.
+//
+// NOTE(review): both cancel functions are discarded, and ShortCtx expires one
+// minute after this call — anything using ShortCtx later will see an expired
+// context. Confirm whether that is intended.
+func (m *MongodbSim) InitPool() {
+	// Option order is kept: values in the URI are applied after the connect
+	// timeout and before the pool size.
+	opts := options.Client().
+		SetConnectTimeout(3 * time.Second).
+		ApplyURI("mongodb://" + m.MongodbAddr).
+		SetMaxPoolSize(uint64(m.Size))
+	m.pool = make(chan bool, m.Size)
+
+	// Credentials are applied only when both parts are present.
+	if m.UserName != "" && m.Password != "" {
+		opts.SetAuth(options.Credential{
+			Username: m.UserName,
+			Password: m.Password,
+		})
+	}
+	opts.SetMaxConnIdleTime(2 * time.Hour)
+
+	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
+	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
+
+	client, err := mongo.Connect(m.ShortCtx, opts)
+	if err != nil {
+		log.Println("mgo init error:", err.Error())
+		return
+	}
+	m.C = client
+	log.Println("init success")
+}
+
+// Open takes a slot by sending into the pool channel; it blocks while the
+// channel (capacity Size, created in InitPool) is full.
+func (m *MongodbSim) Open() {
+	m.pool <- true
+}
+// Close releases a slot by receiving from the pool channel; it blocks if no
+// slot is currently held.
+func (m *MongodbSim) Close() {
+	<-m.pool
+}
+
+// UpSertBulk performs a bulk upsert on collection c. Every element of doc is a
+// pair: index 0 is the filter, index 1 is the update document.
+//
+// It returns the driver's map of upserted IDs (keyed by position in doc) and
+// true on success. It returns (nil, false) when doc is empty, when any element
+// has fewer than two entries, or when the bulk write fails; in the first two
+// cases nothing is sent to the server.
+func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
+	m.Open()
+	defer m.Close()
+	if len(doc) == 0 {
+		return nil, false
+	}
+	writes := make([]mongo.WriteModel, 0, len(doc))
+	for i, d := range doc {
+		if len(d) < 2 {
+			// Reject the whole batch so the keys of the returned map keep
+			// matching the positions in doc.
+			log.Println("mgo upsert error: item", i, "needs a filter and an update")
+			return nil, false
+		}
+		write := mongo.NewUpdateOneModel()
+		write.SetFilter(d[0])
+		write.SetUpdate(d[1])
+		write.SetUpsert(true)
+		writes = append(writes, write)
+	}
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, e := coll.BulkWrite(m.Ctx, writes)
+	if e != nil {
+		log.Println("mgo upsert error:", e.Error())
+		return nil, false
+	}
+	return r.UpsertedIDs, true
+}
+
+// SaveBulk inserts all given documents into collection c with one bulk write
+// and reports whether the write succeeded; a failure is logged.
+func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+
+	// One insert model per document, in the order given.
+	models := make([]mongo.WriteModel, len(doc))
+	for i := range doc {
+		models[i] = mongo.NewInsertOneModel().SetDocument(doc[i])
+	}
+
+	target := m.C.Database(m.DbName).Collection(c)
+	if _, err := target.BulkWrite(m.Ctx, models); err != nil {
+		log.Println("mgo savebulk error:", err.Error())
+		return false
+	}
+	return true
+}
+
+// Save inserts doc into collection c and returns the inserted ID, or nil when
+// the insert fails. The failure is logged, matching the bulk helpers.
+func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.InsertOne(m.Ctx, doc)
+	if err != nil {
+		log.Println("mgo save error:", err.Error())
+		return nil
+	}
+	return r.InsertedID
+}
+
+// UpdateById applies the update document doc to the document in collection c
+// whose _id is the converted form of id. It reports whether the update call
+// succeeded; a failure is logged instead of being dropped silently.
+func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	filter := map[string]interface{}{"_id": StringTOBsonId(id)}
+	if _, err := coll.UpdateOne(m.Ctx, filter, doc); err != nil {
+		log.Println("mgo update error:", err.Error())
+		return false
+	}
+	return true
+}
+
+// DeleteById deletes the document in collection c whose _id is the converted
+// form of id and returns the number of documents removed. A failure is logged
+// and reported as 0.
+func (m *MongodbSim) DeleteById(c, id string) int64 {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.DeleteOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
+	if err != nil {
+		log.Println("mgo delete error:", err.Error())
+		return 0
+	}
+	return r.DeletedCount
+}
+
+// Delete removes every document in collection c that matches query and returns
+// the number of documents removed. A failure is logged and reported as 0.
+func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.DeleteMany(m.Ctx, query)
+	if err != nil {
+		log.Println("mgo delete error:", err.Error())
+		return 0
+	}
+	return r.DeletedCount
+}
+
+// FindById returns the document in collection c whose _id is the converted
+// form of id. The result is an empty, non-nil map when nothing matches or the
+// lookup fails; failures other than "no documents" are logged.
+func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r := coll.FindOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
+	v := map[string]interface{}{}
+	// A missing document is an expected outcome; anything else is worth a log line.
+	if err := r.Decode(&v); err != nil && err != mongo.ErrNoDocuments {
+		log.Println("mgo findbyid error:", err.Error())
+	}
+	return v
+}
+
+// FindOne returns the first document in collection c that matches query. The
+// result is an empty, non-nil map when nothing matches or the lookup fails;
+// failures other than "no documents" are logged.
+func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r := coll.FindOne(m.Ctx, query)
+	v := map[string]interface{}{}
+	// A missing document is an expected outcome; anything else is worth a log line.
+	if err := r.Decode(&v); err != nil && err != mongo.ErrNoDocuments {
+		log.Println("mgo findone error:", err.Error())
+	}
+	return v
+}
+
+// Find returns every document in collection c that matches query, with the
+// optional sort and projection applied (either may be nil).
+//
+// Errors are logged and returned to the caller. They must not terminate the
+// process: the signature already promises an error value, and exiting would
+// make those returns unreachable.
+func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	op := options.Find()
+	if sort != nil {
+		op.SetSort(sort)
+	}
+	if fields != nil {
+		op.SetProjection(fields)
+	}
+	r, err := coll.Find(m.Ctx, query, op)
+	if err != nil {
+		log.Println("mgo find error:", err.Error())
+		return nil, err
+	}
+	var results []map[string]interface{}
+	if err = r.All(m.Ctx, &results); err != nil {
+		log.Println("mgo find error:", err.Error())
+		return nil, err
+	}
+	return results, nil
+}
+
+// NewObjectId returns a newly generated ObjectID by delegating to
+// primitive.NewObjectID.
+func NewObjectId() primitive.ObjectID {
+	return primitive.NewObjectID()
+}
+
+
+
+
+
+
+
+
+
+
+
+
 func InitMgoEn(uri string, poolSize uint64,username_password ... string) (*Mgo, error) {
 	//fengweiqiang fwq@123123
 	m := Mgo{}

+ 104 - 0
udpdataclear/udpSensitiveWords/grpc_server/words.go

@@ -0,0 +1,104 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"os"
+	"strings"
+	"unicode/utf8"
+)
+
+// dealWithNameScoreRules looks name up in Elasticsearch and decides whether
+// the best hit is the same entity.
+//
+// It returns the "name" field of the top hit and a flag. The flag is true when
+// the bigram hit ratio from dealWithWordsRules reaches 1.0, or when it reaches
+// 0.8 and the hit's score is above 4.0. Any failure (query construction,
+// search error, no hits, undecodable hit) yields ("", false) instead of
+// dereferencing a missing result.
+func dealWithNameScoreRules(name string) (string, bool) {
+	old_name := escape(name)
+	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_2","query":"` + old_name + `"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
+	tmp := make(map[string]interface{})
+	if err := json.Unmarshal([]byte(query), &tmp); err != nil {
+		// Searching with an empty body would match arbitrary documents.
+		log.Println("build es query error:", err.Error())
+		return "", false
+	}
+	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
+	if err != nil {
+		log.Println("从ES查询出错", err.Error())
+		return "", false
+	}
+	if searchResult == nil || searchResult.Hits == nil {
+		return "", false
+	}
+	hits := searchResult.Hits.Hits
+	resNum := len(hits)
+	if resNum == 0 {
+		return "", false
+	}
+	if resNum >= 5000 {
+		log.Println("查询结果太多,查询到:", resNum, "条")
+		return "", false
+	}
+
+	// Only the best hit takes part in the decision.
+	best := hits[0]
+	if best == nil || best.Source == nil {
+		return "", false
+	}
+	data := make(map[string]interface{})
+	if err := json.Unmarshal(*best.Source, &data); err != nil {
+		log.Println("decode es hit error:", err.Error())
+		return "", false
+	}
+	new_name := ObjToString(data["name"])
+	if new_name == "" {
+		return new_name, false
+	}
+	new_score := float64(0)
+	if best.Score != nil {
+		new_score = *best.Score
+	}
+
+	// Analyse the hit ratio.
+	total, hit := dealWithWordsRules(name, new_name)
+	if total == 0 {
+		// No bigrams to compare against; the ratio is undefined.
+		return new_name, false
+	}
+	proportion := float64(hit) / float64(total)
+	isok := proportion >= 1.0 || (proportion >= 0.8 && new_score > 4.0)
+	return new_name, isok
+}
+// dealWithWordsRules compares two names by their two-character windows.
+//
+// Brackets are removed from both names first, in their ASCII and full-width
+// forms. It returns total, the number of windows in source_name, and hit, the
+// number of windows of info_name that occur in source_name.
+func dealWithWordsRules(info_name string, source_name string) (int, int) {
+	// Names commonly carry either bracket form; both are noise for matching.
+	brackets := strings.NewReplacer("(", "", ")", "", "（", "", "）", "")
+	info_name = brackets.Replace(info_name)
+	source_name = brackets.Replace(source_name)
+
+	nameArr, _ := calculateWordCount(info_name)
+	_, total := calculateWordCount(source_name)
+	hit := 0
+	for _, v1 := range nameArr {
+		if strings.Contains(source_name, v1) {
+			hit++
+		}
+	}
+	return total, hit
+}
+
+// calculateWordCount splits name into overlapping two-rune windows and returns
+// them together with their count. A name shorter than two runes yields an
+// empty, non-nil slice and 0.
+func calculateWordCount(name string) ([]string, int) {
+	const window = 2
+	grams := make([]string, 0)
+	count := utf8.RuneCountInString(name) - (window - 1)
+	if count <= 0 || name == "" {
+		return grams, 0
+	}
+	runes := []rune(name)
+	for start := 0; start < count; start++ {
+		grams = append(grams, string(runes[start:start+window]))
+	}
+	return grams, len(grams)
+}
+
+// escape copies s, inserting a fixed prefix in front of every character from
+// the special set below. The prefix is the OS path separator followed by two
+// backslashes.
+//
+// NOTE(review): because the prefix starts with os.PathSeparator, the output
+// differs between operating systems, and on systems where the separator is
+// "/" a slash is inserted into the query text. This looks unintended for a
+// value that is embedded in a JSON query string — confirm and fix together
+// with the caller.
+func escape(s string) string {
+	prefix := string([]rune{os.PathSeparator, '\\'}) + `\`
+	var out strings.Builder
+	for _, r := range s {
+		switch r {
+		case '\\', '+', '-', '!', '(', ')', ':', '^', '[', ']', '"', '{', '}', '~',
+			'*', '?', '|', '&', '/', '#', '@', '>', '<', '“', '”', '、', '.':
+			out.WriteString(prefix)
+		}
+		out.WriteRune(r)
+	}
+	return out.String()
+}

+ 6 - 0
udpdataclear/udpSensitiveWords/util/word.go

@@ -4,11 +4,17 @@ import (
 	"encoding/json"
 	"log"
 	"strings"
+	"unicode"
 	"unicode/utf8"
 )
 
 func dealWithScoreRules(name string) (string,bool) {
 	new_name,isok :="",false
+	for _, c := range name {
+		if !(unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c)) {
+			return "",false
+		}
+	}
 	query:= `{"query":{"bool":{"must":[{"query_string":{"default_field":"`+es_index+`.name_word","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":1,"sort":[],"facets":{}}`
 	//默认取最高分-分析多个分-遍历器查询
 	tmp := make(map[string]interface{})