fengweiqiang há 4 anos atrás
pai
commit
ecf541c4ed

+ 0 - 1
udpdataclear/udpSensitiveWords/base.bat

@@ -1 +0,0 @@
-protoc -I=protos --go_out=proto_grpc --go_opt=paths=source_relative --go-grpc_out=proto_grpc --experimental_allow_proto3_optional --go-grpc_opt=paths=source_relative protos/*.proto

+ 2 - 7
udpdataclear/udpSensitiveWords/config.json

@@ -12,10 +12,6 @@
   "agency_c": "",
   "winner_c": "",
   "udpport": "1484",
-  "query_addrs": [
-    "127.0.0.1:8889",
-    "127.0.0.1:8888"
-  ],
   "nextNode": [
     {
       "addr": "127.0.0.1",
@@ -25,7 +21,6 @@
   ],
   "userName": "",
   "passWord": "",
-  "es_type": "azktest",
-  "es_index": "azktest",
-  "certFile": "./ca.crt"
+  "es_type": "unique_qy",
+  "es_index": "unique_qy"
 }

+ 0 - 298
udpdataclear/udpSensitiveWords/grpc_server/data.go

@@ -1,298 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"github.com/importcjj/sensitive"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo/options"
-	"gopkg.in/mgo.v2/bson"
-	"log"
-	"regexp"
-	"runtime"
-	"sensitiveWords.udp/util"
-	"strings"
-	"time"
-)
-
// Company-name screening patterns. They are compiled once at package
// initialisation; an invalid pattern therefore panics at start-up.
var (
	// reg_alias matches names that end with one of the listed bureau suffixes.
	reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
		"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
		"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
		"地方金融监督管理局|住房保障和房屋管理局|质量技术监督局|人力资源与社会保障局|公路管理局|国土资源局|" +
		"卫生和计划生育局|民事政务局|公众安全局|交通管理局|人力资源和社会保障局|劳动和社会保障局|" +
		"住房和城乡建设局|就业服务局|文物管理局|环境保护局|粮食和物资储备局|教育体育局|" +
		"体育局|教育局|招商局|农业局|农机局|水务局|林业局|财政局|审计局|统计局|商务局)$")

	// reglen matches names that are 1-5 or at least 40 characters long.
	reglen = regexp.MustCompile("^(.{1,5}|.{40,})$")

	// strReg matches whole names (anchored at both ends) that are too generic
	// to be treated as a real company name.
	strReg = regexp.MustCompile("^(.{0,3}工程队|.{0,3}总公司|_+|.{0,2}设备安装公司|.{0,2}装[饰修潢]公司|.{0,2}开发公司|.{0,4}有限公司|.{0,4}有限责任公司|.{0,4}设计院|建筑设计研?究?院|省文物考古研究所|经济开发区|省.*|镇人民政府|.{0,2}服务公司|" +
		".{0,2}工程质量监督站|.{0,3}经[营销]部|.{0,3}事务所|.{0,4}工程公司|.{0,4}责任公司|.*勘测|.{0,4}研究院|.*能源建|.{0,2}安装工程|.*[市省]{1}|.{0,4}中心|.*区.?|" +
		".{0,3}税务局|.{0,3}财政局|.{0,3}商行|.{0,2}公安处|.{0,2}测绘院|.{0,3}开发|.{0,2}建设局|.{0,2}经销部|.{0,3}委员会|.{0,2}分公司|.{0,2}管理站|.{0,2}事务管理局|" +
		".*资料|.{0,2}办公用品.{1,2}|.*唯亭|.*设备|.+安装|.{0,2}技术服务|市.+[台院社局司]|城?区.+[府局室院]|县.+[院台局]|.{0,2}发展公司|经济技术开发|" +
		"发展和改革局|贵州有色地质|铝塑门窗加工|生产力促进中心|特殊普通合伙|工业集团公司|人民调解协会|人民政府办公厅|机电设备公司|房地产开发有限公司|.{0,4}商店|中等专业学校|" +
		"农村信用联社|.{0,4}经营部|.{0,4}销售部|驾驶员培训学校|.{2}县.{2}镇|保安服务总公司|住房和城乡建设局|地产评估事务所|生产资料门市部|×+|.{0,3}[0-9]{15}|.*[0-9]+|.*路|.*无字号名称.*|.*车|.*[,,]{1}.*|.*个体工商户|.*运输户)$")

	// unstart_strReg matches names whose first character is in U+4E00..U+9FA5.
	// Callers negate the result to reject names that do not start with Chinese.
	unstart_strReg = regexp.MustCompile("^([\u4e00-\u9fa5])")

	// start_strReg matches disallowed prefixes.
	start_strReg = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|省|市|县|区|业绩|资格|中标|项目|预算单位)")

	// end_strReg matches disallowed suffixes.
	//
	// The last alternative used to be `/*`, which matches the empty string and
	// therefore made the whole pattern match every input; it is now `/+`
	// (one or more trailing slashes).
	// NOTE(review): `(株)` is a group matching a trailing 株, not literal
	// parentheses - confirm whether the literal form was intended.
	end_strReg = regexp.MustCompile("(\\.|\\.\\.|餐馆|店|腻子|肉庄|画社|美发屋|发廊|网吧|网咖|零售点|新街|包子铺|奶茶铺|(株)|先生|女士|小姐|" +
		"资格|业绩|中标|项目|预算单位|摊位号|号|厅|室|部|点|馆|场|厂|床|所|处|站|行|中心|合作社|ATMS|" +
		"吧|楼|摊|摊位|廊|茶社|坊|圃|汤锅|园|民宿|美容院|房|排挡|府|庄|栈|队|批发|苑|养殖户|棋牌|农家乐|货运|" +
		"城|社|基地|会|服务|娱乐|种植|百货|汽修|农家菜|亭|小吃|快餐|粮库|卫生院|书画院|面|门窗|鸡排|屋|橱|堂|肉铺|服务|服饰|/+)$")

	// con_strReg matches disallowed substrings anywhere in the name.
	//
	// The second alternative was a bare `?`, which is a regexp syntax error
	// (repetition operator without an argument) and panics in MustCompile.
	// The ASCII question mark is already covered by `\?`, so the bare one is
	// written as the full-width form.
	con_strReg = regexp.MustCompile("(\\?|？|%|代码标识|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|" +
		"法人|&nbsp|国家拨入|借款|积累资金|单位自有|认股人|--|、|&|`|美元|[\u4e00-\u9fa5]{2,6}·[\u4e00-\u9fa5]{2,6})|" +
		"[a-zA-Z]{5,}")

	// uncon_strReg matches the organisation keywords a valid name must contain
	// at least one of; callers negate the result.
	uncon_strReg = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
)
-
-
-
-//部署-历史-敏感词库
-func initSensitiveWordsData()  {
-	log.Println("初始化敏感词-源数据...")
-	gteid, err := primitive.ObjectIDFromHex(YamlConfig.TaskGteId)
-	if err != nil {
-		log.Fatalln(err)
-	}
-	lteid, err := primitive.ObjectIDFromHex(YamlConfig.TaskLteId)
-	if err != nil {
-		log.Fatalln(err)
-	}
-	log.Println("id段落:",util.BsonTOStringId(gteid),util.BsonTOStringId(lteid))
-	sess := MixDataMgo.GetMgoConn()
-	defer MixDataMgo.DestoryMongoConn(sess)
-	iter := sess.DB(MixDataMgo.DbName).C("unique_qyxy").Find(map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gte": gteid,
-			"$lte": lteid,
-		},
-	}).Iter()
-	Filter = sensitive.New()
-	var initnum uint
-	for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
-		initnum++
-		if initnum%10000==0 {
-			log.Println("current index ", initnum,tmp["qy_name"])
-		}
-		Filter.AddWord(tmp["qy_name"].(string))
-	}
-	log.Println("init ok", initnum)
-}
-
-
-
-
-
-
-
-//定时增量数据处理---冯
-func addTaskSensitiveWordsData()  {
-	defer func() {
-		if err := recover(); err != nil {
-			log.Println("func() addTaskSensitiveWordsData",err)
-		}
-	}()
-
-	mmmgo, err := util.InitMgoEn("mongodb://172.17.4.187:27082,172.17.145.163:27083", 20, "fengweiqiang", "fwq@123123")
-	if err != nil {
-		log.Fatalln(err)
-	}
-	con := mmmgo.GetCon()
-	if con == nil {
-		log.Fatalln("mgo con err")
-	}
-	Filter = sensitive.New()
-	tick := time.Tick(time.Hour * 24 * 7)//查询七天前
-	for {//定时任务
-		ctime := <-tick
-		cronData := time.Date(ctime.Year(), ctime.Month(), ctime.Day()-7, ctime.Hour(), ctime.Minute(), ctime.Second(), 0, time.Local)
-		findByupdate, err := con.Database("mixdata").Collection("qyxy_std").Find(nil, bson.M{
-			"updatetime": bson.M{"$gte": cronData.Unix()},
-		}, options.Find().SetProjection(bson.M{"company_name": 1, "updatetime": 1,"company_type": 1,"company_type_old": 1}))
-		if err != nil {
-			log.Println("tick err", cronData)
-			continue
-		}
-		defer findByupdate.Close(nil)
-		for tmp := make(map[string]interface{}); findByupdate.Next(nil); tmp = map[string]interface{}{} {
-			err := findByupdate.Decode(&tmp)
-			if err == nil {
-				if company_name, ok := tmp["company_name"].(string); ok {
-					if reglen.MatchString(company_name) || strReg.MatchString(company_name) ||
-						!uncon_strReg.MatchString(company_name)|| !unstart_strReg.MatchString(company_name)||
-						start_strReg.MatchString(company_name)|| end_strReg.MatchString(company_name)||
-						con_strReg.MatchString(company_name) {
-						continue
-					}
-					if strings.Contains(util.ObjToString(tmp["company_type"]),"个人")||
-						strings.Contains(util.ObjToString(tmp["company_type"]),"个体")||
-						strings.Contains(util.ObjToString(tmp["company_type_old"]),"个人")||
-						strings.Contains(util.ObjToString(tmp["company_type_old"]),"个体") {
-						continue
-					}
-
-					//存mgo
-					con.Database("mixdata").Collection("unique_qyxy").InsertOne(nil, bson.M{
-						"qy_name": company_name,
-					})
-					//存敏感词
-					Filter.AddWord(company_name)
-					//存es=判断+新增
-					dealWithEsData(company_name,util.BsonTOStringId(tmp["_id"]))
-				}
-			}
-		}
-		log.Println("tick ok", cronData)
-	}
-}
-
-
-
-
-
-
-
-
-
-
-
-//处理是否新增es
-func dealWithEsData(name string,tmpid string)  {
-	query:= `{"query":{"bool":{"must":[{"term":{"`+es_index+`.name":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"facets":{}}`
-	tmp := make(map[string]interface{})
-	json.Unmarshal([]byte(query),&tmp)
-	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
-	if err != nil {
-		log.Println("从ES查询出错", err.Error())
-	}else {
-		data := make(map[string]interface{},0)
-		if searchResult.Hits != nil {
-			for _, hit := range searchResult.Hits.Hits {
-				json.Unmarshal(*hit.Source, &data)
-			}
-		}
-		if len(data)==0{
-			//log.Println("无数据-新增")
-			_, err := Client_Es.Index().Index(es_index).Type(es_type).Id(tmpid).BodyJson(map[string]interface{}{
-				"name":name,
-				"name_word":name,
-			}).Do()
-			if err != nil {
-				//log.Println("新增失败:",name,tmpid)
-			}
-		}
-	}
-
-}
-
-
-
-
-
-
-//处理内存分段
-func dealWithDataMemory()  {
-
-	//临时测试
-	MixDataMgo = &util.MongodbSim{
-		MongodbAddr: "172.17.4.187:27082,172.17.145.163:27083",
-		Size:        20,
-		DbName:      "mixdata",
-		UserName:    "fengweiqiang",
-		PassWord:    "fwq@123123",
-	}
-	MixDataMgo.InitPool()
-
-	sess := MixDataMgo.GetMgoConn()
-	defer MixDataMgo.DestoryMongoConn(sess)
-	iter := sess.DB(MixDataMgo.DbName).C("unique_qyxy").Find(map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gte": util.StringTOBsonId("1fffffffffffffffffffffff"),
-			"$lte":  util.StringTOBsonId("9fffffffffffffffffffffff"),
-		},
-	}).Sort("_id").Iter()
-	Filter = sensitive.New()
-	var initnum uint
-	saveIdArr ,start_id:= make([]map[string]string,0),""
-	var m runtime.MemStats
-	for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
-		if start_id=="" {
-			start_id = util.BsonTOStringId(tmp["_id"])
-		}
-		Filter.AddWord(tmp["qy_name"].(string))
-		initnum++
-		if initnum%50000==0 {
-			runtime.ReadMemStats(&m)
-			men :=util.ToMegaBytes(m.HeapAlloc)
-			log.Printf("current index %d\tos %.2f M",initnum, men)
-			if men>7.5*1024 { //7.5G
-				saveIdArr = append(saveIdArr, map[string]string{
-					"start":start_id,
-					"end":util.BsonTOStringId(tmp["_id"]),
-				})
-				log.Println("分段:",start_id,util.BsonTOStringId(tmp["_id"]),men)
-				Filter = sensitive.New()
-				runtime.GC()
-				start_id = ""
-				time.Sleep(time.Second*30)
-			}
-		}
-	}
-
-	saveIdArr = append(saveIdArr, map[string]string{
-		"start":start_id,
-		"end":"",
-	})
-
-	for k,v:=range saveIdArr{
-		log.Println("第",k,"段",v["start"],v["end"])
-	}
-
-	log.Println("memory is ok", initnum)
-}
-
-
-
-
-
-func temporaryTest()  {
-	log.Println("测试......导出数据")
-	q := map[string]interface{}{
-		"check_history":map[string]interface{}{
-			"$exists":0,
-		},
-	}
-	sess := MixDataMgo.GetMgoConn()
-	defer MixDataMgo.DestoryMongoConn(sess)
-	it := sess.DB(MixDataMgo.DbName).C("winner_err_new").Find(&q).Iter()
-	total,isOK:=0,0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total % 100 == 0 {
-			log.Println("current index",total,isOK)
-		}
-
-		name:=util.ObjToString(tmp["name"])
-		tmpid := util.BsonTOStringId(tmp["_id"])
-		new_name,b :=dealWithNameScoreRules(name)
-		if new_name!="" && b {
-			isOK++
-			MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
-				"$set": map[string]interface{}{
-					"is_word": 1,
-					"name_word" : new_name,
-				},
-			})
-		}else {
-			MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
-				"$set": map[string]interface{}{
-					"is_word": -1,
-					"name_word" : new_name,
-				},
-			})
-		}
-		tmp = make(map[string]interface{})
-	}
-
-
-	log.Println("is over",total,isOK)
-}

+ 0 - 160
udpdataclear/udpSensitiveWords/grpc_server/main.go

@@ -1,160 +0,0 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"github.com/grpc-ecosystem/go-grpc-prometheus"
-	"github.com/importcjj/sensitive"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/credentials"
-	"gopkg.in/olivere/elastic.v1"
-	"gopkg.in/yaml.v3"
-	"io/ioutil"
-	"log"
-	"net/http"
-	"sensitiveWords.udp/proto_grpc"
-	"sensitiveWords.udp/util"
-	"strings"
-)
-
-/*
-2021/04/28 14:31:35.755969 data.go:248: 第 0 段 6082579e19a23f9d3f39eff2 60825b1d9ce9fc2e6c8a5b92
-2021/04/28 14:31:35.756001 data.go:248: 第 1 段 60825b1d9ce9fc2e6c8a5b93 60825e5d9ce9fc2e6ccea709
-2021/04/28 14:31:35.756008 data.go:248: 第 2 段 60825e5d9ce9fc2e6ccea70a 608261cd9ce9fc2e6c13382e
-2021/04/28 14:31:35.756014 data.go:248: 第 3 段 608261cd9ce9fc2e6c13382f 6082658c9ce9fc2e6c574971
-2021/04/28 14:31:35.756021 data.go:248: 第 4 段 6082658c9ce9fc2e6c574972 608269619ce9fc2e6c9c668c
-2021/04/28 14:31:35.756029 data.go:248: 第 5 段 608269619ce9fc2e6c9c668d 60826d3e9ce9fc2e6ce1c9d8
-2021/04/28 14:31:35.756039 data.go:248: 第 6 段 60826d3e9ce9fc2e6ce1c9d9 608271559ce9fc2e6c26aca5
-2021/04/28 14:31:35.756046 data.go:248: 第 7 段 608271559ce9fc2e6c26aca6 608275929ce9fc2e6c6ca1ec
-2021/04/28 14:31:35.756053 data.go:248: 第 8 段 608275929ce9fc2e6c6ca1ed 608279e19ce9fc2e6cb2ddb8
-2021/04/28 14:31:35.756060 data.go:248: 第 9 段 608279e19ce9fc2e6cb2ddb9 60827e359ce9fc2e6cf96417
-2021/04/28 14:31:35.756069 data.go:248: 第 10 段 60827e359ce9fc2e6cf96419 608282299ce9fc2e6c4034ee
-2021/04/28 14:31:35.756077 data.go:248: 第 11 段 608282299ce9fc2e6c4034ef 608285b09ce9fc2e6c868546
-2021/04/28 14:31:35.756087 data.go:248: 第 12 段 608285b09ce9fc2e6c868547 608289199ce9fc2e6ccbc72e
-2021/04/28 14:31:35.756095 data.go:248: 第 13 段 608289199ce9fc2e6ccbc72f 608293f49ce9fc2e6cfdbf7b
-2021/04/28 14:31:35.756103 data.go:248: 第 14 段 608293f49ce9fc2e6cfdbfa7 
-*/
-
-const (
-	YAMLFILE = "./server.yaml"
-)
-
-var YamlConfig YAMLConfig
-var Filter *sensitive.Filter
-var es_type, es_index string
-var Client_Es *elastic.Client
-
-var MixDataMgo *util.MongodbSim
-var (
-	// Create a metrics registry.
-	reg = prometheus.NewRegistry()
-
-	// Create some standard server metrics.
-	grpcMetrics             = grpc_prometheus.NewServerMetrics()
-	customizedCounterMetric = prometheus.NewCounterVec(prometheus.CounterOpts{
-		Name: "demo_server_search_method_handle_count",
-		Help: "Total number of RPCs handled on the server.",
-	}, []string{"name"})
-)
-
-func init() {
-	yamlFile, err := ioutil.ReadFile(YAMLFILE)
-	if err != nil {
-		log.Fatalln("load conf error")
-	}
-	err = yaml.Unmarshal(yamlFile, &YamlConfig)
-	if err != nil {
-		fmt.Println(err.Error())
-	}
-	log.Printf("%#v", YamlConfig)
-	MixDataMgo = &util.MongodbSim{
-		MongodbAddr: YamlConfig.MixdataMgoAddr,
-		Size:        YamlConfig.MongodbPoolSize,
-		DbName:      YamlConfig.DbName,
-		UserName:    YamlConfig.UserName,
-		PassWord:    YamlConfig.PassWord,
-	}
-	MixDataMgo.InitPool()
-
-
-	//Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://172.17.145.170:9800")
-
-
-	es_type, es_index = "unique_qy","unique_qy"
-
-
-	reg.MustRegister(grpcMetrics, customizedCounterMetric)
-
-}
-
-func main() {
-
-	if YamlConfig.IsAddTask == 0 {
-		initSensitiveWordsData() //初始化敏感词数据
-	} else {
-		go addTaskSensitiveWordsData() //增量-改配置文件
-	}
-
-	mux := http.NewServeMux()
-	mux.Handle("/",  promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
-	cred, err := credentials.NewServerTLSFromFile(YamlConfig.CertFile, YamlConfig.KeyFile)
-	if err != nil {
-		log.Fatalln(err)
-	}
-	grpcServer := grpc.NewServer(
-		grpc.Creds(cred),
-		grpc.UnaryInterceptor(grpcMetrics.UnaryServerInterceptor()),
-	)
-	proto_grpc.RegisterSensitiveWordsServer(grpcServer, &server{})
-	grpcMetrics.InitializeMetrics(grpcServer)
-
-	log.Println("Listen ",YamlConfig.Port)
-	if err = http.ListenAndServeTLS(
-		fmt.Sprintf("%s", YamlConfig.Port),
-		YamlConfig.CertFile,
-		YamlConfig.KeyFile,
-		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			if r.ProtoMajor == 2 && strings.HasPrefix(
-				r.Header.Get("Content-Type"), "application/grpc") {
-				grpcServer.ServeHTTP(w, r)
-			} else {
-				mux.ServeHTTP(w, r)
-			}
-
-			return
-		}),
-	);err != nil{
-		log.Fatalln(err)
-	}
-
-}
-
-//协议方法---等
-type server struct {
-	proto_grpc.SensitiveWordsServer
-}
-
-func (s *server) Search(ctx context.Context, in *proto_grpc.Request) (*proto_grpc.ResultSensitiveWords, error) {
-	customizedCounterMetric.WithLabelValues("search_server").Inc()
-	text := in.GetText()
-	//log.Println(text)
-	findAll := Filter.FindAll(text)
-	rada := strings.Join(findAll, ",")
-	return &proto_grpc.ResultSensitiveWords{SensitiveWords: rada}, nil
-}
-
// YAMLConfig mirrors the keys of the server YAML configuration file.
type YAMLConfig struct {
	// MongoDB connection settings.
	MixdataMgoAddr  string `yaml:"mixdataMgoAddr"`
	UserName        string `yaml:"userName"`
	PassWord        string `yaml:"passWord"`
	DbName          string `yaml:"dbName"`
	MongodbPoolSize int    `yaml:"mongodbPoolSize"`
	// Inclusive ObjectID hex bounds of the range loaded at start-up.
	TaskGteId string `yaml:"taskGteId"`
	TaskLteId string `yaml:"taskLteId"`
	// 0 selects the full initial load; any other value the incremental task.
	IsAddTask int `yaml:"isAddTask"`
	// Listen address and TLS material of the server.
	Port     string `yaml:"port"`
	CertFile string `yaml:"certFile"`
	KeyFile  string `yaml:"keyFile"`
}

+ 0 - 27
udpdataclear/udpSensitiveWords/grpc_server/server.key

@@ -1,27 +0,0 @@
------BEGIN RSA PRIVATE KEY-----
-MIIEpQIBAAKCAQEA7uEtfsDYUHAJTfYJQZXoxIHvUYuGBMuOBGMEwA/yBAoOyJ8m
-WFxZxqH7SID9NwOJ33sg7hbrBpz1gm9GcraYUdzF31RCzgiadp0X8jRXuacEBs4f
-jRW8YVbgPZi47yHVKzogohWfGOGUTDIKilryIgvYExV+zRJcBvnuE0O+hjCis64c
-+CTJHbRAMoQuqPayJqG1TSfKkO+4tM1gzF44r+X18fLklF+YbQv5CqTORC6MNYiI
-Kbzx+nA8AzGQxHVXaKZg+2V6nxQ0WXFqwVcF/efM/F4DRIbB0rSurOgIT3Qilfov
-ULJhiN53FUyzMOKdAGB38zMYeOHx5vh11jOAYwIDAQABAoIBAQCFcDHayVzMahk4
-fISlJ1l0U+Tcl2TQzuhRGQLxSrb1fKzPct+Xolbf92IIGU3ZaledCAnYFeNJuWoH
-Z6NhtxAVQo8h9s6uiu3F7nEO8iBC68H7Dy9ev1KQyVfP8Lz1j9+lGv5ZrFxIKNKP
-UbDDopP5YsZburkvUt8nYnuUNVZ7XCFeWENnpePMeW8CmYLJWs190o6lIvTaQjar
-Z8PiR4eLG2cDyU4hLTam8aRm2she0iYmH2t458Oo3TmRCAST+GKHINOvdX4bXC+l
-kevds06sXVZcj9eLOEQEkikIlF1S8xTTUFF1DZVfmX5kX6RMvmHLyzgeOo5K7Eo7
-zQTL4BBxAoGBAPzNQd8gDFq9diaUA7TvyH3jq/fURrCpSXmHIWNIDgmhBChD6kfd
-Vrm0DanVPR2ofZiG0F7KZvBKjVqRPyWMyQTIzmDLLehimm53nBDda0wMsd56bkEn
-2kNB5YeSW4p7uPL2HtIGFmW7JqterC6XJ05a3HL1KAh2qdlxj65br4IZAoGBAPHm
-1LrgVLaNoZ5SU1H2kPjE8SnetiMtBIZBjKO1MnVmwto1qf/9FhZu7GX83B5iWLPn
-9zDRRnbwLdOeVxqnQDJD2cyX5ffV/KaFZfzML5wntm00QH8e0mouyFvGgaAW9REw
-XpJ9fR8PYaTyNmsS9FDcrpJHcSzLdE12qo5z8X3bAoGBAMxvGKPdgOAoctGDxPOf
-LR1Q7hJ+YaQfYkGa2480B9YVlg0RzvXKsoV+a8Y5oTDIvriH3R2qxE/xmhnxd4Vg
-MndiBDMWW8ufYY+Pvm+M/2YCphxqGZsrJUBam8lkvA5jOw+PpSDmt1oRXJYc+1S5
-+5lLtRZp1rPxci1bvnubvXSRAoGASeYftNtVZdOMvSYfSBnQHf9kQUtFTyewiyc5
-RMXVHWKAQlNElOyHM+E9HMcLo8idmpBRxrsYELu89DoaGA1iIjjyL3t0CKt5nJGC
-H6FtJgJHy8K7tVoAI+oY0ViqPvLkeRTSv2PQto1nYhGQqFNJCe7w8cUbgPjCleub
-nRs0eiMCgYEA4zZ7e0ScTbizyEdMZcWw6FLe/yfoY/DlKZCbvmqpArjXvV0IhGC1
-dlkWVnv7b6rEvml4gL2d2a8smX1mvAd9SyQ0iJlLC9ZnxIXe7fXqhg0aAVtQEaHb
-sB1u3B7lcmjxxjNLQc1rFXUIKk8EVjlncTuVzZyqc/bXa9l941ip+7Q=
------END RSA PRIVATE KEY-----

+ 0 - 14
udpdataclear/udpSensitiveWords/grpc_server/server.pem

@@ -1,14 +0,0 @@
------BEGIN CERTIFICATE-----
-MIICODCCAb6gAwIBAgIUMPYKo8E41OB9mZI1X8Gr0RpDD3MwCgYIKoZIzj0EAwIw
-UzELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGElu
-dGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEMMAoGA1UEAwwDdG9wMB4XDTIxMDQyODAx
-NTc1NloXDTMxMDQyNjAxNTc1NlowUzELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNv
-bWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEMMAoG
-A1UEAwwDdG9wMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEIG95RQBEOA9GeW+7rnqZ
-hSWnVehfnQBx8WWensms90PeoY95SMtT7oGEWA5iSS5QC8bihO2ZXENpOA4fhoix
-kTlFFv7cjV3mbzrs+H1s1dUjHRUldoFbxeqGVEHKxNvno1MwUTAdBgNVHQ4EFgQU
-46JBxQa+Q908CC1YGU+uzruM9AcwHwYDVR0jBBgwFoAU46JBxQa+Q908CC1YGU+u
-zruM9AcwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAgNoADBlAjEAw3DyG4op
-xa8nbG/xJlUUC61F6n0PaQbpGX8Zo5dvJ7O9RkfltY8+HPP4euUw0KUmAjACjA1J
-U8xLgkVaDmGsrUPVqeayN7gY9iwH4kiuYbhn9tnLbB/y1HybAm6Nitf+h2A=
------END CERTIFICATE-----

+ 0 - 11
udpdataclear/udpSensitiveWords/grpc_server/server.yaml

@@ -1,11 +0,0 @@
-mixdataMgoAddr: 192.168.3.166:27082
-port: :8888
-mongodbPoolSize: 10
-dbName: mixdata
-userName:
-passWord:
-taskGteId: 605d4f3ea15e7ed8e49ec97c
-taskLteId: 605d4f3ea15e7ed8e49ec9ad
-isAddTask: 0
-certFile: ./server.crt
-keyFile: ./server.key

+ 0 - 136
udpdataclear/udpSensitiveWords/grpc_server/words.go

@@ -1,136 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"log"
-	"sensitiveWords.udp/util"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-)
-
-func dealWithNameScoreRules(name string) (string,bool) {
-	new_name,new_score,isok :="",float64(0),false
-	old_name := escapeNew(name)
-	if old_name=="" {
-		return "",false
-	}
-	query := `{"query":{"bool":{"must":[{"query_string":{"default_field":"unique_qy.name_word","query":"`+old_name+`"}}],"must_not":[],"should":[]}},"from":"0","size":"1"}`
-	tmp := make(map[string]interface{})
-	json.Unmarshal([]byte(query),&tmp)
-	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
-	if err != nil {
-		log.Println("ES查询出错",name,old_name)
-		return "",false
-	}
-	resNum := len(searchResult.Hits.Hits)
-	res := make([]map[string]interface{}, resNum)
-	if searchResult.Hits != nil {
-		if resNum < 5000 {
-			for i, hit := range searchResult.Hits.Hits {
-				data := make(map[string]interface{},0)
-				json.Unmarshal(*hit.Source, &data)
-				res[i] = map[string]interface{}{
-					"name":data["name"],
-					"score":*hit.Score,
-				}
-			}
-		} else {
-			log.Println("查询结果太多,查询到:", resNum, "条")
-		}
-	}
-	if len(res)>0 && res != nil {
-		//分析分数...取最大
-
-		new_name = util.ObjToString(res[0]["name"])
-		new_score = util.Float64All(res[0]["score"])
-	}
-	if new_name!="" { //分析hit比例
-		total,hit := dealWithWordsRules(name,new_name)
-		proportion := float64(hit)/float64(total)
-		if proportion >=1.0 {
-			isok = true
-		}else {
-			if float64(hit)/float64(total)>=0.8 && new_score> 4.0{
-				isok = true
-			}
-		}
-	}
-	return new_name,isok
-}
// parenStripper removes ASCII and full-width parentheses.
var parenStripper = strings.NewReplacer("(", "", ")", "", "（", "", "）", "")

// dealWithWordsRules measures how much of source_name is covered by
// info_name. Parentheses (ASCII and full-width) are removed from both names
// first. It returns total, the number of two-character windows in
// source_name, and hit, the number of two-character windows of info_name that
// occur in source_name.
//
// The second pair of ReplaceAll calls used to repeat the ASCII parentheses,
// so full-width ones were never stripped.
func dealWithWordsRules(info_name string, source_name string) (int, int) {
	info_name = parenStripper.Replace(info_name)
	source_name = parenStripper.Replace(source_name)

	bigrams, _ := calculateWordCount(info_name)
	_, total := calculateWordCount(source_name)

	hit := 0
	for _, gram := range bigrams {
		if strings.Contains(source_name, gram) {
			hit++
		}
	}
	return total, hit
}

// calculateWordCount splits name into overlapping two-character windows and
// returns them with their count. Names shorter than two characters yield an
// empty, non-nil slice and 0.
func calculateWordCount(name string) ([]string, int) {
	const width = 2
	total := utf8.RuneCountInString(name) - (width - 1)
	if total <= 0 {
		return make([]string, 0), 0
	}
	runes := []rune(name)
	grams := make([]string, 0, total)
	for i := 0; i < total; i++ {
		grams = append(grams, string(runes[i:i+width]))
	}
	return grams, len(grams)
}
-
-//func escape(s string) string {
-//	news := ""
-//	s = strings.ReplaceAll(s," ","")
-//	for _, c := range s {
-//		//if unicode.Is(unicode.Han, c) || unicode.IsNumber(c) || unicode.IsLetter(c) {
-//		//	news = news + string(c)
-//		//}else if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
-//		//	a := string([]rune{os.PathSeparator, '\\'})
-//		//	news = news + a + string(c)
-//		//} else {
-//		//	return ""
-//		//}
-//		if c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/' || c == '#' || c == '@' || c == '(' || c == ')' || c == '>' || c == '<' || c == '“' || c == '”' || c == '?' || c == '、' || c == '.' {
-//			a := string([]rune{os.PathSeparator,'\\'})
-//			//news = news + a + `\` + string(c)
-//			news = news + a  + string(c)
-//		} else {
-//			news = news + string(c)
-//		}
-//
-//	}
-//	return news
-//}
-
-
// escapeNew returns s with every character removed that is not a Han
// character, a letter or a number; spaces and punctuation are dropped.
//
// strings.Map builds the result in one pass instead of re-allocating the
// string for each kept character.
func escapeNew(s string) string {
	return strings.Map(func(r rune) rune {
		if unicode.Is(unicode.Han, r) || unicode.IsNumber(r) || unicode.IsLetter(r) {
			return r
		}
		return -1 // negative value: drop the character
	}, s)
}
-
-

+ 3 - 0
udpdataclear/udpSensitiveWords/main.go

@@ -8,6 +8,9 @@ func init() {
 	util.InitC()
 }
 func main() {
+
+	//go util.AddTaskSensitiveWordsData() //增量
+
 	// 主函数中添加
 	util.ExtractUdp() //udp通知抽取
 	lock := make(chan bool)

+ 0 - 284
udpdataclear/udpSensitiveWords/proto_grpc/sensitive_words.pb.go

@@ -1,284 +0,0 @@
-// Code generated by protoc-gen-go. DO NOT EDIT.
-// versions:
-// 	protoc-gen-go v1.26.0
-// 	protoc        v3.12.0
-// source: sensitive_words.proto
-
-package proto_grpc
-
-import (
-	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
-	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
-	reflect "reflect"
-	sync "sync"
-)
-
-const (
-	// Verify that this generated code is sufficiently up-to-date.
-	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
-	// Verify that runtime/protoimpl is sufficiently up-to-date.
-	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
-)
-
-type CorpusType int32
-
-const (
-	CorpusType_ALL    CorpusType = 0
-	CorpusType_BUYER  CorpusType = 1
-	CorpusType_AGENCY CorpusType = 2
-	CorpusType_WINNER CorpusType = 3
-)
-
-// Enum value maps for CorpusType.
-var (
-	CorpusType_name = map[int32]string{
-		0: "ALL",
-		1: "BUYER",
-		2: "AGENCY",
-		3: "WINNER",
-	}
-	CorpusType_value = map[string]int32{
-		"ALL":    0,
-		"BUYER":  1,
-		"AGENCY": 2,
-		"WINNER": 3,
-	}
-)
-
-func (x CorpusType) Enum() *CorpusType {
-	p := new(CorpusType)
-	*p = x
-	return p
-}
-
-func (x CorpusType) String() string {
-	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
-}
-
-func (CorpusType) Descriptor() protoreflect.EnumDescriptor {
-	return file_sensitive_words_proto_enumTypes[0].Descriptor()
-}
-
-func (CorpusType) Type() protoreflect.EnumType {
-	return &file_sensitive_words_proto_enumTypes[0]
-}
-
-func (x CorpusType) Number() protoreflect.EnumNumber {
-	return protoreflect.EnumNumber(x)
-}
-
-// Deprecated: Use CorpusType.Descriptor instead.
-func (CorpusType) EnumDescriptor() ([]byte, []int) {
-	return file_sensitive_words_proto_rawDescGZIP(), []int{0}
-}
-
-type Request struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	Text   string     `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"`
-	Corpus CorpusType `protobuf:"varint,2,opt,name=corpus,proto3,enum=proto_grpc.CorpusType" json:"corpus,omitempty"`
-}
-
-func (x *Request) Reset() {
-	*x = Request{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_sensitive_words_proto_msgTypes[0]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *Request) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*Request) ProtoMessage() {}
-
-func (x *Request) ProtoReflect() protoreflect.Message {
-	mi := &file_sensitive_words_proto_msgTypes[0]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use Request.ProtoReflect.Descriptor instead.
-func (*Request) Descriptor() ([]byte, []int) {
-	return file_sensitive_words_proto_rawDescGZIP(), []int{0}
-}
-
-func (x *Request) GetText() string {
-	if x != nil {
-		return x.Text
-	}
-	return ""
-}
-
-func (x *Request) GetCorpus() CorpusType {
-	if x != nil {
-		return x.Corpus
-	}
-	return CorpusType_ALL
-}
-
-type ResultSensitiveWords struct {
-	state         protoimpl.MessageState
-	sizeCache     protoimpl.SizeCache
-	unknownFields protoimpl.UnknownFields
-
-	SensitiveWords string `protobuf:"bytes,1,opt,name=SensitiveWords,proto3" json:"SensitiveWords,omitempty"`
-}
-
-func (x *ResultSensitiveWords) Reset() {
-	*x = ResultSensitiveWords{}
-	if protoimpl.UnsafeEnabled {
-		mi := &file_sensitive_words_proto_msgTypes[1]
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		ms.StoreMessageInfo(mi)
-	}
-}
-
-func (x *ResultSensitiveWords) String() string {
-	return protoimpl.X.MessageStringOf(x)
-}
-
-func (*ResultSensitiveWords) ProtoMessage() {}
-
-func (x *ResultSensitiveWords) ProtoReflect() protoreflect.Message {
-	mi := &file_sensitive_words_proto_msgTypes[1]
-	if protoimpl.UnsafeEnabled && x != nil {
-		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
-		if ms.LoadMessageInfo() == nil {
-			ms.StoreMessageInfo(mi)
-		}
-		return ms
-	}
-	return mi.MessageOf(x)
-}
-
-// Deprecated: Use ResultSensitiveWords.ProtoReflect.Descriptor instead.
-func (*ResultSensitiveWords) Descriptor() ([]byte, []int) {
-	return file_sensitive_words_proto_rawDescGZIP(), []int{1}
-}
-
-func (x *ResultSensitiveWords) GetSensitiveWords() string {
-	if x != nil {
-		return x.SensitiveWords
-	}
-	return ""
-}
-
-var File_sensitive_words_proto protoreflect.FileDescriptor
-
-var file_sensitive_words_proto_rawDesc = []byte{
-	0x0a, 0x15, 0x73, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x77, 0x6f, 0x72, 0x64,
-	0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x5f, 0x67,
-	0x72, 0x70, 0x63, 0x22, 0x4d, 0x0a, 0x07, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12,
-	0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65,
-	0x78, 0x74, 0x12, 0x2e, 0x0a, 0x06, 0x63, 0x6f, 0x72, 0x70, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01,
-	0x28, 0x0e, 0x32, 0x16, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x5f, 0x67, 0x72, 0x70, 0x63, 0x2e,
-	0x43, 0x6f, 0x72, 0x70, 0x75, 0x73, 0x54, 0x79, 0x70, 0x65, 0x52, 0x06, 0x63, 0x6f, 0x72, 0x70,
-	0x75, 0x73, 0x22, 0x3e, 0x0a, 0x14, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x53, 0x65, 0x6e, 0x73,
-	0x69, 0x74, 0x69, 0x76, 0x65, 0x57, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0e, 0x53, 0x65,
-	0x6e, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x57, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x01, 0x20, 0x01,
-	0x28, 0x09, 0x52, 0x0e, 0x53, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x57, 0x6f, 0x72,
-	0x64, 0x73, 0x2a, 0x38, 0x0a, 0x0a, 0x43, 0x6f, 0x72, 0x70, 0x75, 0x73, 0x54, 0x79, 0x70, 0x65,
-	0x12, 0x07, 0x0a, 0x03, 0x41, 0x4c, 0x4c, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x42, 0x55, 0x59,
-	0x45, 0x52, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x41, 0x47, 0x45, 0x4e, 0x43, 0x59, 0x10, 0x02,
-	0x12, 0x0a, 0x0a, 0x06, 0x57, 0x49, 0x4e, 0x4e, 0x45, 0x52, 0x10, 0x03, 0x32, 0x51, 0x0a, 0x0e,
-	0x53, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x57, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x3f,
-	0x0a, 0x06, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x12, 0x13, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
-	0x5f, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e,
-	0x70, 0x72, 0x6f, 0x74, 0x6f, 0x5f, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c,
-	0x74, 0x53, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x57, 0x6f, 0x72, 0x64, 0x73, 0x42,
-	0x0d, 0x5a, 0x0b, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x5f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x62, 0x06,
-	0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
-}
-
-var (
-	file_sensitive_words_proto_rawDescOnce sync.Once
-	file_sensitive_words_proto_rawDescData = file_sensitive_words_proto_rawDesc
-)
-
-func file_sensitive_words_proto_rawDescGZIP() []byte {
-	file_sensitive_words_proto_rawDescOnce.Do(func() {
-		file_sensitive_words_proto_rawDescData = protoimpl.X.CompressGZIP(file_sensitive_words_proto_rawDescData)
-	})
-	return file_sensitive_words_proto_rawDescData
-}
-
-var file_sensitive_words_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_sensitive_words_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
-var file_sensitive_words_proto_goTypes = []interface{}{
-	(CorpusType)(0),              // 0: proto_grpc.CorpusType
-	(*Request)(nil),              // 1: proto_grpc.Request
-	(*ResultSensitiveWords)(nil), // 2: proto_grpc.ResultSensitiveWords
-}
-var file_sensitive_words_proto_depIdxs = []int32{
-	0, // 0: proto_grpc.Request.corpus:type_name -> proto_grpc.CorpusType
-	1, // 1: proto_grpc.SensitiveWords.Search:input_type -> proto_grpc.Request
-	2, // 2: proto_grpc.SensitiveWords.Search:output_type -> proto_grpc.ResultSensitiveWords
-	2, // [2:3] is the sub-list for method output_type
-	1, // [1:2] is the sub-list for method input_type
-	1, // [1:1] is the sub-list for extension type_name
-	1, // [1:1] is the sub-list for extension extendee
-	0, // [0:1] is the sub-list for field type_name
-}
-
-func init() { file_sensitive_words_proto_init() }
-func file_sensitive_words_proto_init() {
-	if File_sensitive_words_proto != nil {
-		return
-	}
-	if !protoimpl.UnsafeEnabled {
-		file_sensitive_words_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*Request); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-		file_sensitive_words_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
-			switch v := v.(*ResultSensitiveWords); i {
-			case 0:
-				return &v.state
-			case 1:
-				return &v.sizeCache
-			case 2:
-				return &v.unknownFields
-			default:
-				return nil
-			}
-		}
-	}
-	type x struct{}
-	out := protoimpl.TypeBuilder{
-		File: protoimpl.DescBuilder{
-			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
-			RawDescriptor: file_sensitive_words_proto_rawDesc,
-			NumEnums:      1,
-			NumMessages:   2,
-			NumExtensions: 0,
-			NumServices:   1,
-		},
-		GoTypes:           file_sensitive_words_proto_goTypes,
-		DependencyIndexes: file_sensitive_words_proto_depIdxs,
-		EnumInfos:         file_sensitive_words_proto_enumTypes,
-		MessageInfos:      file_sensitive_words_proto_msgTypes,
-	}.Build()
-	File_sensitive_words_proto = out.File
-	file_sensitive_words_proto_rawDesc = nil
-	file_sensitive_words_proto_goTypes = nil
-	file_sensitive_words_proto_depIdxs = nil
-}

+ 0 - 101
udpdataclear/udpSensitiveWords/proto_grpc/sensitive_words_grpc.pb.go

@@ -1,101 +0,0 @@
-// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
-
-package proto_grpc
-
-import (
-	context "context"
-	grpc "google.golang.org/grpc"
-	codes "google.golang.org/grpc/codes"
-	status "google.golang.org/grpc/status"
-)
-
-// This is a compile-time assertion to ensure that this generated file
-// is compatible with the grpc package it is being compiled against.
-// Requires gRPC-Go v1.32.0 or later.
-const _ = grpc.SupportPackageIsVersion7
-
-// SensitiveWordsClient is the client API for SensitiveWords service.
-//
-// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
-type SensitiveWordsClient interface {
-	Search(ctx context.Context, in *Request, opts ...grpc.CallOption) (*ResultSensitiveWords, error)
-}
-
-type sensitiveWordsClient struct {
-	cc grpc.ClientConnInterface
-}
-
-func NewSensitiveWordsClient(cc grpc.ClientConnInterface) SensitiveWordsClient {
-	return &sensitiveWordsClient{cc}
-}
-
-func (c *sensitiveWordsClient) Search(ctx context.Context, in *Request, opts ...grpc.CallOption) (*ResultSensitiveWords, error) {
-	out := new(ResultSensitiveWords)
-	err := c.cc.Invoke(ctx, "/proto_grpc.SensitiveWords/Search", in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-// SensitiveWordsServer is the server API for SensitiveWords service.
-// All implementations must embed UnimplementedSensitiveWordsServer
-// for forward compatibility
-type SensitiveWordsServer interface {
-	Search(context.Context, *Request) (*ResultSensitiveWords, error)
-	mustEmbedUnimplementedSensitiveWordsServer()
-}
-
-// UnimplementedSensitiveWordsServer must be embedded to have forward compatible implementations.
-type UnimplementedSensitiveWordsServer struct {
-}
-
-func (UnimplementedSensitiveWordsServer) Search(context.Context, *Request) (*ResultSensitiveWords, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method Search not implemented")
-}
-func (UnimplementedSensitiveWordsServer) mustEmbedUnimplementedSensitiveWordsServer() {}
-
-// UnsafeSensitiveWordsServer may be embedded to opt out of forward compatibility for this service.
-// Use of this interface is not recommended, as added methods to SensitiveWordsServer will
-// result in compilation errors.
-type UnsafeSensitiveWordsServer interface {
-	mustEmbedUnimplementedSensitiveWordsServer()
-}
-
-func RegisterSensitiveWordsServer(s grpc.ServiceRegistrar, srv SensitiveWordsServer) {
-	s.RegisterService(&SensitiveWords_ServiceDesc, srv)
-}
-
-func _SensitiveWords_Search_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(Request)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(SensitiveWordsServer).Search(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: "/proto_grpc.SensitiveWords/Search",
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(SensitiveWordsServer).Search(ctx, req.(*Request))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
-// SensitiveWords_ServiceDesc is the grpc.ServiceDesc for SensitiveWords service.
-// It's only intended for direct use with grpc.RegisterService,
-// and not to be introspected or modified (even as a copy)
-var SensitiveWords_ServiceDesc = grpc.ServiceDesc{
-	ServiceName: "proto_grpc.SensitiveWords",
-	HandlerType: (*SensitiveWordsServer)(nil),
-	Methods: []grpc.MethodDesc{
-		{
-			MethodName: "Search",
-			Handler:    _SensitiveWords_Search_Handler,
-		},
-	},
-	Streams:  []grpc.StreamDesc{},
-	Metadata: "sensitive_words.proto",
-}

+ 0 - 24
udpdataclear/udpSensitiveWords/protos/sensitive_words.proto

@@ -1,24 +0,0 @@
-syntax = "proto3";
-package proto_grpc;
-
-option go_package = "proto_grpc/";
-
-enum CorpusType {
-  ALL = 0;
-  BUYER = 1;
-  AGENCY = 2;
-  WINNER = 3;
-}
-message Request{
-  string text = 1;
-  CorpusType corpus = 2;
-}
-
-message ResultSensitiveWords{
-  string SensitiveWords = 1;
-}
-
-service SensitiveWords{
-  rpc Search(Request) returns (ResultSensitiveWords);
-}
-

+ 0 - 14
udpdataclear/udpSensitiveWords/server.pem

@@ -1,14 +0,0 @@
------BEGIN CERTIFICATE-----
-MIICODCCAb6gAwIBAgIUMPYKo8E41OB9mZI1X8Gr0RpDD3MwCgYIKoZIzj0EAwIw
-UzELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGElu
-dGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEMMAoGA1UEAwwDdG9wMB4XDTIxMDQyODAx
-NTc1NloXDTMxMDQyNjAxNTc1NlowUzELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNv
-bWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEMMAoG
-A1UEAwwDdG9wMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEIG95RQBEOA9GeW+7rnqZ
-hSWnVehfnQBx8WWensms90PeoY95SMtT7oGEWA5iSS5QC8bihO2ZXENpOA4fhoix
-kTlFFv7cjV3mbzrs+H1s1dUjHRUldoFbxeqGVEHKxNvno1MwUTAdBgNVHQ4EFgQU
-46JBxQa+Q908CC1YGU+uzruM9AcwHwYDVR0jBBgwFoAU46JBxQa+Q908CC1YGU+u
-zruM9AcwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAgNoADBlAjEAw3DyG4op
-xa8nbG/xJlUUC61F6n0PaQbpGX8Zo5dvJ7O9RkfltY8+HPP4euUw0KUmAjACjA1J
-U8xLgkVaDmGsrUPVqeayN7gY9iwH4kiuYbhn9tnLbB/y1HybAm6Nitf+h2A=
------END CERTIFICATE-----

+ 2 - 37
udpdataclear/udpSensitiveWords/util/config.go

@@ -1,15 +1,9 @@
 package util
 
 import (
-	"context"
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/credentials"
-	"google.golang.org/grpc/keepalive"
 	"gopkg.in/olivere/elastic.v1"
 	"log"
 	"net/http"
-	"sensitiveWords.udp/proto_grpc"
-	"time"
 )
 
 func init() {
@@ -30,41 +24,14 @@ func InitC() {
 	}
 	QfwMgo85.InitPool()
 
-	Client_Es, _ = elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
+	//Client_Es, _ = elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
+	Client_Es, _ = elastic.NewClient(http.DefaultClient, "http://ela.spdata.jianyu360.com")
 
 	es_type, es_index = Config["es_type"].(string), Config["es_index"].(string)
 
 	Fields = Config["fields"].(map[string]interface{})
 	FindBuyerC, FindAgencyC, FindWinnerC = Config["buyer_c"].(string), Config["agency_c"].(string), Config["winner_c"].(string)
 	Collection = Config["collection"].(string)
-	qaddrs := Config["query_addrs"].([]interface{})
-	CertFile = Config["certFile"].(string)
-	cred,err := credentials.NewClientTLSFromFile(CertFile,"top")
-	if err != nil {
-		log.Fatalln("Failed to create TLS credentials %v", err)
-	}
-	for _, v := range qaddrs {
-		ctx, cancelFunc := context.WithTimeout(context.TODO(), time.Second*5)
-		defer cancelFunc()
-		conn, err := grpc.DialContext(ctx, v.(string),
-			grpc.WithBlock(),
-			grpc.WithTransportCredentials(cred),
-			grpc.WithKeepaliveParams(keepalive.ClientParameters{
-				Time:                10 * time.Second,
-				Timeout:             100 * time.Millisecond,
-				PermitWithoutStream: true}),
-		)
-		if err != nil {
-			log.Fatalf("did not connect: %s %v", v, err)
-		}
-		c := proto_grpc.NewSensitiveWordsClient(conn)
-		_, err = c.Search(ctx, &proto_grpc.Request{})
-		if err != nil {
-			log.Fatalln(err)
-		} else {
-			QAddrs = append(QAddrs, &c)
-		}
-	}
 
 }
 
@@ -73,7 +40,5 @@ var QfwMgo85 *MongodbSim
 var Collection string
 var Fields map[string]interface{}
 var FindBuyerC, FindAgencyC, FindWinnerC string
-var QAddrs []*proto_grpc.SensitiveWordsClient
 var es_type, es_index string
 var Client_Es *elastic.Client
-var CertFile string

+ 208 - 72
udpdataclear/udpSensitiveWords/util/udpdata.go

@@ -2,12 +2,14 @@
 package util
 
 import (
-	"context"
 	"encoding/json"
+	"github.com/importcjj/sensitive"
+	"go.mongodb.org/mongo-driver/bson"
 	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo/options"
 	"log"
 	"net"
-	"sensitiveWords.udp/proto_grpc"
+	"regexp"
 	"strings"
 	"time"
 )
@@ -51,92 +53,94 @@ func QuerySensitiveWords(sid, eid string) {
 	}).Select(Fields).Iter()
 
 	for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
+		up := make(map[string]string)
 		if win, isok := tmp["winner"].(string); isok {
-			queryGrpcWinner := query_grpc(win, FindWinnerC)
-			if queryGrpcWinner == "" {
-
-			} else {
-				tmp["winner"] = queryGrpcWinner
+			if fok, flog, fname := cheakname(win); fok && flog != "" {
+				tmp["winner"] = fname
+				up["winner"] = flog
 			}
 		}
 		if win, isok := tmp["s_winner"].(string); isok {
-			queryGrpcWinner := query_grpc(win, FindWinnerC)
-			if queryGrpcWinner == "" {
-
-			} else {
-				tmp["s_winner"] = queryGrpcWinner
+			if fok, flog, fname := cheakname(win); fok && flog != "" {
+				tmp["s_winner"] = fname
+				up["s_winner"] = flog
 			}
 		}
 
 		if agency, isok := tmp["agency"].(string); isok {
-			queryGrpcAgency := query_grpc(agency, FindAgencyC)
-			if queryGrpcAgency == "" {
-
-			} else {
-				tmp["agency"] = queryGrpcAgency
+			if fok, flog, fname := cheakname(agency); fok && flog != "" {
+				tmp["agency"] = fname
+				up["agency"] = flog
 			}
 		}
 
 		if buyer, isok := tmp["buyer"].(string); isok {
-			queryGrpcBuyer := query_grpc(buyer, FindBuyerC)
-			if queryGrpcBuyer == "" {
-
-			} else {
-				tmp["buyer"] = queryGrpcBuyer
+			if fok, flog, fname := cheakname(buyer); fok && flog != "" {
+				tmp["buyer"] = fname
+				up["buyer"] = flog
 			}
 		}
+		if len(up) > 0 {
+			tmp["log"] = up
+			//todo update
+			log.Println(tmp)
+		}
 		num++
 	}
-	log.Println(sid, eid,"处理完成:", num)
+	log.Println(sid, eid, "处理完成:", num)
 }
 
-//grpc - 处理
-func query_grpc(enterprise, findC string) string {
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
-	defer cancel()
-	var CorpusType proto_grpc.CorpusType = proto_grpc.CorpusType_ALL
-	switch findC {
-	case "buyer":
-		CorpusType = proto_grpc.CorpusType_BUYER
-	case "agency":
-		CorpusType = proto_grpc.CorpusType_BUYER
-	case "winner":
-		CorpusType = proto_grpc.CorpusType_BUYER
-	default:
-		CorpusType = proto_grpc.CorpusType_ALL
-	}
-	lenc := len(QAddrs)
-	c := make(chan map[int]string, lenc)
-	defer close(c)
-	for i, v := range QAddrs {
-		go func(index int, vc *proto_grpc.SensitiveWordsClient) {
-			sensitiveWords, err := (*vc).Search(ctx, &proto_grpc.Request{Text: enterprise, Corpus: CorpusType})
-			if err != nil {
-				log.Println(index, err)
-				c <- map[int]string{index: ""}
-				return
-			}
-			c <- map[int]string{index: sensitiveWords.GetSensitiveWords()}
-			return
-		}(i, v)
-	}
-	result := []string{}
-	var q int
-	for v := range c {
-		for _, vv := range v {
-			if vv == "" {
-				continue
-			}
-			result = append(result, vv)
+func cheakname(name string) (up bool, log, rname string) {
+	filter := sensitive.New()
+	var cheaklog string
+	//更新,匹配
+	if tremQuery(name) {
+		cheaklog = "tremQuery"
+		return true, cheaklog, name
+	}
+
+	rname, isok, datas := dealWithNameScoreRules(name)
+	if len(datas) > 0 {
+		for _, v := range datas {
+			filter.AddWord(v["name"].(string))
 		}
-		q++
-		if q >= lenc {
-			break
+		findAll := filter.FindAll(name)
+		data := handleData(findAll)
+		//更新,匹配
+		if len(data) > 0 {
+			cheaklog = "queryString"
+			return true, cheaklog, data
 		}
 	}
-	rarr := handleData(result)
-	rstr := strings.Join(rarr, ",")
-	return rstr
+	//更新,匹配
+	if rname != "" && isok {
+		cheaklog = "queryScore"
+		return true, cheaklog, rname
+	}
+
+	return false, "", name
+}
+
+// tremQuery reports whether the Elasticsearch index/type configured in
+// es_index/es_type returns a hit whose "name" field equals name exactly.
+//
+// It runs a term query on "<es_index>.name" limited to the first 10 hits.
+// A failed search is logged and treated as "not found".
+func tremQuery(name string) bool {
+	// Build the query as a value instead of concatenating name into a JSON
+	// string: a name containing a quote or backslash used to produce invalid
+	// JSON, and the ignored unmarshal error left the query empty.
+	tmp := map[string]interface{}{
+		"query": map[string]interface{}{
+			"bool": map[string]interface{}{
+				"must": []interface{}{
+					map[string]interface{}{
+						"term": map[string]interface{}{es_index + ".name": name},
+					},
+				},
+				"must_not": []interface{}{},
+				"should":   []interface{}{},
+			},
+		},
+		"from":   0,
+		"size":   10,
+		"sort":   []interface{}{},
+		"facets": map[string]interface{}{},
+	}
+	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
+	if err != nil {
+		log.Println("从ES查询出错", err.Error(), name)
+		return false
+	}
+	if searchResult.Hits == nil {
+		return false
+	}
+	for _, hit := range searchResult.Hits.Hits {
+		if hit.Source == nil {
+			continue
+		}
+		// Decode every hit into a fresh map so a field from an earlier hit
+		// cannot leak into the comparison for a later one.
+		data := make(map[string]interface{}, 1)
+		if err := json.Unmarshal(*hit.Source, &data); err != nil {
+			continue
+		}
+		// Checked assertion: a hit without a string "name" is skipped
+		// instead of panicking.
+		if hitName, ok := data["name"].(string); ok && hitName == name {
+			return true
+		}
+	}
+	return false
 }
 
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
@@ -180,11 +184,21 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		log.Println(string(data))
 	}
 }
-func handleData(datas []string) []string {
-	del := map[int]bool{}
+func handleData(datas []string) string {
 	dataslen := len(datas)
-	if dataslen == 0{
-		return []string{}
+	del := map[int]bool{}
+	if dataslen <= 1 {
+		rstr := strings.Join(datas, ",")
+		return rstr
+	}
+
+	m2 := make(map[string]bool)
+	for i, v := range datas {
+		if m2[v] {
+			del[i] = true
+		} else {
+			m2[v] = true
+		}
 	}
 	for i := 0; i < dataslen; i++ {
 		if !del[i] {
@@ -210,6 +224,128 @@ func handleData(datas []string) []string {
 			m++
 		}
 	}
+	rstr := strings.Join(rdata, ",")
+	return rstr
+}
+
+// AddTaskSensitiveWordsData runs the scheduled incremental import (author: Feng).
+//
+// Every seven days it reads the documents in mixdata.qyxy_std whose
+// "updatetime" is at or after (tick time - 7 days), drops company names
+// rejected by the package-level name filters or whose company type marks an
+// individual ("个人"/"个体"), and for each remaining name inserts it into
+// mixdata.unique_qyxy and hands it to dealWithEsData.
+//
+// The function never returns under normal operation; a panic is recovered
+// and logged, which ends the task. A failed MongoDB connection terminates
+// the process through log.Fatalln.
+func AddTaskSensitiveWordsData() {
+	defer func() {
+		if err := recover(); err != nil {
+			log.Println("func() addTaskSensitiveWordsData", err)
+		}
+	}()
+
+	// NOTE(review): the MongoDB address and credentials are hard-coded here;
+	// they should come from configuration rather than live in source control.
+	mmmgo, err := InitMgoEn("mongodb://172.17.4.187:27082,172.17.145.163:27083", 20, "fengweiqiang", "fwq@123123")
+	if err != nil {
+		log.Fatalln(err)
+	}
+	con := mmmgo.GetCon()
+	if con == nil {
+		log.Fatalln("mgo con err")
+	}
+	tick := time.Tick(time.Hour * 24 * 7) // fires once a week
+	for {                                 // scheduled task loop
+		ctime := <-tick
+		// Lower bound of the query window: seven days before this tick.
+		cronData := time.Date(ctime.Year(), ctime.Month(), ctime.Day()-7, ctime.Hour(), ctime.Minute(), ctime.Second(), 0, time.Local)
+		// Each run lives in its own function so the deferred cursor Close
+		// fires at the end of the run. A defer placed directly in this
+		// endless loop would never execute and would leak one cursor per tick.
+		func() {
+			findByupdate, err := con.Database("mixdata").Collection("qyxy_std").Find(nil, bson.M{
+				"updatetime": bson.M{"$gte": cronData.Unix()},
+			}, options.Find().SetProjection(bson.M{"company_name": 1, "updatetime": 1, "company_type": 1, "company_type_old": 1}))
+			if err != nil {
+				log.Println("tick err", cronData, err)
+				return
+			}
+			defer findByupdate.Close(nil)
+			for tmp := make(map[string]interface{}); findByupdate.Next(nil); tmp = map[string]interface{}{} {
+				if err := findByupdate.Decode(&tmp); err != nil {
+					continue
+				}
+				company_name, ok := tmp["company_name"].(string)
+				if !ok {
+					continue
+				}
+				// Skip names rejected by the length/shape/keyword filters.
+				if reglen.MatchString(company_name) || strReg.MatchString(company_name) ||
+					!uncon_strReg.MatchString(company_name) || !unstart_strReg.MatchString(company_name) ||
+					start_strReg.MatchString(company_name) || end_strReg.MatchString(company_name) ||
+					con_strReg.MatchString(company_name) {
+					continue
+				}
+				// Skip individuals and sole proprietors.
+				if strings.Contains(ObjToString(tmp["company_type"]), "个人") ||
+					strings.Contains(ObjToString(tmp["company_type"]), "个体") ||
+					strings.Contains(ObjToString(tmp["company_type_old"]), "个人") ||
+					strings.Contains(ObjToString(tmp["company_type_old"]), "个体") {
+					continue
+				}
+
+				// Store in MongoDB. The insert result is deliberately ignored
+				// (best effort), as in the original code.
+				con.Database("mixdata").Collection("unique_qyxy").InsertOne(nil, bson.M{
+					"qy_name": company_name,
+				})
+				// Store in Elasticsearch: check for an existing entry, add if absent.
+				dealWithEsData(company_name, BsonTOStringId(tmp["_id"]))
+			}
+			log.Println("tick ok", cronData)
+		}()
+	}
+}
+
+// dealWithEsData adds name to the Elasticsearch index es_index/es_type under
+// document id tmpid, unless a term query on "<es_index>.name" already returns
+// a hit with a non-empty source. Search and index failures are logged and
+// otherwise ignored.
+func dealWithEsData(name string, tmpid string) {
+	// Build the query as a value instead of concatenating name into a JSON
+	// string: a name containing a quote or backslash used to produce invalid
+	// JSON, and the ignored unmarshal error left the query empty.
+	tmp := map[string]interface{}{
+		"query": map[string]interface{}{
+			"bool": map[string]interface{}{
+				"must": []interface{}{
+					map[string]interface{}{
+						"term": map[string]interface{}{es_index + ".name": name},
+					},
+				},
+				"must_not": []interface{}{},
+				"should":   []interface{}{},
+			},
+		},
+		"from":   0,
+		"size":   10,
+		"sort":   []interface{}{},
+		"facets": map[string]interface{}{},
+	}
+	searchResult, err := Client_Es.Search().Index(es_index).Type(es_type).Source(tmp).Do()
+	if err != nil {
+		log.Println("从ES查询出错", err.Error())
+		return
+	}
+	// data stays empty only when no hit carried a decodable, non-empty source.
+	data := make(map[string]interface{})
+	if searchResult.Hits != nil {
+		for _, hit := range searchResult.Hits.Hits {
+			if hit.Source == nil {
+				continue
+			}
+			if err := json.Unmarshal(*hit.Source, &data); err != nil {
+				continue
+			}
+		}
+	}
+	if len(data) > 0 {
+		return
+	}
+	// No existing entry: create it.
+	_, err = Client_Es.Index().Index(es_index).Type(es_type).Id(tmpid).BodyJson(map[string]interface{}{
+		"name":      name,
+		"name_word": name,
+	}).Do()
+	if err != nil {
+		log.Println("新增失败:", name, tmpid, err)
+	}

-	return rdata
 }
+
+// reg_alias matches names that end with one of the listed government bureau
+// suffixes.
+var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
+	"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
+	"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
+	"地方金融监督管理局|住房保障和房屋管理局|质量技术监督局|人力资源与社会保障局|公路管理局|国土资源局|" +
+	"卫生和计划生育局|民事政务局|公众安全局|交通管理局|人力资源和社会保障局|劳动和社会保障局|" +
+	"住房和城乡建设局|就业服务局|文物管理局|环境保护局|粮食和物资储备局|教育体育局|" +
+	"体育局|教育局|招商局|农业局|农机局|水务局|林业局|财政局|审计局|统计局|商务局)$")
+
+// reglen matches names that are too short (1-5 characters) or too long
+// (40 or more characters).
+var reglen *regexp.Regexp = regexp.MustCompile("^(.{1,5}|.{40,})$")
+
+// strReg matches whole names (anchored at both ends) that are generic,
+// truncated or otherwise unusable as a company name.
+var strReg *regexp.Regexp = regexp.MustCompile("^(.{0,3}工程队|.{0,3}总公司|_+|.{0,2}设备安装公司|.{0,2}装[饰修潢]公司|.{0,2}开发公司|.{0,4}有限公司|.{0,4}有限责任公司|.{0,4}设计院|建筑设计研?究?院|省文物考古研究所|经济开发区|省.*|镇人民政府|.{0,2}服务公司|" +
+	".{0,2}工程质量监督站|.{0,3}经[营销]部|.{0,3}事务所|.{0,4}工程公司|.{0,4}责任公司|.*勘测|.{0,4}研究院|.*能源建|.{0,2}安装工程|.*[市省]{1}|.{0,4}中心|.*区.?|" +
+	".{0,3}税务局|.{0,3}财政局|.{0,3}商行|.{0,2}公安处|.{0,2}测绘院|.{0,3}开发|.{0,2}建设局|.{0,2}经销部|.{0,3}委员会|.{0,2}分公司|.{0,2}管理站|.{0,2}事务管理局|" +
+	".*资料|.{0,2}办公用品.{1,2}|.*唯亭|.*设备|.+安装|.{0,2}技术服务|市.+[台院社局司]|城?区.+[府局室院]|县.+[院台局]|.{0,2}发展公司|经济技术开发|" +
+	"发展和改革局|贵州有色地质|铝塑门窗加工|生产力促进中心|特殊普通合伙|工业集团公司|人民调解协会|人民政府办公厅|机电设备公司|房地产开发有限公司|.{0,4}商店|中等专业学校|" +
+	"农村信用联社|.{0,4}经营部|.{0,4}销售部|驾驶员培训学校|.{2}县.{2}镇|保安服务总公司|住房和城乡建设局|地产评估事务所|生产资料门市部|×+|.{0,3}[0-9]{15}|.*[0-9]+|.*路|.*无字号名称.*|.*车|.*[,,]{1}.*|.*个体工商户|.*运输户)$")
+
+// unstart_strReg matches names whose first character is a CJK ideograph.
+// Callers negate it to reject names that do not start with Chinese text.
+var unstart_strReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
+
+// start_strReg matches names that begin with a rejected prefix.
+var start_strReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|省|市|县|区|业绩|资格|中标|项目|预算单位)")
+
+// end_strReg matches names that end with a rejected suffix.
+//
+// The last alternative is "/+" (one or more trailing slashes). It used to be
+// "/*", which also matches the empty string and therefore made the whole
+// expression match every name.
+// NOTE(review): "(株)" is a capture group matching a trailing "株", not
+// literal parentheses - confirm that is the intent.
+var end_strReg *regexp.Regexp = regexp.MustCompile("(\\.|\\.\\.|餐馆|店|腻子|肉庄|画社|美发屋|发廊|网吧|网咖|零售点|新街|包子铺|奶茶铺|(株)|先生|女士|小姐|" +
+	"资格|业绩|中标|项目|预算单位|摊位号|号|厅|室|部|点|馆|场|厂|床|所|处|站|行|中心|合作社|ATMS|" +
+	"吧|楼|摊|摊位|廊|茶社|坊|圃|汤锅|园|民宿|美容院|房|排挡|府|庄|栈|队|批发|苑|养殖户|棋牌|农家乐|货运|" +
+	"城|社|基地|会|服务|娱乐|种植|百货|汽修|农家菜|亭|小吃|快餐|粮库|卫生院|书画院|面|门窗|鸡排|屋|橱|堂|肉铺|服务|服饰|/+)$")
+
+// con_strReg matches names that contain a rejected fragment anywhere.
+//
+// The second alternative is the full-width question mark. A bare ASCII "?"
+// in that position has nothing to repeat and makes MustCompile panic.
+var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|？|%|代码标识|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|" +
+	"法人|&nbsp|国家拨入|借款|积累资金|单位自有|认股人|--|、|&|`|美元|[\u4e00-\u9fa5]{2,6}·[\u4e00-\u9fa5]{2,6})|" +
+	"[a-zA-Z]{5,}")
+
+// uncon_strReg matches names that contain at least one organisation keyword.
+// Callers negate it to reject names that contain none of them.
+var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")