|
@@ -33,7 +33,7 @@ func ExtractUdp() {
|
|
|
eid := "9fffffffffffffffffffffff"
|
|
|
QuerySensitiveWords(sid,eid )*/
|
|
|
}
|
|
|
-
|
|
|
// syc is the package-level WaitGroup used to wait for the handletmp worker
// goroutines: QuerySensitiveWords calls Add(1) before starting each worker and
// Wait() after its loop, and handletmp calls Done() when it returns.
+var syc sync.WaitGroup
|
|
|
func QuerySensitiveWords(sid, eid string) {
|
|
|
log.Println("QuerySensitiveWords:", sid, eid)
|
|
|
objSid, err := primitive.ObjectIDFromHex(sid)
|
|
@@ -46,7 +46,7 @@ func QuerySensitiveWords(sid, eid string) {
|
|
|
log.Println("转换eid err", err)
|
|
|
return
|
|
|
}
|
|
|
- var num, unum int64
|
|
|
+ var num, unum int
|
|
|
mgoSess := QfwMgo85.GetMgoConn()
|
|
|
defer QfwMgo85.DestoryMongoConn(mgoSess)
|
|
|
iter := mgoSess.DB(QfwMgo85.DbName).C(Collection).Find(map[string]interface{}{
|
|
@@ -55,47 +55,56 @@ func QuerySensitiveWords(sid, eid string) {
|
|
|
"$lte": objEid,
|
|
|
},
|
|
|
}).Select(Fields).Iter()
|
|
|
-
|
|
|
+ c := make(chan struct{}, 3)
|
|
|
for tmp := map[string]interface{}{}; iter.Next(&tmp); tmp = map[string]interface{}{} {
|
|
|
- up := make(map[string]string)
|
|
|
- if win, isok := tmp["winner"].(string); isok {
|
|
|
- if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
|
|
|
- tmp["winner"] = fname
|
|
|
- up["winner"] = fmt.Sprintf("%s_%s", flog, win)
|
|
|
- }
|
|
|
+ c <- struct{}{}
|
|
|
+ syc.Add(1)
|
|
|
+ go handletmp(tmp, &unum, c)
|
|
|
+ num++
|
|
|
+ }
|
|
|
+ syc.Wait()
|
|
|
+ log.Printf("%s--->%s 处理完成:%d,更新数:%d\n", sid, eid, num, unum)
|
|
|
+}
|
|
|
+func handletmp(tmp map[string]interface{}, unum *int, c <-chan struct{}) {
|
|
|
+ defer func() {
|
|
|
+ <-c
|
|
|
+ syc.Done()
|
|
|
+ }()
|
|
|
+ up := make(map[string]string)
|
|
|
+ if win, isok := tmp["winner"].(string); isok {
|
|
|
+ if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
|
|
|
+ tmp["winner"] = fname
|
|
|
+ up["winner"] = fmt.Sprintf("%s_%s", flog, win)
|
|
|
}
|
|
|
- if win, isok := tmp["s_winner"].(string); isok {
|
|
|
- if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
|
|
|
- tmp["s_winner"] = fname
|
|
|
- up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
|
|
|
- }
|
|
|
+ }
|
|
|
+ if win, isok := tmp["s_winner"].(string); isok {
|
|
|
+ if fok, flog, fname := cheakname(win); fok && flog != "" && flog != "tremQuery" {
|
|
|
+ tmp["s_winner"] = fname
|
|
|
+ up["s_winner"] = fmt.Sprintf("%s_%s", flog, win)
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- if agency, isok := tmp["agency"].(string); isok {
|
|
|
- if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
|
|
|
- tmp["agency"] = fname
|
|
|
- up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
|
|
|
- }
|
|
|
+ if agency, isok := tmp["agency"].(string); isok {
|
|
|
+ if fok, flog, fname := cheakname(agency); fok && flog != "" && flog != "tremQuery" {
|
|
|
+ tmp["agency"] = fname
|
|
|
+ up["agency"] = fmt.Sprintf("%s_%s", flog, agency)
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- if buyer, isok := tmp["buyer"].(string); isok {
|
|
|
- if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
|
|
|
- tmp["buyer"] = fname
|
|
|
- up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
|
|
|
- }
|
|
|
+ if buyer, isok := tmp["buyer"].(string); isok {
|
|
|
+ if fok, flog, fname := cheakname(buyer); fok && flog != "" && flog != "tremQuery" {
|
|
|
+ tmp["buyer"] = fname
|
|
|
+ up["buyer"] = fmt.Sprintf("%s_%s", flog, buyer)
|
|
|
}
|
|
|
- if len(up) > 0 {
|
|
|
- unum++
|
|
|
- tmp["log"] = up
|
|
|
- id := tmp["_id"].(primitive.ObjectID).Hex()
|
|
|
- log.Println(tmp)
|
|
|
- QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set":tmp})
|
|
|
- }
|
|
|
- num++
|
|
|
}
|
|
|
- log.Printf("%s--->%s 处理完成:%d,更新数:%d\n",sid, eid, num, unum)
|
|
|
+ if len(up) > 0 {
|
|
|
+ *unum++
|
|
|
+ tmp["log"] = up
|
|
|
+ id := tmp["_id"].(primitive.ObjectID).Hex()
|
|
|
+ log.Println(tmp)
|
|
|
+ QfwMgo85.UpdateById(Collection, id, map[string]interface{}{"$set": tmp})
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
func cheakname(name string) (up bool, log, rname string) {
|
|
|
filter := sensitive.New()
|
|
|
var cheaklog string
|
|
@@ -327,13 +336,7 @@ func dealWithEsData(name string, tmpid string) {
|
|
|
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-func TemporaryTest() {
|
|
|
+func TemporaryTest() {
|
|
|
log.Println("测试......导出数据")
|
|
|
|
|
|
|
|
@@ -350,10 +353,9 @@ func TemporaryTest() {
|
|
|
|
|
|
es_type, es_index = "unique_qy", "unique_qy"
|
|
|
|
|
|
-
|
|
|
q := map[string]interface{}{
|
|
|
- "check_history":map[string]interface{}{
|
|
|
- "$exists":0,
|
|
|
+ "check_history": map[string]interface{}{
|
|
|
+ "$exists": 0,
|
|
|
},
|
|
|
}
|
|
|
sess := QfwMgo85.GetMgoConn()
|
|
@@ -362,22 +364,17 @@ func TemporaryTest() {
|
|
|
pool_es := make(chan bool, 3)
|
|
|
wg_es := &sync.WaitGroup{}
|
|
|
it := sess.DB(QfwMgo85.DbName).C("winner_err_new").Find(&q).Iter()
|
|
|
- total,isOK:=0,0
|
|
|
+ total, isOK := 0, 0
|
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total % 100 == 0 {
|
|
|
- log.Println("current index",total,isOK)
|
|
|
- }
|
|
|
-
|
|
|
- if total<91400 {
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- continue
|
|
|
+ if total%100 == 0 {
|
|
|
+ log.Println("current index", total, isOK)
|
|
|
}
|
|
|
- name:=ObjToString(tmp["name"])
|
|
|
+ name := ObjToString(tmp["name"])
|
|
|
tmpid := BsonTOStringId(tmp["_id"])
|
|
|
pool_es <- true
|
|
|
wg_es.Add(1)
|
|
|
|
|
|
- go func(name string,tmpid string) {
|
|
|
+ go func(name string, tmpid string) {
|
|
|
defer func() {
|
|
|
<-pool_es
|
|
|
wg_es.Done()
|
|
@@ -385,34 +382,38 @@ func TemporaryTest() {
|
|
|
//start := int(time.Now().Unix())
|
|
|
new_name,b,score,_ :=dealWithNameScoreRules(name)
|
|
|
//log.Println("耗时:",int(time.Now().Unix())-start,"秒",b,name,new_name,tmpid)
|
|
|
- if new_name!="" && b {
|
|
|
+ if new_name != "" && b {
|
|
|
isOK++
|
|
|
- QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
"is_word": 1,
|
|
|
"name_word" : new_name,
|
|
|
"score":score,
|
|
|
+
|
|
|
},
|
|
|
})
|
|
|
- }else {
|
|
|
- QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ } else {
|
|
|
+ QfwMgo85.UpdateById("winner_err_new", tmpid, map[string]interface{}{
|
|
|
"$set": map[string]interface{}{
|
|
|
+
|
|
|
"is_word": -1,
|
|
|
"name_word" : new_name,
|
|
|
"score":score,
|
|
|
+
|
|
|
},
|
|
|
})
|
|
|
}
|
|
|
|
|
|
- }(name,tmpid)
|
|
|
+ }(name, tmpid)
|
|
|
tmp = make(map[string]interface{})
|
|
|
}
|
|
|
|
|
|
wg_es.Wait()
|
|
|
|
|
|
- log.Println("is over",total,isOK)
|
|
|
+ log.Println("is over", total, isOK)
|
|
|
}
|
|
|
|
|
|
+
|
|
|
func TemporaryTestNewData() {
|
|
|
log.Println("测试......导出新数据")
|
|
|
QfwMgo85 = &MongodbSim{
|
|
@@ -461,6 +462,7 @@ func TemporaryTestNewData() {
|
|
|
|
|
|
|
|
|
|
|
|
+
|
|
|
var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
|
|
|
"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
|
|
|
"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
|
|
@@ -495,11 +497,9 @@ var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|?|%|代码标识|删
|
|
|
|
|
|
// uncon_strReg matches any string containing one of the listed
// organisation-type keywords (园, 政府, 集团, 公司, 有限, 合伙, 企, 院, 学, 局, 处).
var uncon_strReg = regexp.MustCompile(`(园|政府|集团|公司|有限|合伙|企|院|学|局|处)`)
|
|
|
|
|
|
-
|
|
|
-
|
|
|
// startWordReg_1 matches, at the start of the input, one to five characters
// (capture group 1) followed by an administrative-division suffix
// (capture group 2: 省, 市, 县, 州, 自治区 or 特别行政区).
var startWordReg_1 = regexp.MustCompile(`^(.{1,5})(省|市|县|州|自治区|特别行政区)`)
|
|
|
// startWordReg_2 is anchored at the start of the input and matches one of the
// region names listed in the alternation below (北京, 天津, 重庆, ...). The
// pattern is split across several string literals only for line length.
var startWordReg_2 *regexp.Regexp = regexp.MustCompile("^(北京|天津|重庆|上海|河北|山西|" +
|
|
|
"浙江|江西|湖北|吉林|海南|甘肃|广东|陕西|辽宁|山东|河南|云南|黑龙江|福建|贵州|江苏|安徽|" +
|
|
|
"湖南|四川|青海|台湾|新疆|内蒙古|宁夏|西藏|广西|澳门|香港)")
|
|
|
|
|
|
-var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")
|
|
|
// endWordReg matches input that ends with 有限公司 or 有限责任公司.
+var endWordReg *regexp.Regexp = regexp.MustCompile("(有限公司|有限责任公司)$")
|