|
@@ -7,10 +7,13 @@ import (
|
|
|
"go.mongodb.org/mongo-driver/bson"
|
|
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
"go.mongodb.org/mongo-driver/mongo/options"
|
|
|
+ "gopkg.in/olivere/elastic.v1"
|
|
|
"log"
|
|
|
"net"
|
|
|
+ "net/http"
|
|
|
"regexp"
|
|
|
"strings"
|
|
|
+ "sync"
|
|
|
"time"
|
|
|
)
|
|
|
|
|
@@ -316,6 +319,95 @@ func dealWithEsData(name string, tmpid string) {
|
|
|
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+func TemporaryTest() {
|
|
|
+ log.Println("测试......导出数据")
|
|
|
+
|
|
|
+ QfwMgo85 = &MongodbSim{
|
|
|
+ MongodbAddr: "172.17.4.187:27082,172.17.145.163:27083",
|
|
|
+ Size: 5,
|
|
|
+ DbName: "mixdata",
|
|
|
+ UserName: "fengweiqiang",
|
|
|
+ PassWord: "fwq@123123",
|
|
|
+ }
|
|
|
+ QfwMgo85.InitPool()
|
|
|
+
|
|
|
+ Client_Es, _ = elastic.NewClient(http.DefaultClient, "http://ela.spdata.jianyu360.com")
|
|
|
+
|
|
|
+ es_type, es_index = "unique_qy", "unique_qy"
|
|
|
+
|
|
|
+
|
|
|
+ q := map[string]interface{}{
|
|
|
+ "check_history":map[string]interface{}{
|
|
|
+ "$exists":0,
|
|
|
+ },
|
|
|
+ }
|
|
|
+ sess := QfwMgo85.GetMgoConn()
|
|
|
+ defer QfwMgo85.DestoryMongoConn(sess)
|
|
|
+ //多线程升索引
|
|
|
+ pool_es := make(chan bool, 3)
|
|
|
+ wg_es := &sync.WaitGroup{}
|
|
|
+ it := sess.DB(QfwMgo85.DbName).C("winner_err_new").Find(&q).Iter()
|
|
|
+ total,isOK:=0,0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
+ if total % 100 == 0 {
|
|
|
+ log.Println("current index",total,isOK)
|
|
|
+ }
|
|
|
+
|
|
|
+ name:=ObjToString(tmp["name"])
|
|
|
+ tmpid := BsonTOStringId(tmp["_id"])
|
|
|
+ pool_es <- true
|
|
|
+ wg_es.Add(1)
|
|
|
+
|
|
|
+ go func(name string,tmpid string) {
|
|
|
+ defer func() {
|
|
|
+ <-pool_es
|
|
|
+ wg_es.Done()
|
|
|
+ }()
|
|
|
+ //start := int(time.Now().Unix())
|
|
|
+ new_name,b,_ :=dealWithNameScoreRules(name)
|
|
|
+ //log.Println("耗时:",int(time.Now().Unix())-start,"秒",b,name,new_name,tmpid)
|
|
|
+ if new_name!="" && b {
|
|
|
+ isOK++
|
|
|
+ QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ "$set": map[string]interface{}{
|
|
|
+ "is_word": 1,
|
|
|
+ "name_word" : new_name,
|
|
|
+ },
|
|
|
+ })
|
|
|
+ }else {
|
|
|
+ QfwMgo85.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ "$set": map[string]interface{}{
|
|
|
+ "is_word": -1,
|
|
|
+ "name_word" : new_name,
|
|
|
+ },
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ }(name,tmpid)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+
|
|
|
+ wg_es.Wait()
|
|
|
+
|
|
|
+ log.Println("is over",total,isOK)
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
|
|
|
"中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
|
|
|
"监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
|
|
@@ -349,3 +441,12 @@ var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|?|%|代码标识|删
|
|
|
"[a-zA-Z]{5,}")
|
|
|
|
|
|
// uncon_strReg matches, anywhere in the input, any one of the listed
// organisation-type keywords (e.g. 政府, 集团, 公司, 有限, 局).
var uncon_strReg = regexp.MustCompile(`(园|政府|集团|公司|有限|合伙|企|院|学|局|处)`)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
// startWordReg_1 matches a leading run of one to five characters that is
// immediately followed by an administrative-division suffix
// (省, 市, 县, 州, 自治区 or 特别行政区). The match is anchored at the start of
// the input and the character run is greedy, so the longest qualifying
// prefix is preferred.
var startWordReg_1 = regexp.MustCompile("^(.{1,5})(省|市|县|州|自治区|特别行政区)")
|
|
|
// startWordReg_2 matches one of the listed province-level region names
// (without any 省/市 suffix) at the very start of the input.
var startWordReg_2 = regexp.MustCompile("^(北京|天津|重庆|上海|河北|山西|" +
	"浙江|江西|湖北|吉林|海南|甘肃|广东|陕西|辽宁|山东|河南|云南|黑龙江|福建|贵州|江苏|安徽|" +
	"湖南|四川|青海|台湾|新疆|内蒙古|宁夏|西藏|广西|澳门|香港)")
|
|
|
+
|
|
|
// endWordReg matches a trailing company suffix, either 有限公司 or
// 有限责任公司, anchored at the end of the input.
var endWordReg = regexp.MustCompile("(有限公司|有限责任公司)$")
|