|
@@ -11,7 +11,6 @@ import (
|
|
|
"runtime"
|
|
|
"sensitiveWords.udp/util"
|
|
|
"strings"
|
|
|
- "sync"
|
|
|
"time"
|
|
|
)
|
|
|
|
|
@@ -193,10 +192,23 @@ func dealWithEsData(name string,tmpid string) {
|
|
|
|
|
|
// Split the data into memory-bounded segments.
|
|
|
func dealWithDataMemory() {
|
|
|
- iter := MixDataMgo.GetMgoConn().C("unique_qyxy").Find(map[string]interface{}{
|
|
|
+
|
|
|
+ //临时测试
|
|
|
+ MixDataMgo = &util.MongodbSim{
|
|
|
+ MongodbAddr: "172.17.4.187:27082,172.17.145.163:27083",
|
|
|
+ Size: 20,
|
|
|
+ DbName: "mixdata",
|
|
|
+ UserName: "fengweiqiang",
|
|
|
+ PassWord: "fwq@123123",
|
|
|
+ }
|
|
|
+ MixDataMgo.InitPool()
|
|
|
+
|
|
|
+ sess := MixDataMgo.GetMgoConn()
|
|
|
+ defer MixDataMgo.DestoryMongoConn(sess)
|
|
|
+ iter := sess.DB(MixDataMgo.DbName).C("unique_qyxy").Find(map[string]interface{}{
|
|
|
"_id": map[string]interface{}{
|
|
|
- "$gte": util.BsonTOStringId("1fffffffffffffffffffffff"),
|
|
|
- "$lte": util.BsonTOStringId("9fffffffffffffffffffffff"),
|
|
|
+ "$gte": util.StringTOBsonId("1fffffffffffffffffffffff"),
|
|
|
+ "$lte": util.StringTOBsonId("9fffffffffffffffffffffff"),
|
|
|
},
|
|
|
}).Sort("_id").Iter()
|
|
|
Filter = sensitive.New()
|
|
@@ -209,22 +221,22 @@ func dealWithDataMemory() {
|
|
|
}
|
|
|
Filter.AddWord(tmp["qy_name"].(string))
|
|
|
initnum++
|
|
|
- if initnum%100000==0 {
|
|
|
+ if initnum%50000==0 {
|
|
|
runtime.ReadMemStats(&m)
|
|
|
men :=util.ToMegaBytes(m.HeapAlloc)
|
|
|
log.Printf("current index %d\tos %.2f M",initnum, men)
|
|
|
- if men>5*1024 { //7.5G
|
|
|
+ if men>7.5*1024 { //7.5G
|
|
|
saveIdArr = append(saveIdArr, map[string]string{
|
|
|
"start":start_id,
|
|
|
"end":util.BsonTOStringId(tmp["_id"]),
|
|
|
})
|
|
|
- runtime.GC()
|
|
|
+ log.Println("分段:",start_id,util.BsonTOStringId(tmp["_id"]),men)
|
|
|
Filter = sensitive.New()
|
|
|
+ runtime.GC()
|
|
|
start_id = ""
|
|
|
- time.Sleep(time.Second*5)
|
|
|
+ time.Sleep(time.Second*30)
|
|
|
}
|
|
|
}
|
|
|
- break
|
|
|
}
|
|
|
|
|
|
saveIdArr = append(saveIdArr, map[string]string{
|
|
@@ -252,49 +264,35 @@ func temporaryTest() {
|
|
|
}
|
|
|
sess := MixDataMgo.GetMgoConn()
|
|
|
defer MixDataMgo.DestoryMongoConn(sess)
|
|
|
- //多线程升索引
|
|
|
- pool_es := make(chan bool, 20)
|
|
|
- wg_es := &sync.WaitGroup{}
|
|
|
it := sess.DB(MixDataMgo.DbName).C("winner_err_new").Find(&q).Iter()
|
|
|
total,isOK:=0,0
|
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total % 1000 == 0 {
|
|
|
+ if total % 100 == 0 {
|
|
|
log.Println("current index",total,isOK)
|
|
|
}
|
|
|
|
|
|
name:=util.ObjToString(tmp["name"])
|
|
|
tmpid := util.BsonTOStringId(tmp["_id"])
|
|
|
- pool_es <- true
|
|
|
- wg_es.Add(1)
|
|
|
-
|
|
|
- go func(name string,tmpid string) {
|
|
|
- defer func() {
|
|
|
- <-pool_es
|
|
|
- wg_es.Done()
|
|
|
- }()
|
|
|
- new_name,b :=dealWithNameScoreRules(name)
|
|
|
- //log.Println(b,name,new_name,tmpid)
|
|
|
- if new_name!="" && b {
|
|
|
- isOK++
|
|
|
- MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "is_word": 1,
|
|
|
- "name_word" : new_name,
|
|
|
- },
|
|
|
- })
|
|
|
- }else {
|
|
|
- MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
- "$set": map[string]interface{}{
|
|
|
- "is_word": -1,
|
|
|
- "name_word" : new_name,
|
|
|
- },
|
|
|
- })
|
|
|
- }
|
|
|
- }(name,tmpid)
|
|
|
+ new_name,b :=dealWithNameScoreRules(name)
|
|
|
+ if new_name!="" && b {
|
|
|
+ isOK++
|
|
|
+ MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ "$set": map[string]interface{}{
|
|
|
+ "is_word": 1,
|
|
|
+ "name_word" : new_name,
|
|
|
+ },
|
|
|
+ })
|
|
|
+ }else {
|
|
|
+ MixDataMgo.UpdateById("winner_err_new",tmpid,map[string]interface{}{
|
|
|
+ "$set": map[string]interface{}{
|
|
|
+ "is_word": -1,
|
|
|
+ "name_word" : new_name,
|
|
|
+ },
|
|
|
+ })
|
|
|
+ }
|
|
|
tmp = make(map[string]interface{})
|
|
|
}
|
|
|
|
|
|
- wg_es.Wait()
|
|
|
|
|
|
log.Println("is over",total,isOK)
|
|
|
}
|