|
@@ -1,304 +0,0 @@
|
|
|
-package main
|
|
|
-
|
|
|
-import (
|
|
|
- "encoding/json"
|
|
|
- "fmt"
|
|
|
- "github.com/tealeg/xlsx"
|
|
|
- "log"
|
|
|
- "os"
|
|
|
- qu "qfw/util"
|
|
|
- "qfw/util/elastic"
|
|
|
- "strings"
|
|
|
- "sync"
|
|
|
- "unicode/utf8"
|
|
|
- "go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
-
|
|
|
-)
|
|
|
-var (
|
|
|
- sysconfig map[string]interface{} //配置文件
|
|
|
- save_mgo *MongodbSim
|
|
|
-)
|
|
|
-
|
|
|
-func init() {
|
|
|
- save_mgo = &MongodbSim{
|
|
|
- MongodbAddr: "192.168.3.207:27092",
|
|
|
- DbName: "zhengkun",
|
|
|
- Size: 5,
|
|
|
- }
|
|
|
- save_mgo.InitPool()
|
|
|
-
|
|
|
- elastic.InitElasticSize("http://192.168.3.11:9800",20)
|
|
|
-}
|
|
|
-
|
|
|
-func dealWithDataXlsx() {
|
|
|
-
|
|
|
- q := map[string]interface{}{}
|
|
|
- sess := save_mgo.GetMgoConn()
|
|
|
- defer save_mgo.DestoryMongoConn(sess)
|
|
|
- it := sess.DB(save_mgo.DbName).C("zk_test_words").Find(&q).Iter()
|
|
|
- total:=0
|
|
|
- saveArr := make([]map[string]string,0)
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total % 10000 == 0 {
|
|
|
- log.Println("current index",total,tmp["_id"])
|
|
|
- }
|
|
|
-
|
|
|
- if total % 30 ==0 {
|
|
|
- name:=qu.ObjToString(tmp["name"])
|
|
|
- dict := make(map[string]string)
|
|
|
- dict["name"] = name
|
|
|
- for i:=0; i<5;i++ {
|
|
|
- value,total,hit :="","",""
|
|
|
- key := "word_"+fmt.Sprintf("%d",i)
|
|
|
- if tmp[key]!=nil {
|
|
|
-
|
|
|
- if arr,ok := tmp[key].(primitive.A);ok {
|
|
|
- dataArr :=qu.ObjArrToMapArr(arr)
|
|
|
- value =qu.ObjToString(dataArr[0]["name"])
|
|
|
- if i!=0 {
|
|
|
- total = fmt.Sprintf("%d",dataArr[0]["all_words"])
|
|
|
- hit = fmt.Sprintf("%d",dataArr[0]["hit_words"])
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
- key1,key2:="total"+fmt.Sprintf("%d",i),"hit"+fmt.Sprintf("%d",i)
|
|
|
- dict[key] = value
|
|
|
- dict[key1] = total
|
|
|
- dict[key2] = hit
|
|
|
-
|
|
|
- }
|
|
|
- saveArr= append(saveArr,dict)
|
|
|
- }
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- os.Remove("words.xlsx") //写excle
|
|
|
- f :=xlsx.NewFile()
|
|
|
-
|
|
|
-
|
|
|
- for i:=0; i<5;i++ {
|
|
|
- key := "word_"+fmt.Sprintf("%d",i)
|
|
|
- sheet, _ := f.AddSheet("统计"+key)
|
|
|
- row := sheet.AddRow()
|
|
|
- row.AddCell().Value = "name"
|
|
|
- row.AddCell().Value = key
|
|
|
- if i!=0 {
|
|
|
- row.AddCell().Value = "total"
|
|
|
- row.AddCell().Value = "hit"
|
|
|
- }
|
|
|
- key1,key2:="total"+fmt.Sprintf("%d",i),"hit"+fmt.Sprintf("%d",i)
|
|
|
-
|
|
|
- for _,tmp := range saveArr {
|
|
|
- row = sheet.AddRow()
|
|
|
- row.AddCell().SetString(tmp["name"])
|
|
|
- row.AddCell().SetString(tmp[key])
|
|
|
- row.AddCell().SetString(fmt.Sprintf("%s",tmp[key1]))
|
|
|
- row.AddCell().SetString(fmt.Sprintf("%s",tmp[key2]))
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- err := f.Save("words.xlsx")
|
|
|
- if err != nil {
|
|
|
- log.Println("保存xlsx失败:", err)
|
|
|
- }else {
|
|
|
- log.Println("保存xlsx成功:", err)
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-func main() {
|
|
|
-
|
|
|
- //导出xlsx
|
|
|
- dealWithDataXlsx()
|
|
|
- return
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- defer qu.Catch()
|
|
|
- log.Println("处理 ... 指定企业名称 ...")
|
|
|
-
|
|
|
- //分析错误数据
|
|
|
- //
|
|
|
- q := map[string]interface{}{}
|
|
|
- sess := save_mgo.GetMgoConn()
|
|
|
- defer save_mgo.DestoryMongoConn(sess)
|
|
|
- //细节才需要遍历
|
|
|
- it := sess.DB(save_mgo.DbName).C("zk_company_test").Find(&q).Iter()
|
|
|
- total:=0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total % 10000 == 0 {
|
|
|
- log.Println("current index",total,tmp["_id"])
|
|
|
- }
|
|
|
-
|
|
|
- name:=qu.ObjToString(tmp["name"])
|
|
|
- save_dict := make(map[string]interface{},0)
|
|
|
- for i:=0; i<5;i++ {
|
|
|
- key := "word_"+fmt.Sprintf("%d",i)
|
|
|
- dataArr :=dealWithScoreRules(name,i)
|
|
|
- if dataArr ==nil || len(dataArr)<1 {
|
|
|
- //无数据
|
|
|
- }else {
|
|
|
- save_dict[key] = dealWithWordsRules(name,dataArr,i)
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- if len(save_dict)>0 {
|
|
|
- save_dict["name"] = name
|
|
|
- save_mgo.Save("zk_test_words",save_dict)
|
|
|
- }
|
|
|
-
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
-
|
|
|
-}
|
|
|
-
|
|
|
-//分数维度
|
|
|
-func dealWithScoreRules(name string,space int) []map[string]interface{} {
|
|
|
- key := ""
|
|
|
- if space>0&&space<5{
|
|
|
- key = fmt.Sprintf("%d",space)
|
|
|
- }
|
|
|
- query:= `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_`+key+`","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}`
|
|
|
-
|
|
|
- if key=="" {
|
|
|
- query = `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}`
|
|
|
- }
|
|
|
- client := elastic.GetEsConn()
|
|
|
- defer elastic.DestoryEsConn(client)
|
|
|
- searchResult, err := client.Search().Index("azktest").Type("azktest").Source(query).Do()
|
|
|
- if err != nil {
|
|
|
- log.Println("从ES查询出错", err.Error())
|
|
|
- return nil
|
|
|
- }
|
|
|
- resNum := len(searchResult.Hits.Hits)
|
|
|
- res := make([]map[string]interface{}, resNum)
|
|
|
- if searchResult.Hits != nil {
|
|
|
- if resNum < 5000 {
|
|
|
- for i, hit := range searchResult.Hits.Hits {
|
|
|
- data := make(map[string]interface{},0)
|
|
|
- json.Unmarshal(*hit.Source, &data)
|
|
|
- res[i] = map[string]interface{}{
|
|
|
- "name":data["name"],
|
|
|
- "score":*hit.Score,
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- log.Println("查询结果太多,查询到:", resNum, "条")
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
- return res
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-//击中数量以及比例
|
|
|
-func dealWithWordsRules(name string ,source []map[string]interface{},space int) []map[string]interface{} {
|
|
|
-
|
|
|
- nameArr,_ := calculateWordCount(name,space)
|
|
|
- newArr := make([]map[string]interface{},0)
|
|
|
- for _,v := range source {
|
|
|
- total,hit :=0,0
|
|
|
- source_name :=qu.ObjToString(v["name"])
|
|
|
- _,total = calculateWordCount(source_name,space)
|
|
|
- for _,v1 := range nameArr {
|
|
|
- if strings.Contains(source_name,v1) {
|
|
|
- hit++
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- if space==0 {
|
|
|
- newArr = append(newArr, map[string]interface{}{
|
|
|
- "name":source_name,
|
|
|
- "score":qu.Float64All(v["score"]),
|
|
|
- })
|
|
|
- }else {
|
|
|
- newArr = append(newArr, map[string]interface{}{
|
|
|
- "name":source_name,
|
|
|
- "score":qu.Float64All(v["score"]),
|
|
|
- "all_words" : total,
|
|
|
- "hit_words" : hit,
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
- return newArr
|
|
|
-}
|
|
|
-
|
|
|
// calculateWordCount is the word-splitting step: it slides a window of space
// runes over name, one rune at a time, and returns every window as a string
// together with the number of windows.
//
// An empty, non-nil slice and 0 are returned when name is empty, space is
// not positive, or name has fewer than space runes.
func calculateWordCount(name string, space int) ([]string, int) {
	// Validate before doing arithmetic on space.
	if name == "" || space <= 0 {
		return []string{}, 0
	}
	total := utf8.RuneCountInString(name) - (space - 1)
	if total <= 0 {
		return []string{}, 0
	}

	// Slice by runes, not bytes, so multi-byte characters stay intact.
	nameRunes := []rune(name)
	// The number of windows is known up front, so allocate once.
	words := make([]string, 0, total)
	for i := 0; i < total; i++ {
		words = append(words, string(nameRunes[i:i+space]))
	}
	return words, len(words)
}
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-func readyDataEs() {
|
|
|
-
|
|
|
- q := map[string]interface{}{}
|
|
|
- sess := save_mgo.GetMgoConn()
|
|
|
- defer save_mgo.DestoryMongoConn(sess)
|
|
|
- //多线程升索引
|
|
|
- pool_es := make(chan bool, 10)
|
|
|
- wg_es := &sync.WaitGroup{}
|
|
|
- //细节才需要遍历
|
|
|
- it := sess.DB(save_mgo.DbName).C("zk_company_name").Find(&q).Iter()
|
|
|
- total:=0
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total % 10000 == 0 {
|
|
|
- log.Println("current index",total,tmp["_id"])
|
|
|
- }
|
|
|
- savetmp := make(map[string]interface{}, 0)
|
|
|
- savetmp["_id"] = tmp["_id"]
|
|
|
- savetmp["name"] = qu.ObjToString(tmp["company_name"])
|
|
|
- savetmp["name_1"] = qu.ObjToString(tmp["company_name"])
|
|
|
- savetmp["name_2"] = qu.ObjToString(tmp["company_name"])
|
|
|
- savetmp["name_3"] = qu.ObjToString(tmp["company_name"])
|
|
|
- savetmp["name_4"] = qu.ObjToString(tmp["company_name"])
|
|
|
- pool_es <- true
|
|
|
- wg_es.Add(1)
|
|
|
- go func(savetmp map[string]interface{}) {
|
|
|
- defer func() {
|
|
|
- <-pool_es
|
|
|
- wg_es.Done()
|
|
|
- }()
|
|
|
- elastic.Save("azktest","azktest", savetmp)
|
|
|
- }(savetmp)
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- wg_es.Wait()
|
|
|
-
|
|
|
-
|
|
|
- log.Println("is over",total)
|
|
|
-}
|
|
|
-
|