package main import ( "encoding/json" "fmt" "github.com/tealeg/xlsx" "log" "os" qu "qfw/util" "qfw/util/elastic" "strings" "sync" "unicode/utf8" "go.mongodb.org/mongo-driver/bson/primitive" ) var ( sysconfig map[string]interface{} //配置文件 save_mgo *MongodbSim ) func init() { save_mgo = &MongodbSim{ MongodbAddr: "192.168.3.207:27092", DbName: "zhengkun", Size: 5, } save_mgo.InitPool() elastic.InitElasticSize("http://192.168.3.11:9800",20) } func dealWithDataXlsx() { q := map[string]interface{}{} sess := save_mgo.GetMgoConn() defer save_mgo.DestoryMongoConn(sess) it := sess.DB(save_mgo.DbName).C("zk_test_words").Find(&q).Iter() total:=0 saveArr := make([]map[string]string,0) for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total % 10000 == 0 { log.Println("current index",total,tmp["_id"]) } if total % 30 ==0 { name:=qu.ObjToString(tmp["name"]) dict := make(map[string]string) dict["name"] = name for i:=0; i<5;i++ { value,total,hit :="","","" key := "word_"+fmt.Sprintf("%d",i) if tmp[key]!=nil { if arr,ok := tmp[key].(primitive.A);ok { dataArr :=qu.ObjArrToMapArr(arr) value =qu.ObjToString(dataArr[0]["name"]) if i!=0 { total = fmt.Sprintf("%d",dataArr[0]["all_words"]) hit = fmt.Sprintf("%d",dataArr[0]["hit_words"]) } } } key1,key2:="total"+fmt.Sprintf("%d",i),"hit"+fmt.Sprintf("%d",i) dict[key] = value dict[key1] = total dict[key2] = hit } saveArr= append(saveArr,dict) } tmp = make(map[string]interface{}) } os.Remove("words.xlsx") //写excle f :=xlsx.NewFile() for i:=0; i<5;i++ { key := "word_"+fmt.Sprintf("%d",i) sheet, _ := f.AddSheet("统计"+key) row := sheet.AddRow() row.AddCell().Value = "name" row.AddCell().Value = key if i!=0 { row.AddCell().Value = "total" row.AddCell().Value = "hit" } key1,key2:="total"+fmt.Sprintf("%d",i),"hit"+fmt.Sprintf("%d",i) for _,tmp := range saveArr { row = sheet.AddRow() row.AddCell().SetString(tmp["name"]) row.AddCell().SetString(tmp[key]) row.AddCell().SetString(fmt.Sprintf("%s",tmp[key1])) row.AddCell().SetString(fmt.Sprintf("%s",tmp[key2])) } } err := f.Save("words.xlsx") if err != nil { log.Println("保存xlsx失败:", err) }else { log.Println("保存xlsx成功:", err) } } func main() { //导出xlsx dealWithDataXlsx() return defer qu.Catch() log.Println("处理 ... 指定企业名称 ...") //分析错误数据 // q := map[string]interface{}{} sess := save_mgo.GetMgoConn() defer save_mgo.DestoryMongoConn(sess) //细节才需要遍历 it := sess.DB(save_mgo.DbName).C("zk_company_test").Find(&q).Iter() total:=0 for tmp := make(map[string]interface{}); it.Next(&tmp); total++ { if total % 10000 == 0 { log.Println("current index",total,tmp["_id"]) } name:=qu.ObjToString(tmp["name"]) save_dict := make(map[string]interface{},0) for i:=0; i<5;i++ { key := "word_"+fmt.Sprintf("%d",i) dataArr :=dealWithScoreRules(name,i) if dataArr ==nil || len(dataArr)<1 { //无数据 }else { save_dict[key] = dealWithWordsRules(name,dataArr,i) } } if len(save_dict)>0 { save_dict["name"] = name save_mgo.Save("zk_test_words",save_dict) } tmp = make(map[string]interface{}) } } //分数维度 func dealWithScoreRules(name string,space int) []map[string]interface{} { key := "" if space>0&&space<5{ key = fmt.Sprintf("%d",space) } query:= `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name_`+key+`","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}` if key=="" { query = `{"query":{"bool":{"must":[{"query_string":{"default_field":"azktest.name","query":"`+name+`"}}],"must_not":[],"should":[]}},"from":0,"size":3,"sort":[],"facets":{}}` } client := elastic.GetEsConn() defer elastic.DestoryEsConn(client) searchResult, err := client.Search().Index("azktest").Type("azktest").Source(query).Do() if err != nil { log.Println("从ES查询出错", err.Error()) return nil } resNum := len(searchResult.Hits.Hits) res := make([]map[string]interface{}, resNum) if searchResult.Hits != nil { if resNum < 5000 { for i, hit := range searchResult.Hits.Hits { data := make(map[string]interface{},0) json.Unmarshal(*hit.Source, &data) res[i] = map[string]interface{}{ "name":data["name"], "score":*hit.Score, } } } else { log.Println("查询结果太多,查询到:", resNum, "条") } } return res } //击中数量以及比例 func dealWithWordsRules(name string ,source []map[string]interface{},space int) []map[string]interface{} { nameArr,_ := calculateWordCount(name,space) newArr := make([]map[string]interface{},0) for _,v := range source { total,hit :=0,0 source_name :=qu.ObjToString(v["name"]) _,total = calculateWordCount(source_name,space) for _,v1 := range nameArr { if strings.Contains(source_name,v1) { hit++ } } if space==0 { newArr = append(newArr, map[string]interface{}{ "name":source_name, "score":qu.Float64All(v["score"]), }) }else { newArr = append(newArr, map[string]interface{}{ "name":source_name, "score":qu.Float64All(v["score"]), "all_words" : total, "hit_words" : hit, }) } } return newArr } //分词结果 func calculateWordCount(name string,space int) ([]string,int) { arr := make([]string,0) total := utf8.RuneCountInString(name)-(space-1) if name == "" || space<=0 || total<=0 { return arr,0 } nameRune := []rune(name) for i:=0;i