|
@@ -0,0 +1,375 @@
|
|
|
+package main
|
|
|
+
|
|
|
+import (
|
|
|
+ "bufio"
|
|
|
+ "fmt"
|
|
|
+ log "github.com/donnie4w/go-logger/logger"
|
|
|
+ "io"
|
|
|
+ "os"
|
|
|
+ qu "qfw/util"
|
|
|
+ "regexp"
|
|
|
+ "strings"
|
|
|
+ "sync"
|
|
|
+)
|
|
|
var (
	// numberlock guards the counters and maps that worker goroutines in this
	// file update concurrently.
	numberlock sync.Mutex

	// strReg greedily matches everything from the start of the string up to
	// and including the last administrative-division character
	// (镇, 乡, 县, 区, 市, 省, 旗, 州 or 师).
	strReg = regexp.MustCompile(".*(镇|乡|县|区|市|省|旗|州|师)")

	// startReg matches a string whose first character is one of the
	// characters in the bracket set and whose second character is not "员".
	startReg = regexp.MustCompile("^[委)) ][^员]")

	// startReg_1 matches only that single leading character, so it can be
	// stripped once startReg has confirmed the match.
	startReg_1 = regexp.MustCompile("^[委)) ]")

	// endReg matches strings that end with "医院".
	endReg = regexp.MustCompile("(医院)$")
)
|
|
|
+
|
|
|
+//修复全量判重数据
|
|
|
+func repairfullrepeatdata() {
|
|
|
+ sess := save_mgo.GetMgoConn()
|
|
|
+ defer save_mgo.DestoryMongoConn(sess)
|
|
|
+ q := map[string]interface{}{}
|
|
|
+ it := sess.DB(save_mgo.DbName).C("zktes_full_repeat").Find(&q).Sort("_id").Iter()
|
|
|
+ pool := make(chan bool, 1)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ total := 0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
+ if total %10000 == 0 {
|
|
|
+ log.Debug("cur index ", total,tmp["_id"])
|
|
|
+ }
|
|
|
+ pool <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+
|
|
|
+ tmpid := BsonTOStringId(tmp["_id"])
|
|
|
+ repeat_id :=qu.ObjToString(tmp["repeat_id"])
|
|
|
+ reason := "全量~"+qu.ObjToString(tmp["reason"])
|
|
|
+ update := map[string]interface{}{
|
|
|
+ "repeat_id":repeat_id,
|
|
|
+ "reason":reason,
|
|
|
+ "repeat":1,
|
|
|
+ }
|
|
|
+ save_mgo.UpdateById("result_20220218",tmpid,map[string]interface{}{
|
|
|
+ "$set": update,
|
|
|
+ })
|
|
|
+ }(tmp)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ log.Debug("is over ",total)
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+//计算数据分布情况
|
|
|
+func calculatedatadisposition() {
|
|
|
+
|
|
|
+ sess := save_mgo.GetMgoConn()
|
|
|
+ defer save_mgo.DestoryMongoConn(sess)
|
|
|
+ q := map[string]interface{}{}
|
|
|
+ it := sess.DB(save_mgo.DbName).C("special_enterprise").Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
+ "company_name":1,
|
|
|
+ "authority":1,
|
|
|
+ }).Iter()
|
|
|
+ pool := make(chan bool, 8)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ total := 0
|
|
|
+ dict := map[string]int{}
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
+ if total %100000 == 0 {
|
|
|
+ log.Debug("cur index ", total,len(dict))
|
|
|
+ }
|
|
|
+ pool <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ company_name := qu.ObjToString(tmp["company_name"])
|
|
|
+ new_str := strReg.ReplaceAllString(company_name,"")
|
|
|
+ if strings.Contains(new_str,"委员会办公室") {
|
|
|
+ new_str = "委员会办公室"
|
|
|
+ }else {
|
|
|
+ if strings.Contains(new_str,"办公室") {
|
|
|
+ new_str = "办公室"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if startReg.MatchString(new_str) {
|
|
|
+ new_str = startReg_1.ReplaceAllString(new_str,"")
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if endReg.MatchString(new_str) {
|
|
|
+ new_str = "医院"
|
|
|
+ }
|
|
|
+
|
|
|
+ numberlock.Lock()
|
|
|
+ num := dict[new_str]
|
|
|
+ dict[new_str] = num+1
|
|
|
+ numberlock.Unlock()
|
|
|
+ }(tmp)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ log.Debug("is over ",total,len(dict))
|
|
|
+
|
|
|
+ rankArr := []int{}
|
|
|
+ nameArr := []string{}
|
|
|
+ for k,v := range dict {
|
|
|
+ if v<1000 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ nameArr = append(nameArr,k)
|
|
|
+ rankArr = append(rankArr,qu.IntAll(v))
|
|
|
+ }
|
|
|
+ rankArr,nameArr = dealWithSort(rankArr,nameArr)
|
|
|
+ log.Debug(len(rankArr),len(nameArr))
|
|
|
+ //列取的数据
|
|
|
+ for k,v := range rankArr {
|
|
|
+ //if k>=50 {
|
|
|
+ // break
|
|
|
+ //}
|
|
|
+ fmt.Println(v,"~",nameArr[k])
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
// dealWithSort sorts slice in descending order, in place, and applies every
// swap to name as well so that name[i] keeps describing slice[i].
//
// The sort is a stable bubble sort: entries with equal values keep their
// relative order. name must be at least as long as slice; a shorter name
// panics as soon as a swap reaches an index it does not have. The same two
// slices that were passed in are returned.
func dealWithSort(slice []int, name []string) ([]int, []string) {
	// After each pass the smallest remaining value has sunk to the end of the
	// unsorted prefix, so the prefix shrinks by one element per pass.
	for unsorted := len(slice); unsorted > 1; unsorted-- {
		swapped := false
		for i := 1; i < unsorted; i++ {
			if slice[i] > slice[i-1] {
				slice[i], slice[i-1] = slice[i-1], slice[i]
				name[i], name[i-1] = name[i-1], name[i]
				swapped = true
			}
		}
		// A pass without swaps means everything is already in order.
		if !swapped {
			break
		}
	}
	return slice, name
}
|
|
|
+
|
|
|
+
|
|
|
+//分析可修复buyer数量
|
|
|
+func statisticalbuyererr() {
|
|
|
+ coll_arr := []string{"special_enterprise","special_foundation","special_gov_unit","special_law_office","special_social_organ","special_trade_union"}
|
|
|
+ q := map[string]interface{}{}
|
|
|
+ pool := make(chan bool, 5)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ sess := qy_mgo.GetMgoConn()
|
|
|
+ defer qy_mgo.DestoryMongoConn(sess)
|
|
|
+ it := sess.DB(qy_mgo.DbName).C("buyer_err").Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
+ "name":1,
|
|
|
+ }).Iter()
|
|
|
+ total,isok,isok1,isok2 := 0,0,0,0
|
|
|
+ iserr :=0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
+ if total %10000 == 0 {
|
|
|
+ log.Debug("cur index ", total, isok ,isok1, isok2)
|
|
|
+ }
|
|
|
+ if iserr > 30000 {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ pool <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ name := qu.ObjToString(tmp["name"])
|
|
|
+ query := map[string]interface{}{
|
|
|
+ "company_name":name,
|
|
|
+ }
|
|
|
+ if name!="" {
|
|
|
+ //for _,v := range coll_arr{
|
|
|
+ // //位置181~对比特殊企业
|
|
|
+ // data_yx := spi_mgo.FindOne(v, query)
|
|
|
+ // if data_yx!=nil && len(data_yx)>2 {
|
|
|
+ // numberlock.Lock()
|
|
|
+ // isok2++
|
|
|
+ // numberlock.Unlock()
|
|
|
+ // break
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ isExists:= false
|
|
|
+ data_yy := save_mgo.FindOne("data_info", map[string]interface{}{
|
|
|
+ "name":name,
|
|
|
+ })
|
|
|
+ if data_yy!=nil && len(data_yy)>2 {
|
|
|
+ numberlock.Lock()
|
|
|
+ isok++
|
|
|
+ isExists = true
|
|
|
+ numberlock.Unlock()
|
|
|
+ } else {
|
|
|
+ //对比企业
|
|
|
+ data_qy := qy_mgo.FindOne("qyxy_std", query)
|
|
|
+ if data_qy!=nil && len(data_qy)>2 {
|
|
|
+ numberlock.Lock()
|
|
|
+ isok1++
|
|
|
+ isExists = true
|
|
|
+ numberlock.Unlock()
|
|
|
+ }else {
|
|
|
+ for _,v := range coll_arr{
|
|
|
+ //位置181~对比特殊企业
|
|
|
+ data_yx := spi_mgo.FindOne(v, query)
|
|
|
+ if data_yx!=nil && len(data_yx)>2 {
|
|
|
+ numberlock.Lock()
|
|
|
+ isok2++
|
|
|
+ isExists = true
|
|
|
+ numberlock.Unlock()
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if !isExists {
|
|
|
+ numberlock.Lock()
|
|
|
+ iserr++
|
|
|
+ if iserr%100==0 {
|
|
|
+ fmt.Println(name)
|
|
|
+ }
|
|
|
+ numberlock.Unlock()
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+
|
|
|
+ log.Debug("最终修复数据~",total,"~",isok,"~",isok1,"~",isok2)
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+//分析医械数据~已存在情况
|
|
|
+func statisticalyxdata() {
|
|
|
+ coll_arr := []string{"special_enterprise","special_foundation","special_gov_unit","special_law_office","special_social_organ","special_trade_union"}
|
|
|
+ q := map[string]interface{}{}
|
|
|
+ pool := make(chan bool, 5)
|
|
|
+ wg := &sync.WaitGroup{}
|
|
|
+ //循环遍历数据
|
|
|
+ for _,v := range coll_arr {
|
|
|
+ sess := save_mgo.GetMgoConn()
|
|
|
+ defer save_mgo.DestoryMongoConn(sess)
|
|
|
+ it := sess.DB(save_mgo.DbName).C(v).Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
+ "company_name":1,
|
|
|
+ }).Iter()
|
|
|
+ total,isok := 0,0
|
|
|
+ for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
+ if total %100000 == 0 {
|
|
|
+ log.Debug("cur index ",total,isok)
|
|
|
+ }
|
|
|
+ pool <- true
|
|
|
+ wg.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool
|
|
|
+ wg.Done()
|
|
|
+ }()
|
|
|
+ company_name := qu.ObjToString(tmp["company_name"])
|
|
|
+ query := map[string]interface{}{
|
|
|
+ "company_name":company_name,
|
|
|
+ }
|
|
|
+ if company_name!="" {
|
|
|
+ data := qy_mgo.FindOne("qyxy_std", query)
|
|
|
+ if data!=nil && len(data)>2 {
|
|
|
+ numberlock.Lock()
|
|
|
+ isok++
|
|
|
+ numberlock.Unlock()
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }(tmp)
|
|
|
+ tmp = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ wg.Wait()
|
|
|
+ log.Debug(v,"~",total,"~",isok)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+//临时导入mgo数据,源txt
|
|
|
+func importYLQXMgoData(path string) {
|
|
|
+ var startReg = regexp.MustCompile("^(.{0,10}[(])")
|
|
|
+ var endReg = regexp.MustCompile("[)]$")
|
|
|
+ var spaceReg = regexp.MustCompile(",[ ]{2,}")
|
|
|
+ fileHanle,err := os.OpenFile(path, os.O_RDONLY, 0666)
|
|
|
+ if err != nil {
|
|
|
+ log.Debug("读取文件~异常")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer fileHanle.Close()
|
|
|
+ reader := bufio.NewReader(fileHanle)
|
|
|
+ var results []string
|
|
|
+ pre_str := ""
|
|
|
+ for {
|
|
|
+ line, _, err := reader.ReadLine()
|
|
|
+ if err == io.EOF {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ str := string(line)
|
|
|
+ if str=="GO" || str=="VALUES" {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if len(results)==0 && strings.Contains(str,"INSERT"){
|
|
|
+ str = strings.ReplaceAll(str,"INSERT INTO","")
|
|
|
+ arr := strings.Split(str,"(")
|
|
|
+ s_1 := strings.ReplaceAll(arr[0]," ","")
|
|
|
+ s_2 := strings.ReplaceAll(arr[1]," ","")
|
|
|
+ s_2 = strings.ReplaceAll(s_2,")","")
|
|
|
+ results = append(results, s_1)
|
|
|
+ results = append(results, s_2)
|
|
|
+ }else {
|
|
|
+ if strings.Contains(str,"INSERT") {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if strings.Contains(str,"VALUES") {
|
|
|
+ if pre_str!="" { //处理字符串
|
|
|
+ pre_str = startReg.ReplaceAllString(pre_str, "")
|
|
|
+ pre_str = endReg.ReplaceAllString(pre_str, "")
|
|
|
+ pre_str = spaceReg.ReplaceAllString(pre_str, ",")
|
|
|
+ pre_str = strings.ReplaceAll(pre_str, "\n", ",")
|
|
|
+ results = append(results, pre_str)
|
|
|
+ }
|
|
|
+ pre_str = str
|
|
|
+ }else {
|
|
|
+ pre_str += str
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ iserr := 0
|
|
|
+ coll := ""
|
|
|
+ keyArr := []string{}
|
|
|
+ for k,v := range results{
|
|
|
+ if k==0 {
|
|
|
+ coll = v
|
|
|
+ }else if k==1 {
|
|
|
+ keyArr = strings.Split(v,",")
|
|
|
+ }else {
|
|
|
+ arr := strings.Split(v,", ")
|
|
|
+ if len(arr)!= len(keyArr) {
|
|
|
+ iserr++
|
|
|
+ }else {
|
|
|
+ dict := make(map[string]interface{},0)
|
|
|
+ for k,v := range keyArr {
|
|
|
+ str := arr[k]
|
|
|
+ str = strings.ReplaceAll(str,"'","")
|
|
|
+ dict[v] = str
|
|
|
+ }
|
|
|
+ save_mgo.Save(coll,dict)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ log.Debug("最终解析:",coll,"~",len(results),"~",iserr)
|
|
|
+
|
|
|
+}
|
|
|
+
|