Browse Source

test: statistics and analysis of extracted-field quality

apple 5 years ago
parent
commit
6dff439250
2 changed files with 531 additions and 237 deletions
  1. +531 −237
      udpprojectset/src/heavy_test.go
  2. BIN
      udpprojectset/src/zheng_test.xlsx
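The bulk of the change is a new test, Test_crawlerExtractitCompare, which pairs records from zheng_test1_jd1 and zheng_test1_jd2 by _id, groups them by spidercode, and writes every field-level mismatch as "value1~value2" into one xlsx sheet per spider class. A minimal, self-contained sketch of that compare-and-export core, assuming the github.com/tealeg/xlsx v1 API that heavy_test.go already uses; the writeDiffs helper and the shortened field list are illustrative, not part of the commit:

// diffsketch: compare two aligned extraction runs and export mismatches.
package main

import (
	"log"

	"github.com/tealeg/xlsx"
)

// Subset of the fields the test compares; illustrative only.
var fields = []string{"title", "buyer", "budget"}

// writeDiffs writes one xlsx row per record whose fields differ between
// the two runs; a and b must be aligned by _id, as the test arranges.
func writeDiffs(a, b []map[string]string) error {
	f := xlsx.NewFile()
	sheet, err := f.AddSheet("diff")
	if err != nil {
		return err
	}
	// Header row: _id first, then one column per compared field.
	sheet.Cell(0, 0).Value = "_id"
	for j, name := range fields {
		sheet.Cell(0, j+1).Value = name
	}
	row := 1
	for i := range a {
		changed := false
		for j, name := range fields {
			if a[i][name] != b[i][name] {
				changed = true
				// Same "old~new" cell format the test uses.
				sheet.Cell(row, j+1).Value = a[i][name] + "~" + b[i][name]
			}
		}
		if changed {
			sheet.Cell(row, 0).Value = a[i]["_id"]
			row++ // advance only past rows that actually hold a difference
		}
	}
	return f.Save("diff.xlsx")
}

func main() {
	a := []map[string]string{{"_id": "1", "title": "x", "buyer": "b", "budget": "10"}}
	b := []map[string]string{{"_id": "1", "title": "y", "buyer": "b", "budget": "10"}}
	if err := writeDiffs(a, b); err != nil {
		log.Fatal(err)
	}
}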

+ 531 - 237
udpprojectset/src/heavy_test.go

@@ -8,15 +8,261 @@ import (
 	"qfw/util/mongodb"
 	"sync"
 	"testing"
-	"time"
 )
 
 var (
 	mgo          *mongodb.MongodbSim    // MongoDB client object
+	//mgo_copy          *mongodb.MongodbSim    // MongoDB client object
 )
 
 
-func Test_heavy(t *testing.T) {
+// Statistics comparing per-spider-class extraction results
+func Test_crawlerExtractitCompare(t *testing.T) {
+
+	mgo = &mongodb.MongodbSim{
+		MongodbAddr: "192.168.3.207:27092",
+		DbName:      "extract_kf",
+		Size:        util.IntAllDef(15, 10),
+	}
+	mgo.InitPool()
+
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	it :=sess.DB("extract_kf").C("zheng_test_1").Find(nil).Sort("_id").Iter()
+	n:=0
+	crawlerMap := make(map[string]string,0)
+	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+		if n%10000==0 {
+			log.Println("当前n:",n)
+		}
+
+		if n>2000 {
+			break
+		}
+		crawlerMap[util.BsonIdToSId(tmp["_id"])] = util.ObjToString(tmp["spidercode"])
+	}
+
+	sess_1 := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess_1)
+	it_1 :=sess_1.DB("extract_kf").C("zheng_test1_jd1").Find(nil).Sort("_id").Iter()
+	n1:=0
+	crawlerMap_1 := make(map[string][]map[string]interface{},0)
+
+	for tmp := make(map[string]interface{});it_1.Next(&tmp);n1++{
+		if n1%10000==0 {
+			log.Println("当前n1:",n1)
+		}
+
+		if n1>2000 {
+			break
+		}
+
+		// record fields to compare
+		dic :=map[string]interface{}{
+			"_id":util.BsonIdToSId(tmp["_id"]),
+			"href":util.ObjToString(tmp["href"]),
+			"title":util.ObjToString(tmp["title"]),
+			"buyer":util.ObjToString(tmp["buyer"]),
+			"agency":util.ObjToString(tmp["agency"]),
+			"winner":util.ObjToString(tmp["winner"]),
+			"budget":util.ObjToString(tmp["budget"]),
+			"bidamount":util.ObjToString(tmp["bidamount"]),
+			"projectname":util.ObjToString(tmp["projectname"]),
+			"projectcode":util.ObjToString(tmp["projectcode"]),
+			"publishtime":util.ObjToString(tmp["publishtime"]),
+			"bidopentime":util.ObjToString(tmp["bidopentime"]),
+			"agencyaddr":util.ObjToString(tmp["agencyaddr"]),
+		}
+		value :=crawlerMap[util.BsonIdToSId(tmp["_id"])]
+		arr := crawlerMap_1[value]
+		if arr==nil {
+			crawlerMap_1[value] = make([]map[string]interface{},0)
+			crawlerMap_1[value] = append(crawlerMap_1[value],dic)
+		}else {
+			crawlerMap_1[value] = append(crawlerMap_1[value],dic)
+		}
+
+	}
+
+	sess_2 :=mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess_2)
+	it_2 :=sess_2.DB("extract_kf").C("zheng_test1_jd2").Find(nil).Sort("_id").Iter()
+	n2:=0
+	crawlerMap_2 := make(map[string][]map[string]interface{})
+	for tmp := make(map[string]interface{}); it_2.Next(&tmp); n2++ {
+		if n2%10000==0 {
+			log.Println("当前n2:",n2)
+		}
+
+		if n2>2000 {
+			break
+		}
+
+		// record fields to compare
+		dic :=map[string]interface{}{
+			"_id":util.BsonIdToSId(tmp["_id"]),
+			"href":util.ObjToString(tmp["href"]),
+			"title":util.ObjToString(tmp["title"]),
+			"buyer":util.ObjToString(tmp["buyer"]),
+			"agency":util.ObjToString(tmp["agency"]),
+			"winner":util.ObjToString(tmp["winner"]),
+			"budget":util.ObjToString(tmp["budget"]),
+			"bidamount":util.ObjToString(tmp["bidamount"]),
+			"projectname":util.ObjToString(tmp["projectname"]),
+			"projectcode":util.ObjToString(tmp["projectcode"]),
+			"publishtime":util.ObjToString(tmp["publishtime"]),
+			"bidopentime":util.ObjToString(tmp["bidopentime"]),
+			"agencyaddr":util.ObjToString(tmp["agencyaddr"]),
+		}
+		value :=crawlerMap[util.BsonIdToSId(tmp["_id"])]
+		arr := crawlerMap_2[value]
+		if arr==nil {
+			crawlerMap_2[value] = make([]map[string]interface{},0)
+			crawlerMap_2[value] = append(crawlerMap_2[value],dic)
+		}else {
+			crawlerMap_2[value] = append(crawlerMap_2[value],dic)
+		}
+	}
+
+	log.Println("爬虫类个数分别为:",len(crawlerMap_1),len(crawlerMap_2))
+
+
+
+
+	if len(crawlerMap_1)!=len(crawlerMap_2)||len(crawlerMap_1)==0 {
+		return
+	}
+	var list = []string{
+		"title",
+		"buyer",
+		"agency",
+		"winner",
+		"budget",
+		"bidamount",
+		"projectname",
+		"projectcode",
+		"publishtime",
+		"bidopentime",
+		"agencyaddr",
+	}
+	fmt.Println(len(list))
+
+	var crawlerArr = []string{
+		"a_zgzfcgw_zfcghtgg_new",
+		"gd_gdszfcgw_dscght",
+		"a_zgzfcgw_bid_tender_new",
+		"a_ztxygjzbtbzxyxgs_zbxx",
+		"sd_zgsdzfcgw_xxgk_sxhtgk",
+	}
+
+
+	//// add the header row first
+	f :=xlsx.NewFile()
+
+	//	//// write the header on the first row
+	//	//row1 := sheet.AddRow()
+	//	//row1.AddCell().Value = "rank"
+	//	//row1.AddCell().Value = "spider class"
+	//	//row1.AddCell().Value = "valid field count"
+	//	//"site" and "href" are handled separately
+	mapLock := &sync.Mutex{}
+
+
+	// compare the data
+	for _,v:=range crawlerArr {
+		if crawlerMap_1[v]==nil||crawlerMap_2[v]==nil {
+			continue
+		}
+		log.Println("当前爬虫类:",v)
+		//取数组数据
+		arr1 := crawlerMap_1[v]
+		arr2 := crawlerMap_2[v]
+
+		log.Println(len(arr1))
+
+		if len(arr1)!=len(arr2) {
+			log.Println("数据个数错误")
+			continue
+		}else {
+			mapLock.Lock()
+			row_num,col_num :=1,2 // starting row and column
+			sheet, err := f.AddSheet(util.ObjToString(v))
+			if err==nil {
+				for i:=0;i<len(arr1);i++ {
+					dict1 := arr1[i]
+					dict2 := arr2[i]
+					if i==0 {
+						// write the header row only once
+						for j:=0;j<len(list)+2 ;j++  {
+							if j==0 {
+								sheet.Cell(0, j).Value = "_id"
+							} else if j==1 {
+								sheet.Cell(0, j).Value = "href"
+							}else {
+								sheet.Cell(0, j).Value = list[j-2]
+							}
+						}
+
+						isAdd := false
+						for k:=0;k<len(list) ;k++  {
+							if k==0 {
+								sheet.Cell(1, 0).Value = util.BsonIdToSId(dict1["_id"])
+							}
+							string_1:=util.ObjToString(dict1[list[k]])
+							string_2:=util.ObjToString(dict2[list[k]])
+							if string_1!=string_2 {
+								isAdd = true
+								sheet.Cell(1, col_num).Value = string_1+"~"+string_2
+							}
+							col_num++
+						}
+
+						if isAdd {
+							row_num = 2
+						}
+
+					}else {
+						col_num = 2
+						isAdd := false
+						for l:=0;l<len(list) ;l++  {
+							if l==0 {
+								sheet.Cell(row_num, 0).Value = util.BsonIdToSId(dict1["_id"])
+							}
+
+							string_1:=util.ObjToString(dict1[list[l]])
+							string_2:=util.ObjToString(dict2[list[l]])
+							if string_1!=string_2 {
+								isAdd = true
+								sheet.Cell(row_num, col_num).Value = string_1+"~"+string_2
+							}
+							col_num++
+						}
+						if isAdd {
+							row_num++
+						}
+					}
+				}
+			}
+
+
+			mapLock.Unlock()
+		}
+	}
+
+
+
+	err := f.Save("zheng_test.xlsx")
+	if err != nil {
+		log.Println("保存xlsx失败:", err)
+		return
+	}
+	log.Println("xlsx保存成功")
+}
+
+
+
+// Compare deduplication differences
+//func Test_heavy(t *testing.T) {
 
 	//mapinfo := map[string]interface{}{
 	//	"gtid":  "586b6d7061a0721f15b8f264",
@@ -24,40 +270,6 @@ func Test_heavy(t *testing.T) {
 	//}
 	//task([]byte{}, mapinfo)
 
-
-	//log.Println("1")
-	// copy the data programmatically
-	//sessTest :=mgoTest.GetMgoConn()
-	//defer sessTest.Close()
-	//
-	//sess := mgo.GetMgoConn()
-	//defer sess.Close()
-	//
-	////var arr []map[string]interface{}
-	//
-	//res_test := sessTest.DB("qfw").C("bidding").Find(mongodb.ObjToMQ(`{"comeintime":{"$gte": 1571025600, "$lte": 1571976000}}`, true)).Iter()
-	//res :=sess.DB("extract_kf").C("a_testbidding")
-	//5
-	//
-	//
-	//
-	//
-	//i:=0
-	//for dict := make(map[string]interface{}); res_test.Next(&dict); i++{
-	//
-	//	// insert
-	//	if i%2000==0 {
-	//		log.Println("current:", i)
-	//	}
-	//	res.Insert(dict)
-	//	//if len(arr)>=500 {
-	//	//	arr = make([]map[string]interface{},0)
-	//	//}else {
-	//	//	arr = append(arr,dict)
-	//	//}
-	//}
-	//
-
 	//extract,extract_copy:="a_testbidding_new","a_testbidding"
 	//
 	//sess := mgo.GetMgoConn()
@@ -150,215 +362,297 @@ func Test_heavy(t *testing.T) {
 	//log.Println("V1 1:1---",n4)
 	//log.Println("V1 0:-1---",n5)
 	//log.Println("V1 1:-1---",n6)
-}
-
-
+//}
 
+// Merge the data sets
+//func Test_specifiedField(t *testing.T) {
 
-func Test_field(t *testing.T) {
-
-	mgo = &mongodb.MongodbSim{
-		MongodbAddr: "192.168.3.207:27081",
-		DbName:      "qfw",
-		Size:        util.IntAllDef(15, 10),
-	}
-	mgo.InitPool()
-
-	// debug - export the data
-	// 1: taking the extracted fields as the baseline, count how many fields each spider has present and output a summary table (top 100)
-	// 2: manually spot-check data quality, used for jsondata weight evaluation
-
-	// fixed fields; "1" means present
-	//now := int64(time.Now().Unix())
-	//date_time := int64(86400*2)
-
-	//field_map := make(map[string]string,0)
-	//sess_field := mgo.GetMgoConn()
-	//defer sess_field.Close()
-	//res_field := sess_field.DB("extract_kf").C("fields").Find(nil).Sort("_id").Iter()
-	//for dict := make(map[string]interface{}); res_field.Next(&dict); {
-	//	field_map[dict["s_field"].(string)] = "1"
+	//mgo = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27081",
+	//	DbName:      "qfw",
+	//	Size:        util.IntAllDef(15, 10),
 	//}
-
-	// hard-coded list of fields to analyze
-	field_map := map[string]string{
-		"title":"1",
-		"area":"1",
-		"city":"1",
-		"subtype":"1",
-		"buyer":"1",
-		"agency":"1",
-		"winner":"1",
-		"budget":"1",
-		"bidamount":"1",
-		"projectname":"1",
-		"projectcode":"1",
-		"publishtime":"1",
-		"comeintime":"1",
-		"bidopentime":"1",
-		"agencyaddr":"1",
-		"site":"1",
-		"href":"1",
-	}
-
-	/*	ObjectId("5da3f2c5a5cb26b9b79847fc") 0
-		ObjectId("5da3fd6da5cb26b9b7a8683c") 5000
-		ObjectId("5da40bdaa5cb26b9b7bea472") 10000
-		ObjectId("5da44deaa5cb26b9b75efb38") 50000
-		ObjectId("5da53440a5cb26b9b7d3f9aa") 100000
-		ObjectId("5db2735ba5cb26b9b7c99c6f") 761414
-	*/
-
-	/*
-	qfw-bidding
-
-	ObjectId("5e0d4cdd0cf41612e063fc65")  -1
-	ObjectId("5df8bfe4e9d1f601e4e87431") 一百万
-	ObjectId("5dea080ce9d1f601e45cb838") 二百万
-
-
-	*/
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	//q := map[string]interface{}{
-	//	"_id": map[string]interface{}{
-	//		"$gt":  util.StringTOBsonId("5dea080ce9d1f601e45cb838"),
-	//		"$lte": util.StringTOBsonId("5e0d4cdd0cf41612e063fc65"),
-	//	},
+	//mgo.InitPool()
+	//
+	//mgo_copy = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27092",
+	//	DbName:      "extract_kf",
+	//	Size:        util.IntAllDef(15, 10),
 	//}
-	it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
-
-	// spider groups
-	crawlerMap,n := make(map[string]map[string]interface{},0),0
-
-	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
-		if n%10000==0 {
-			log.Println("当前n:",n)
-		}
-
-		if n>3000000 {
-			break
-		}
-
-		if tmp["spidercode"]!="" {
-			// check whether a group for this spider class already exists
-			dict := make(map[string]interface{},0)
-			if crawlerMap[tmp["spidercode"].(string)]!= nil {
-				dict = crawlerMap[tmp["spidercode"].(string)]
-			}
-			jsonData := util.ObjToMap(tmp["jsondata"])
-
-			if jsonData!=nil {
-				for k,v :=range *jsonData  {
-					if fmt.Sprint(v) ==""{
-					// empty value, skip
-					}else {
-						if field_map[k]=="1" {
-							arr := dict[k]
-							if arr==nil {
-								dict[k] = make([]string,0)
-								dict[k] = append(dict[k].([]string),fmt.Sprint(v))
-							}else {
-								dict[k] = append(dict[k].([]string),fmt.Sprint(v))
-							}
-						}
-					}
-				}
-			}
-			if dict!=nil {
-				crawlerMap[tmp["spidercode"].(string)] = dict
-			}
-		}
-	}
-
-	log.Println("总计",n,"条数据")
-	log.Println("爬虫类别个数:",len(crawlerMap))
-
-
-	// compute each spider class's total and attach it
-
-	//ObjectId("5e0d4cdd0cf41612e063fc65")
-	arr :=make([]map[string]interface{},0)
-	for k,v :=range crawlerMap  {
-		total :=0
-		for _,v1 :=range v {
-			total =total + len(v1.([]string))
-		}
-		v["total"]= total
-		v["key"] = k
-		arr = append(arr,v)
-	}
-
-
-	// rank spider classes by total valid fields, top 100
-	start := time.Now().Unix()
-	quickSort(0,len(arr)-1,&arr)
-	end :=time.Now().Unix()
-	fmt.Println("耗时:",end-start,"秒")
-
-	f :=xlsx.NewFile()
-	sheet, _ := f.AddSheet("ranking")
-
-	// write the header on the first row
-	row1 := sheet.AddRow()
-	row1.AddCell().Value = "rank"
-	row1.AddCell().Value = "spider class"
-	row1.AddCell().Value = "valid field count"
-
-	mapLock := &sync.Mutex{}
-	limit :=0
-	for _,v :=range arr  {
-		limit++
-		row := sheet.AddRow()
-		row.AddCell().SetInt(limit)
-		row.AddCell().SetString(v["key"].(string))
-		row.AddCell().SetInt(v["total"].(int))
-
-		if limit <=20 {
-			mapLock.Lock()
-			sheetName := "排名"+util.ObjToString(limit)+":"+util.ObjToString(v["key"])
-			sheet_detail, err := f.AddSheet(sheetName)
-			if err==nil {
-				row_num,col_num :=0,0
-				for k1,v1 := range v {
-					if a,ok :=v1.([]string);ok {
-						for k2, v2 := range a {
-							if k2==0 {
-								sheet_detail.Cell(row_num, col_num).Value = util.ObjToString(k1)
-								row_num++
-								sheet_detail.Cell(row_num, col_num).Value = v2
-							}else {
-								if row_num>2000 {
-									continue
-								}
-								sheet_detail.Cell(row_num, col_num).Value = v2
-							}
-							row_num++
-						}
-						row_num = 0
-						col_num++
-					}
-				}
-			}
-
-			mapLock.Unlock()
-		}
-
+	//mgo_copy.InitPool()
+	//
+	//
+	//// hard-coded list of fields to analyze
+	//field_map := map[string]string{
+	//	"title":"1",
+	//	"area":"1",
+	//	"city":"1",
+	//	"subtype":"1",
+	//	"buyer":"1",
+	//	"agency":"1",
+	//	"winner":"1",
+	//	"budget":"1",
+	//	"bidamount":"1",
+	//	"projectname":"1",
+	//	"projectcode":"1",
+	//	"publishtime":"1",
+	//	"comeintime":"1",
+	//	"bidopentime":"1",
+	//	"agencyaddr":"1",
+	//	"site":"1",
+	//	"href":"1",
+	//}
+	//
+	//
+	//sess := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sess)
+	//
+	//sess_1 :=mgo_copy.GetMgoConn()
+	//defer mgo_copy.DestoryMongoConn(sess_1)
+	//
+	//sess_2 :=mgo_copy.GetMgoConn()
+	//defer mgo_copy.DestoryMongoConn(sess_2)
+	//
+	//
+	//it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
+	//it_1 :=sess_1.DB("extract_kf").C("zheng_test_1")
+	//it_2 :=sess_2.DB("extract_kf").C("zheng_test_2")
+	//n:=0
+	//for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+	//	if n%10000==0 {
+	//		log.Println("当前n:",n)
+	//	}
+	//	if n>1000000 { //约半月数据
+	//		break
+	//	}
+	//	if tmp["spidercode"]=="a_zgzfcgw_zfcghtgg_new"|| tmp["spidercode"]=="gd_gdszfcgw_dscght"||
+	//		tmp["spidercode"]=="a_zgzfcgw_bid_tender_new"||tmp["spidercode"]=="a_ztxygjzbtbzxyxgs_zbxx"||
+	//		tmp["spidercode"]=="sd_zgsdzfcgw_xxgk_sxhtgk"{
+	//		jsonData := util.ObjToMap(tmp["jsondata"])
+	//		if jsonData!=nil {
+	//			for k,v :=range *jsonData  {
+	//				if fmt.Sprint(v) !=""{
+	//					if field_map[k]=="1" {
+	//						it_1.Insert(tmp)
+	//						it_2.Insert(tmp)
+	//						break
+	//					}
+	//				}
+	//			}
+	//		}
+	//	}
+	//}
+	//log.Println("总计",n,"条数据")
 
+//}
 
-		if limit >99{
-			break
-		}
-	}
 
+// Field statistics
+//func Test_field(t *testing.T) {
 
-	err := f.Save("zheng.xlsx")
-	if err != nil {
-		log.Println("保存xlsx失败:", err)
-		return
-	}
-	log.Println("xlsx保存成功")
-}
+	//mgo = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27081",
+	//	DbName:      "qfw",
+	//	Size:        util.IntAllDef(15, 10),
+	//}
+	//mgo.InitPool()
+	//
+	//// debug - export the data
+	//// 1: taking the extracted fields as the baseline, count how many fields each spider has present and output a summary table (top 100)
+	//// 2: manually spot-check data quality, used for jsondata weight evaluation
+	//
+	//// fixed fields; "1" means present
+	////now := int64(time.Now().Unix())
+	////date_time := int64(86400*2)
+	//
+	////field_map := make(map[string]string,0)
+	////sess_field := mgo.GetMgoConn()
+	////defer sess_field.Close()
+	////res_field := sess_field.DB("extract_kf").C("fields").Find(nil).Sort("_id").Iter()
+	////for dict := make(map[string]interface{}); res_field.Next(&dict); {
+	////	field_map[dict["s_field"].(string)] = "1"
+	////}
+	//
+	//// hard-coded list of fields to analyze
+	//field_map := map[string]string{
+	//	"title":"1",
+	//	"area":"1",
+	//	"city":"1",
+	//	"subtype":"1",
+	//	"buyer":"1",
+	//	"agency":"1",
+	//	"winner":"1",
+	//	"budget":"1",
+	//	"bidamount":"1",
+	//	"projectname":"1",
+	//	"projectcode":"1",
+	//	"publishtime":"1",
+	//	"comeintime":"1",
+	//	"bidopentime":"1",
+	//	"agencyaddr":"1",
+	//	"site":"1",
+	//	"href":"1",
+	//}
+	//
+	///*	ObjectId("5da3f2c5a5cb26b9b79847fc") 0
+	//	ObjectId("5da3fd6da5cb26b9b7a8683c") 5000
+	//	ObjectId("5da40bdaa5cb26b9b7bea472") 10000
+	//	ObjectId("5da44deaa5cb26b9b75efb38") 50000
+	//	ObjectId("5da53440a5cb26b9b7d3f9aa") 100000
+	//	ObjectId("5db2735ba5cb26b9b7c99c6f") 761414
+	//*/
+	//
+	///*
+	//qfw-bidding
+	//
+	//ObjectId("5e0d4cdd0cf41612e063fc65")  -1
+	//ObjectId("5df8bfe4e9d1f601e4e87431") 一百万
+	//ObjectId("5dea080ce9d1f601e45cb838") 二百万
+	//
+	//5df834dd // 半月         大约100万条
+	//
+	//*/
+	//sess := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sess)
+	////q := map[string]interface{}{
+	////	"_id": map[string]interface{}{
+	////		"$gt":  util.StringTOBsonId("5dea080ce9d1f601e45cb838"),
+	////		"$lte": util.StringTOBsonId("5e0d4cdd0cf41612e063fc65"),
+	////	},
+	////}
+	//it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
+	//
+	//// spider groups
+	//crawlerMap,n := make(map[string]map[string]interface{},0),0
+	//
+	//for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+	//	if n%10000==0 {
+	//		log.Println("当前n:",n)
+	//	}
+	//
+	//	if n>3000000 {
+	//		break
+	//	}
+	//
+	//	if tmp["spidercode"]!="" {
+	//		// check whether a group for this spider class already exists
+	//		dict := make(map[string]interface{},0)
+	//		if crawlerMap[tmp["spidercode"].(string)]!= nil {
+	//			dict = crawlerMap[tmp["spidercode"].(string)]
+	//		}
+	//		jsonData := util.ObjToMap(tmp["jsondata"])
+	//
+	//		if jsonData!=nil {
+	//			for k,v :=range *jsonData  {
+	//				if fmt.Sprint(v) ==""{
+	//				// empty value, skip
+	//				}else {
+	//					if field_map[k]=="1" {
+	//						arr := dict[k]
+	//						if arr==nil {
+	//							dict[k] = make([]string,0)
+	//							dict[k] = append(dict[k].([]string),fmt.Sprint(v))
+	//						}else {
+	//							dict[k] = append(dict[k].([]string),fmt.Sprint(v))
+	//						}
+	//					}
+	//				}
+	//			}
+	//		}
+	//		if dict!=nil {
+	//			crawlerMap[tmp["spidercode"].(string)] = dict
+	//		}
+	//	}
+	//}
+	//
+	//log.Println("总计",n,"条数据")
+	//log.Println("爬虫类别个数:",len(crawlerMap))
+	//
+	//
+	//// compute each spider class's total and attach it
+	//
+	////ObjectId("5e0d4cdd0cf41612e063fc65")
+	//arr :=make([]map[string]interface{},0)
+	//for k,v :=range crawlerMap  {
+	//	total :=0
+	//	for _,v1 :=range v {
+	//		total =total + len(v1.([]string))
+	//	}
+	//	v["total"]= total
+	//	v["key"] = k
+	//	arr = append(arr,v)
+	//}
+	//
+	//
+	//// rank spider classes by total valid fields, top 100
+	//start := time.Now().Unix()
+	//quickSort(0,len(arr)-1,&arr)
+	//end :=time.Now().Unix()
+	//fmt.Println("耗时:",end-start,"秒")
+	//
+	//f :=xlsx.NewFile()
+	//sheet, _ := f.AddSheet("ranking")
+	//
+	//// write the header on the first row
+	//row1 := sheet.AddRow()
+	//row1.AddCell().Value = "rank"
+	//row1.AddCell().Value = "spider class"
+	//row1.AddCell().Value = "valid field count"
+	//
+	//mapLock := &sync.Mutex{}
+	//limit :=0
+	//for _,v :=range arr  {
+	//	limit++
+	//	row := sheet.AddRow()
+	//	row.AddCell().SetInt(limit)
+	//	row.AddCell().SetString(v["key"].(string))
+	//	row.AddCell().SetInt(v["total"].(int))
+	//
+	//	if limit <=20 {
+	//		mapLock.Lock()
+	//		sheetName := "排名"+util.ObjToString(limit)+":"+util.ObjToString(v["key"])
+	//		sheet_detail, err := f.AddSheet(sheetName)
+	//		if err==nil {
+	//			row_num,col_num :=0,0
+	//			for k1,v1 := range v {
+	//				if a,ok :=v1.([]string);ok {
+	//					for k2, v2 := range a {
+	//						if k2==0 {
+	//							sheet_detail.Cell(row_num, col_num).Value = util.ObjToString(k1)
+	//							row_num++
+	//							sheet_detail.Cell(row_num, col_num).Value = v2
+	//						}else {
+	//							if row_num>2000 {
+	//								continue
+	//							}
+	//							sheet_detail.Cell(row_num, col_num).Value = v2
+	//						}
+	//						row_num++
+	//					}
+	//					row_num = 0
+	//					col_num++
+	//				}
+	//			}
+	//		}
+	//
+	//		mapLock.Unlock()
+	//	}
+	//
+	//
+	//
+	//	if limit >99{
+	//		break
+	//	}
+	//}
+	//
+	//
+	//err := f.Save("zheng.xlsx")
+	//if err != nil {
+	//	log.Println("保存xlsx失败:", err)
+	//	return
+	//}
+	//log.Println("xlsx保存成功")
+//}
 
 
 func quickSort(left int,right int ,array *[]map[string]interface{}) {

Binary
udpprojectset/src/zheng_test.xlsx
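
The last hunk is truncated at the quickSort signature; its body ranks the per-spider maps by their "total" count. A minimal sketch of a quicksort with that exact signature, assuming descending order since the caller keeps the top 100; illustrative, not the committed body:

package main

import "fmt"

// quickSort orders the slice in place by the "total" key, largest first.
func quickSort(left int, right int, array *[]map[string]interface{}) {
	if left >= right {
		return
	}
	a := *array
	pivot := a[(left+right)/2]["total"].(int)
	i, j := left, right
	for i <= j {
		for a[i]["total"].(int) > pivot { // descending: larger totals stay left
			i++
		}
		for a[j]["total"].(int) < pivot {
			j--
		}
		if i <= j {
			a[i], a[j] = a[j], a[i]
			i++
			j--
		}
	}
	quickSort(left, j, array)
	quickSort(i, right, array)
}

func main() {
	arr := []map[string]interface{}{
		{"key": "a", "total": 3},
		{"key": "b", "total": 9},
		{"key": "c", "total": 5},
	}
	quickSort(0, len(arr)-1, &arr)
	fmt.Println(arr) // b (9), c (5), a (3) by descending total
}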