|
@@ -158,8 +158,8 @@ func Test_heavy(t *testing.T) {
|
|
|
func Test_field(t *testing.T) {
|
|
|
|
|
|
mgo = &mongodb.MongodbSim{
|
|
|
- MongodbAddr: "192.168.3.207:27092",
|
|
|
- DbName: "extract_kf",
|
|
|
+ MongodbAddr: "192.168.3.207:27081",
|
|
|
+ DbName: "qfw",
|
|
|
Size: util.IntAllDef(15, 10),
|
|
|
}
|
|
|
mgo.InitPool()
|
|
@@ -172,39 +172,76 @@ func Test_field(t *testing.T) {
|
|
|
//now := int64(time.Now().Unix())
|
|
|
//date_time := int64(86400*2)
|
|
|
|
|
|
- field_map := make(map[string]string,0)
|
|
|
- sess_field := mgo.GetMgoConn()
|
|
|
- defer sess_field.Close()
|
|
|
- res_field := sess_field.DB("extract_kf").C("fields").Find(nil).Sort("_id").Iter()
|
|
|
- for dict := make(map[string]interface{}); res_field.Next(&dict); {
|
|
|
- field_map[dict["s_field"].(string)] = "1"
|
|
|
- }
|
|
|
+ //field_map := make(map[string]string,0)
|
|
|
+ //sess_field := mgo.GetMgoConn()
|
|
|
+ //defer sess_field.Close()
|
|
|
+ //res_field := sess_field.DB("extract_kf").C("fields").Find(nil).Sort("_id").Iter()
|
|
|
+ //for dict := make(map[string]interface{}); res_field.Next(&dict); {
|
|
|
+ // field_map[dict["s_field"].(string)] = "1"
|
|
|
+ //}
|
|
|
|
|
|
//固定死的需要分析的字段
|
|
|
+ field_map := map[string]string{
|
|
|
+ "title":"1",
|
|
|
+ "area":"1",
|
|
|
+ "city":"1",
|
|
|
+ "subtype":"1",
|
|
|
+ "buyer":"1",
|
|
|
+ "agency":"1",
|
|
|
+ "winner":"1",
|
|
|
+ "budget":"1",
|
|
|
+ "bidamount":"1",
|
|
|
+ "projectname":"1",
|
|
|
+ "projectcode":"1",
|
|
|
+ "publishtime":"1",
|
|
|
+ "comeintime":"1",
|
|
|
+ "bidopentime":"1",
|
|
|
+ "agencyaddr":"1",
|
|
|
+ "site":"1",
|
|
|
+ "href":"1",
|
|
|
+ }
|
|
|
+
|
|
|
+ /* ObjectId("5da3f2c5a5cb26b9b79847fc") 0
|
|
|
+ ObjectId("5da3fd6da5cb26b9b7a8683c") 5000
|
|
|
+ ObjectId("5da40bdaa5cb26b9b7bea472") 10000
|
|
|
+ ObjectId("5da44deaa5cb26b9b75efb38") 50000
|
|
|
+ ObjectId("5da53440a5cb26b9b7d3f9aa") 100000
|
|
|
+ ObjectId("5db2735ba5cb26b9b7c99c6f") 761414
|
|
|
+ */
|
|
|
|
|
|
+ /*
|
|
|
+ qfw-bidding
|
|
|
|
|
|
+ ObjectId("5e0d4cdd0cf41612e063fc65") -1
|
|
|
+ ObjectId("5df8bfe4e9d1f601e4e87431") 一百万
|
|
|
+ ObjectId("5dea080ce9d1f601e45cb838") 二百万
|
|
|
|
|
|
|
|
|
- /* ObjectId("5da3f2c5a5cb26b9b79847fc")
|
|
|
- ObjectId("5da3fd6da5cb26b9b7a8683c")
|
|
|
- ObjectId("5da40bdaa5cb26b9b7bea472")
|
|
|
*/
|
|
|
sess := mgo.GetMgoConn()
|
|
|
defer mgo.DestoryMongoConn(sess)
|
|
|
- q := map[string]interface{}{
|
|
|
- "_id": map[string]interface{}{
|
|
|
- "$gt": util.StringTOBsonId("5da3f2c5a5cb26b9b79847fc"),
|
|
|
- "$lte": util.StringTOBsonId("5da3fd6da5cb26b9b7a8683c"),
|
|
|
- },
|
|
|
- }
|
|
|
- it := sess.DB(mgo.DbName).C("a_testbidding").Find(&q).Sort("_id").Iter()
|
|
|
+ //q := map[string]interface{}{
|
|
|
+ // "_id": map[string]interface{}{
|
|
|
+ // "$gt": util.StringTOBsonId("5dea080ce9d1f601e45cb838"),
|
|
|
+ // "$lte": util.StringTOBsonId("5e0d4cdd0cf41612e063fc65"),
|
|
|
+ // },
|
|
|
+ //}
|
|
|
+ it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
|
|
|
|
|
|
//爬虫组
|
|
|
crawlerMap,n := make(map[string]map[string]interface{},0),0
|
|
|
|
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
|
+ if n%10000==0 {
|
|
|
+ log.Println("当前n:",n)
|
|
|
+ }
|
|
|
+
|
|
|
+ if n>3000000 {
|
|
|
+ break
|
|
|
+ }
|
|
|
+
|
|
|
if tmp["spidercode"]!="" {
|
|
|
- //判断是否有次类别分组
|
|
|
+ //判断是否有此类别分组
|
|
|
dict := make(map[string]interface{},0)
|
|
|
if crawlerMap[tmp["spidercode"].(string)]!= nil {
|
|
|
dict = crawlerMap[tmp["spidercode"].(string)]
|
|
@@ -213,18 +250,17 @@ func Test_field(t *testing.T) {
|
|
|
|
|
|
if jsonData!=nil {
|
|
|
for k,v :=range *jsonData {
|
|
|
- if fmt.Sprint(v) =="" {
|
|
|
+ if fmt.Sprint(v) ==""{
|
|
|
//无效数据
|
|
|
}else {
|
|
|
- arr := dict[k]
|
|
|
- if arr==nil {
|
|
|
- dict[k] = make([]string,0)
|
|
|
- dict[k] = append(dict[k].([]string),fmt.Sprint(v))
|
|
|
- }else {
|
|
|
- //if a,ok :=arr.([]string);ok{
|
|
|
- // a = append(a,fmt.Sprint(v))
|
|
|
- //}
|
|
|
- dict[k] = append(dict[k].([]string),fmt.Sprint(v))
|
|
|
+ if field_map[k]=="1" {
|
|
|
+ arr := dict[k]
|
|
|
+ if arr==nil {
|
|
|
+ dict[k] = make([]string,0)
|
|
|
+ dict[k] = append(dict[k].([]string),fmt.Sprint(v))
|
|
|
+ }else {
|
|
|
+ dict[k] = append(dict[k].([]string),fmt.Sprint(v))
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -236,12 +272,12 @@ func Test_field(t *testing.T) {
|
|
|
}
|
|
|
|
|
|
log.Println("总计",n,"条数据")
|
|
|
- log.Println("判重类别个数:",len(crawlerMap))
|
|
|
+ log.Println("爬虫类别个数:",len(crawlerMap))
|
|
|
|
|
|
|
|
|
//计算每个爬虫分类的总数-并添加
|
|
|
|
|
|
- //
|
|
|
+ //ObjectId("5e0d4cdd0cf41612e063fc65")
|
|
|
arr :=make([]map[string]interface{},0)
|
|
|
for k,v :=range crawlerMap {
|
|
|
total :=0
|
|
@@ -278,32 +314,39 @@ func Test_field(t *testing.T) {
|
|
|
row.AddCell().SetString(v["key"].(string))
|
|
|
row.AddCell().SetInt(v["total"].(int))
|
|
|
|
|
|
- mapLock.Lock()
|
|
|
- sheetName := "排名:"+util.ObjToString(v["key"])
|
|
|
- sheet_detail, err := f.AddSheet(sheetName)
|
|
|
- if err==nil {
|
|
|
- row_num,col_num :=0,0
|
|
|
- for k1,v1 := range v {
|
|
|
- if a,ok :=v1.([]string);ok {
|
|
|
- for k2, v2 := range a {
|
|
|
- if k2==0 {
|
|
|
- sheet_detail.Cell(row_num, col_num).Value = util.ObjToString(k1)
|
|
|
+ if limit <=20 {
|
|
|
+ mapLock.Lock()
|
|
|
+ sheetName := "排名"+util.ObjToString(limit)+":"+util.ObjToString(v["key"])
|
|
|
+ sheet_detail, err := f.AddSheet(sheetName)
|
|
|
+ if err==nil {
|
|
|
+ row_num,col_num :=0,0
|
|
|
+ for k1,v1 := range v {
|
|
|
+ if a,ok :=v1.([]string);ok {
|
|
|
+ for k2, v2 := range a {
|
|
|
+ if k2==0 {
|
|
|
+ sheet_detail.Cell(row_num, col_num).Value = util.ObjToString(k1)
|
|
|
+ row_num++
|
|
|
+ sheet_detail.Cell(row_num, col_num).Value = v2
|
|
|
+ }else {
|
|
|
+ if row_num>2000 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ sheet_detail.Cell(row_num, col_num).Value = v2
|
|
|
+ }
|
|
|
row_num++
|
|
|
- sheet_detail.Cell(row_num, col_num).Value = v2
|
|
|
- }else {
|
|
|
- sheet_detail.Cell(row_num, col_num).Value = v2
|
|
|
}
|
|
|
- row_num++
|
|
|
+ row_num = 0
|
|
|
+ col_num++
|
|
|
}
|
|
|
- row_num = 0
|
|
|
- col_num++
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ mapLock.Unlock()
|
|
|
}
|
|
|
|
|
|
- mapLock.Unlock()
|
|
|
|
|
|
- if limit >10{
|
|
|
+
|
|
|
+ if limit >99{
|
|
|
break
|
|
|
}
|
|
|
}
|