zhengkun 3 anos atrás
pai
commit
6f1b8bb2df

+ 3 - 0
listen_data/src/main.go

@@ -116,6 +116,9 @@ func main()  {
 
 	*/
 
+
+	dealWithBingTuanData()
+
 	return
 
 

+ 1 - 1
listen_data/src/zkmethod.go

@@ -107,7 +107,7 @@ func dealWithBingTuanData() {
 	}
 
 	if len(savedata)>2 {
-		save_mgo.Save("111",savedata)
+		save_mgo.Save("area_xjbt",savedata)
 	}
 	log.Debug("总计",total,"行")
 }

+ 475 - 0
process_medical/src/init_repeat.go

@@ -0,0 +1,475 @@
+package main
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	qu "qfw/util"
+	"regexp"
+	"strings"
+	//"time"
+)
+
+// Package-level state shared by the duplicate-detection routines in this file.
+var (
+	// zhb_key_list names the fields key_list requires to agree, in checking order.
+	zhb_key_list = []string{"budget", "buyer", "agency", "s_winner", "bidamount", "projectcode", "contractcode"}
+	// packreg is assigned in init; panchong uses it to spot a package/lot marker in a title.
+	packreg *regexp.Regexp
+	// Mgo is the MongoDB handle constructed in init from the command-line flags.
+	Mgo *MongodbSim
+	// listSource holds every record loaded by repeatMedicalTest; dataItem replaces its entries in place.
+	listSource []*dataSource
+)
+
+// dataSource is the in-memory form of one document from the target collection,
+// reduced to the fields the duplicate check reads or writes. The text fields are
+// stored lower-cased with punctuation removed (see repeatMedicalTest).
+type dataSource struct {
+	_id              string            // document "_id" rendered through BsonTOStringId
+	id               string            // document "id"; used to link duplicates together
+	title            string            // normalised "title"
+	projectname      string            // normalised "projectname"
+	projectcode      string            // normalised "projectcode"
+	contractcode     string            // normalised "contractcode"
+	buyer            string            // normalised "buyer"
+	agency           string            // normalised "agency"
+	s_winner         string            // normalised "s_winner"
+	budget           float64           // "budget" converted with qu.Float64All
+	bidamount        float64           // "bidamount" converted with qu.Float64All
+	isrepeat         bool              // set by panchong when this record duplicates another
+	repeat_id_source string            // id of the record this one was judged to duplicate
+	repeat_id        map[string]string // ids of records judged duplicates of this one (values unused)
+	repeatText       string            // label of the rule that flagged this record
+	publishtime      int64             // "publishtime" converted with qu.Int64All
+}
+
+//var addr, dbname, table, startTime, endTime, sortType *string
+var addr, dbname, table,  sortType *string
+
+// init registers and parses the command-line flags, builds the MongoDB pool
+// from them, and compiles the package/lot marker pattern used by panchong.
+//
+// NOTE(review): calling flag.Parse inside init means flags are parsed before
+// main runs, and also for any other program entry in this package — confirm
+// that is intended.
+func init() {
+	// The help text for -addr used to read "database name"; it is the address.
+	addr = flag.String("addr", "172.17.4.87:27080", "数据库地址")
+	dbname = flag.String("dbname", "py_spider", "数据库名称")
+	table = flag.String("table", "zktest_mysql_bidding_repeat", "表名称")
+	sortType = flag.String("sort", "1", "sort--请输入排序方式,1正序、-1倒序")
+	flag.Parse()
+	Mgo = &MongodbSim{
+		MongodbAddr: *addr,
+		Size:        3,
+		DbName:      *dbname,
+	}
+	Mgo.InitPool()
+	// MustCompile instead of discarding the error: a broken pattern now fails
+	// at start-up rather than as a nil-pointer panic inside panchong.
+	packreg = regexp.MustCompile(`([a-zA-Z0-9①②ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ一二三四五六七八九十](包|标|段)[::]?)`)
+}
+
+func repeatMedicalTest() {
+	log.Printf("表名:%s,排序方式:%s", *table, *sortType)
+	if *addr == "" || *dbname == "" || *table == "" ||  *sortType == "" {
+		log.Println("参数输入有误")
+		fmt.Printf("数据库地址:%s\n数据库名称:%s\n表名:%s\n排序方式:%s\n", *addr, *dbname, *table, *sortType)
+		os.Exit(0)
+	}
+	//stime, _ := time.Parse(qu.Date_Short_Layout, *startTime)
+	//etime, _ := time.Parse(qu.Date_Short_Layout, *endTime)
+	//query := map[string]interface{}{}
+	//query["$and"] = []interface{}{
+	//	map[string]interface{}{
+	//		"publishtime":map[string]interface{}{
+	//			"$gte":stime.Unix(),
+	//		},
+	//	},
+	//	map[string]interface{}{
+	//		"publishtime":map[string]interface{}{
+	//			"$lte":etime.Unix(),
+	//		},
+	//	},
+	//	//bson.M{"publishtime": bson.M{"$gte": stime.Unix()}},
+	//	//bson.M{"publishtime": bson.M{"$lte": etime.Unix()}},
+	//}
+
+	sort := "publishtime"
+	if *sortType == "-1" {
+		sort = "-publishtime"
+	}
+	//log.Println(sort)
+	sess := Mgo.GetMgoConn()
+	defer Mgo.DestoryMongoConn(sess)
+	//it := sess.DB(Mgo.DbName).C(*table).Find(query).Sort(sort).Iter()
+	it := sess.DB(Mgo.DbName).C(*table).Find(map[string]interface{}{}).Sort(sort).Iter()
+	//对标题、项目名称等中英文符号、空格等进行处理
+	var filterReg = regexp.MustCompile("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()——|{}【】‘;:”“'。,、?%+_]")
+	index := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
+		d := &dataSource{
+			_id:          BsonTOStringId(tmp["_id"]),
+			id:           qu.ObjToString(tmp["id"]),
+			title:        filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["title"])), ""),
+			projectname:  filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["projectname"])), ""),
+			projectcode:  filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["projectcode"])), ""),
+			contractcode: filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["contractcode"])), ""),
+			buyer:        filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["buyer"])), ""),
+			agency:       filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["agency"])), ""),
+			s_winner:     filterReg.ReplaceAllString(strings.ToLower(qu.ObjToString(tmp["s_winner"])), ""),
+			budget:       qu.Float64All(tmp["budget"]),
+			bidamount:    qu.Float64All(tmp["bidamount"]),
+			publishtime:  qu.Int64All(tmp["publishtime"]),
+			repeat_id:    map[string]string{},
+		}
+		//log.Println(tmp["_id"], d.id)
+		if index%10000 == 0 {
+			log.Println("加载数据:", index,tmp["_id"])
+		}
+		listSource = append(listSource, d)
+		tmp = map[string]interface{}{}
+	}
+	log.Println("数据加载完成",len(listSource))
+
+	dataItem()
+	dd := 0
+	for i := 0; i < len(listSource); i++ {
+		a := listSource[i]
+		if a.isrepeat {
+			dd++
+		}
+		//更新数据
+		if len(a.repeat_id) ==0{
+			Mgo.UpdateById(*table, a._id,
+				map[string]interface{}{"$set": map[string]interface{}{
+					//重复数据看repeatid
+					"repeatid":     a.repeat_id_source, //和哪条数据重复id
+					"repeat":       a.isrepeat,         //本条数据是否重复数据
+					"repeattext":   a.repeatText,       //本数据被判重的原因
+				}})
+		}else {
+			if len(a.repeat_id) > 0{
+				arr:=[]string{}
+				for k,_:=range a.repeat_id{
+					arr = append(arr,k)
+				}
+				Mgo.UpdateById(*table, a._id,
+					map[string]interface{}{"$set": map[string]interface{}{
+						//原始数据看repeatid_ids_str
+						"repeatid":     a.repeat_id_source, //和哪条数据重复id
+						"repeat":       a.isrepeat,         //本条数据是否重复数据
+						//"repeatid_ids": a.repeat_id,        //和我重复的数据都有哪些
+						"repeatid_ids_str": strings.Join(arr,","),
+						"repeattext":   a.repeatText,       //本数据被判重的原因
+					}})}
+		}
+		if i%1000 == 0 {
+			log.Println("已更新", i)
+		}
+	}
+	log.Println(dd)
+}
+
+var listSize = 20000 // NOTE(review): not referenced anywhere in the visible part of this file — confirm it is still needed
+
+// dataItem runs panchong over every pair (i, j) with i < j in listSource whose
+// publish times pass the window test, and stores the two records panchong
+// returns back into listSource at i and j.
+//
+// The window test is publishtime_b_a when the -sort flag is "1" and
+// publishtime_a_b for any other value. Progress is logged every 500 outer
+// iterations. The work is quadratic in len(listSource).
+func dataItem() {
+	// The sort direction does not change while looping, so pick the predicate once.
+	inWindow := publishtime_a_b
+	if *sortType == "1" {
+		inWindow = publishtime_b_a
+	}
+	for i := 0; i < len(listSource); i++ {
+		for j := i + 1; j < len(listSource); j++ {
+			// Re-read slot i each time: panchong hands back fresh copies, and the
+			// copy stored at i carries the state accumulated so far.
+			a, b := listSource[i], listSource[j]
+			if !inWindow(*a, *b) {
+				continue
+			}
+			listSource[i], listSource[j] = panchong(*a, *b)
+		}
+		if i%500 == 0 {
+			log.Println("已处理:", i)
+		}
+	}
+}
+
+// panchong decides whether b is a duplicate of a and returns pointers to the
+// (possibly updated) copies of both records.
+//
+// When a rule matches, b gets isrepeat=true, repeat_id_source=a.id and a
+// repeatText label naming the rule, and b.id is added to a.repeat_id. That map
+// is shared with the caller's record, so the insertion is visible there too.
+//
+// Rules, in order:
+//   - Two different non-empty contract codes never match.
+//   - Equal titles: compare budget, then project code, then bid amount, each
+//     combined with buyer/winner equality or a full key_list match.
+//   - Different titles: only considered when one project name contains the
+//     other; the rule set then depends on whether a.title carries a package/lot
+//     marker (packreg) and on whether the project names are equal or merely
+//     contained.
+//
+// NOTE(review): most amount tests use ">= 0", so two records that both lack an
+// amount (0 == 0) count as "equal amounts"; a few use "> 0". Confirm which is
+// intended.
+func panchong(a, b dataSource) (c, d *dataSource) {
+	// mark records b as a duplicate of a under the given rule label.
+	mark := func(reason string) {
+		b.repeat_id_source = a.id
+		a.repeat_id[b.id] = ""
+		b.isrepeat = true
+		b.repeatText = reason
+	}
+	// same reports whether both strings are non-empty and equal.
+	same := func(x, y string) bool { return pankong(x) && pankong(y) && x == y }
+	// eqFrom0 / eqAbove0 report equal amounts that are >= 0 / > 0 respectively.
+	eqFrom0 := func(x, y float64) bool { return x == y && (x >= 0 || y >= 0) }
+	eqAbove0 := func(x, y float64) bool { return x == y && (x > 0 || y > 0) }
+
+	contractDiffers := pankong(a.contractcode) && pankong(b.contractcode) && a.contractcode != b.contractcode
+
+	// ---- titles equal ----
+	if a.title == b.title {
+		switch {
+		case contractDiffers:
+			// different contracts: not a duplicate
+		case eqFrom0(a.budget, b.budget):
+			if same(a.buyer, b.buyer) {
+				if same(a.s_winner, b.s_winner) {
+					// NOTE(review): the label says bidamount, but this branch is
+					// reached through the budget comparison — confirm the intent.
+					mark("标题相等 && buyer && s_winner && bidamount")
+				}
+			} else if key_list(a, b) {
+				mark("标题相等 && budget && key_list")
+			}
+		case same(a.projectcode, b.projectcode):
+			if key_list(a, b) {
+				mark("标题相等 && projectcode && key_list")
+			}
+		case eqFrom0(a.bidamount, b.bidamount):
+			if same(a.buyer, b.buyer) {
+				if same(a.s_winner, b.s_winner) {
+					mark("标题相等 && bidamount && buyer && s_winner")
+				}
+			} else if key_list(a, b) {
+				mark("标题相等 && bidamount && key_list")
+			}
+		}
+		return &a, &b
+	}
+
+	// ---- titles differ ----
+	// Only pairs whose project names are equal or contain one another qualify.
+	if !strings.Contains(a.projectname, b.projectname) && !strings.Contains(b.projectname, a.projectname) {
+		return &a, &b
+	}
+	hasPackage := packreg.MatchString(a.title) // only a's title is inspected
+	nameEqual := a.projectname == b.projectname
+
+	switch {
+	case contractDiffers:
+		// different contracts: not a duplicate
+
+	case hasPackage && nameEqual:
+		switch {
+		case eqAbove0(a.bidamount, b.bidamount):
+			mark("标题不相等-->有分包 && projectname && bidamount")
+		case a.bidamount != b.bidamount:
+			// differing bid amounts: not a duplicate
+		default:
+			if same(a.s_winner, b.s_winner) && eqFrom0(a.budget, b.budget) {
+				mark("标题不相等-->有分包 && projectname && s_winner && budget")
+			}
+		}
+
+	case hasPackage: // project name contained, not equal
+		switch {
+		case eqFrom0(a.bidamount, b.bidamount):
+			switch {
+			case same(a.projectcode, b.projectcode):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && projectcode")
+			case same(a.s_winner, b.s_winner):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && s_winner")
+			case eqFrom0(a.budget, b.budget):
+				if same(a.buyer, b.buyer) {
+					mark("标题不相等-->有分包 && projectname包含 && bidamount && budget && buyer")
+				} else if same(a.agency, b.agency) {
+					mark("标题不相等-->有分包 && projectname包含 && bidamount && budget && agency")
+				}
+			}
+		case a.bidamount != b.bidamount:
+			// differing bid amounts: not a duplicate
+		default:
+			if same(a.s_winner, b.s_winner) && eqAbove0(a.budget, b.budget) {
+				mark("标题不相等-->有分包 && projectname包含 && s_winner && budget")
+			}
+		}
+
+	case nameEqual: // no package marker, project names equal
+		switch {
+		case eqFrom0(a.bidamount, b.bidamount):
+			mark("标题不相等-->无分包 && projectname && bidamount")
+		case a.bidamount != b.bidamount:
+			// differing bid amounts: not a duplicate
+		default:
+			switch {
+			case same(a.projectcode, b.projectcode):
+				mark("标题不相等-->无分包 && projectname && projectcode")
+			case same(a.s_winner, b.s_winner):
+				mark("标题不相等-->无分包 && projectname && s_winner")
+			case eqFrom0(a.budget, b.budget):
+				if same(a.buyer, b.buyer) {
+					mark("标题不相等-->无分包 && projectname && budget && buyer")
+				} else if same(a.agency, b.agency) {
+					mark("标题不相等-->无分包 && projectname && budget && agency")
+				}
+			}
+		}
+
+	default: // no package marker, project name contained
+		switch {
+		case eqFrom0(a.bidamount, b.bidamount):
+			switch {
+			case same(a.projectcode, b.projectcode):
+				mark("标题不相等-->无分包 && projectname包含 && bidamount")
+			case same(a.s_winner, b.s_winner):
+				mark("标题不相等-->无分包 && projectname包含 && s_winner")
+			case eqFrom0(a.budget, b.budget):
+				if same(a.buyer, b.buyer) {
+					mark("标题不相等-->无分包 && projectname包含 && budget && buyer")
+				} else if same(a.agency, b.agency) {
+					mark("标题不相等-->无分包 && projectname包含 && budget && agency")
+				}
+			}
+		case a.bidamount != b.bidamount:
+			// differing bid amounts: not a duplicate
+		default:
+			// Fixed: this used to compare a.budget with itself, so the budgets
+			// of the two records were never actually compared.
+			if same(a.s_winner, b.s_winner) && eqAbove0(a.budget, b.budget) {
+				mark("标题不相等-->无分包 && projectname包含 && s_winner && budget")
+			}
+		}
+	}
+	return &a, &b
+}
+
+// key_list reports whether a and b agree on every field named in zhb_key_list
+// ("budget", "buyer", "agency", "s_winner", "bidamount", "projectcode",
+// "contractcode"). A text field agrees when both values are non-empty and
+// equal; an amount agrees when both are equal and positive. Names in
+// zhb_key_list that are not one of the seven above are ignored.
+func key_list(a, b dataSource) bool {
+	textMatch := func(x, y string) bool { return x == y && pankong(x) && pankong(y) }
+	amountMatch := func(x, y float64) bool { return x == y && (x > 0 || y > 0) }
+
+	for _, field := range zhb_key_list {
+		matched := true
+		switch field {
+		case "budget":
+			matched = amountMatch(a.budget, b.budget)
+		case "buyer":
+			matched = textMatch(a.buyer, b.buyer)
+		case "agency":
+			matched = textMatch(a.agency, b.agency)
+		case "s_winner":
+			matched = textMatch(a.s_winner, b.s_winner)
+		case "bidamount":
+			matched = amountMatch(a.bidamount, b.bidamount)
+		case "projectcode":
+			matched = textMatch(a.projectcode, b.projectcode)
+		case "contractcode":
+			matched = textMatch(a.contractcode, b.contractcode)
+		}
+		// The first field that disagrees settles the answer.
+		if !matched {
+			return false
+		}
+	}
+	return true
+}
+
+
+//发布时间判断
+//正序
+func publishtime_b_a(a,b dataSource) bool{
+	return b.publishtime-a.publishtime < 86400 * 31 * 9
+}
+//倒序
+func publishtime_a_b(a,b dataSource) bool {
+	return a.publishtime-b.publishtime < 86400 * 31 * 9
+}
+
+
+// pankong reports whether a is a non-empty string.
+func pankong(a string) bool {
+	return a != ""
+}

+ 5 - 1
process_medical/src/main.go

@@ -38,7 +38,11 @@ func init()  {
 
 
 func main()  {
-	saveAreaMysql()
+	//initMySqlMgoData()
+	//saveBiddingPurMysql()
+
+
+
 }
 
 

+ 155 - 90
process_medical/src/medical_bidding.go

@@ -7,6 +7,7 @@ import (
 	qu "qfw/util"
 	"strings"
 	"time"
+	"unicode/utf8"
 )
 
 var proNameDict = map[string]string{}
@@ -16,20 +17,29 @@ func saveBiddingPurMysql() {
 	log.Debug("分析数据-保存两个mysql表...")
 	sess := save_mgo.GetMgoConn()
 	defer save_mgo.DestoryMongoConn(sess)
-	q,total:=map[string]interface{}{},0
+	q,total,isok:=map[string]interface{}{},0,0
 	it := sess.DB(save_mgo.DbName).C(o_bidding_coll).Find(&q).Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
-		if total%10000==0 {
-			log.Debug("curent index ",total)
+		if total%1000==0 {
+			log.Debug("curent index ",total,isok)
 		}
 		tmpid := BsonTOStringId(tmp["_id"])
-		dealWithBidData(tmp,tmpid)
-		dealWithPurData(tmp,tmpid)
+		repeat:= qu.IntAll(tmp["repeat"])
+		if repeat!=1 {
+			isok++
+			dealWithBidData(tmp,tmpid)
+			dealWithPurData(tmp,tmpid)
+		}
 		tmp = make(map[string]interface{})
 	}
-	log.Debug("is over ",total)
+	log.Debug("is over ",total,isok)
 }
 
+
+
+
+
+
 //构建-招投标数据-无异常
 func dealWithBidData(tmp map[string]interface{},tmpid string){
 	data := map[string]interface{}{}
@@ -108,14 +118,20 @@ func dealWithPurData(tmp map[string]interface{},tmpid string){
 	for _,v:=range p_list{
 		data := map[string]interface{}{}
 		product_name := qu.ObjToString(v["product_name"])
-
+		brandname := qu.ObjToString(v["brandname"])
 		data["info_id"] = info_id
 		data["bid_name"] = qu.ObjToString(v["itemname"])
 		data["product_name"] = product_name
-		data["product_brand"] = qu.ObjToString(v["brandname"])
-		data["product_model"] = qu.ObjToString(v["model"])
+		data["product_brand"] = brandname
 		data["product_specs"] = ""
 
+		product_model := qu.ObjToString(v["model"])
+		if utf8.RuneCountInString(product_model)>100 {
+			product_model = ""
+		}
+		data["product_model"] = product_model
+
+
 		if v["unitprice"]!=nil {
 			data["product_price"] = qu.Float64All(v["unitprice"])
 		}
@@ -156,39 +172,23 @@ func dealWithPurData(tmp map[string]interface{},tmpid string){
 		}
 
 		//涉及-make_company_id make_company_name make_company_ysname -关联f_product
-		query := "SELECT company_id,company_name FROM f_product WHERE business_type=? or business_type=? LIMIT 0,20"
-		proinfo := MysqlTool.SelectBySql(query, "1","3")
+		query := "SELECT company_id,company_name FROM f_product WHERE product_name = ? and (business_type = ? or business_type = ?)"
+		proinfo := MysqlTool.SelectBySql(query, product_name,"1","3")
 		if len(*proinfo) == 1 { //查询到多条-同产品信息
 			info :=(*proinfo)[0]
 			data["make_company_id"] = qu.ObjToString(info["company_id"])
 			data["make_company_name"] = qu.ObjToString(info["company_name"])
 			data["make_company_ysname"] = ""
-		}else if len(*proinfo) > 1 {
-			//是否多个
-			pro_name := ""
-			pro_id := ""
-			isUse:=true
-			for index,info := range *proinfo {
-				company_name := qu.ObjToString(info["company_name"])
-				company_id := qu.ObjToString(info["company_id"])
-				if index==0 {
-					pro_name = company_name
-					pro_id = company_id
-				}else {
-					if company_name!=pro_name {
-						isUse = false
-						pro_name+=","+company_name
-					}
-				}
-			}
+		}else if len(*proinfo) > 1 {//如果有多个企业-生产信息
+			isUse,c_name,c_id := matchedManufacturer(brandname,*proinfo)
 			if isUse {
-				data["make_company_id"] = pro_id
-				data["make_company_name"] = pro_name
+				data["make_company_id"] = c_id
+				data["make_company_name"] = c_name
 				data["make_company_ysname"] = ""
 			}else {
 				data["make_company_id"] = ""
 				data["make_company_name"] = ""
-				data["make_company_ysname"] = pro_name
+				data["make_company_ysname"] = c_name
 			}
 		}else {
 			data["make_company_id"] = ""
@@ -213,14 +213,57 @@ func dealWithPurData(tmp map[string]interface{},tmpid string){
 
 
 }
+//根据品牌字段匹配-有效生产企业-过多10条
+func matchedManufacturer(brandName string,infoArr []map[string]interface{}) (bool,string,string) {
+	c_name :=""
+	namedict := map[string]string{}
+	for i:=0;i<len(infoArr);i++{
+		info := infoArr[i]
+		company_name := qu.ObjToString(info["company_name"])
+		company_id := qu.ObjToString(info["company_id"])
+		if brandName!="" { //品牌匹配
+			if strings.Contains(company_name,brandName) {
+				return true,company_name,company_id
+			}
+		}
+		if len(namedict) < 10 {
+			if c_name=="" {
+				c_name = company_name
+				namedict[company_name] = company_id
+			}else {
+				if namedict[company_name]=="" {
+					c_name += ","+company_name
+					namedict[company_name] = company_id
+				}
+			}
+		}
+	}
+
+	if len(namedict)==1 {
+		return true,c_name,namedict[c_name]
+	}
+	return false,c_name,""
+}
+
 
 
 
-//导出源数据... 163-qfw
-func exportUsefulPurMysql() {
+//准备工作-导出源数据很重要... 163
+func exportUsefulPurBidData() {
 	sess := qy_mgo.GetMgoConn()
 	defer qy_mgo.DestoryMongoConn(sess)
-	q,total:=map[string]interface{}{},0
+	//构建标准产品数据
+	dataArr,_ := save_mgo.Find("zktest_mairui",nil,nil,nil)
+	for _,v := range dataArr {
+		name := qu.ObjToString(v["name"])
+		proNameDict[name] = name
+	}
+	log.Debug("迈瑞产品...",len(proNameDict))
+	q,total:=map[string]interface{}{
+		"publishtime": map[string]interface{}{
+		"$gte":1546272000,
+		},
+	},0
 	isok:=0
 	it := sess.DB("qfw").C(s_bidding_coll).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
@@ -236,81 +279,101 @@ func exportUsefulPurMysql() {
 				p_list = qu.ObjArrToMapArr(yl_purchasinglist)
 			}
 		}
+		//临时-匹配是不是含有-有效迈瑞-产品
 		if len(p_list)>0 {
-			isok++
-			s_winner := qu.ObjToString(tmp["s_winner"])
-			new_data["yl_purchasinglist"] = p_list
-			tmpid := BsonTOStringId(tmp["_id"])
-			new_data["info_id"] = tmpid
-			new_data["subtype"] = qu.ObjToString(tmp["subtype"])
-			new_data["toptype"] = qu.ObjToString(tmp["toptype"])
-			new_data["projectname"] =qu.ObjToString(tmp["projectname"])
-			new_data["publishtime"] = qu.IntAll(tmp["publishtime"])
-			new_data["buyer"] =qu.ObjToString(tmp["buyer"])
-			new_data["buyerclass"] =qu.ObjToString(tmp["buyerclass"])
-			if tmp["budget"]!=nil {
-				new_data["budget"] =tmp["budget"]
-			}
-			if tmp["bidamount"]!=nil {
-				new_data["bidamount"] =tmp["bidamount"]
-			}
-			new_data["s_winner"] =s_winner
-			new_data["agency"] =qu.ObjToString(tmp["agency"])
-			new_data["area"] =qu.ObjToString(tmp["area"])
-			new_data["city"] =qu.ObjToString(tmp["city"])
-			new_data["district"] =qu.ObjToString(tmp["district"])
-			new_data["jyhref"] = fmt.Sprintf(Url, qu.CommonEncodeArticle("content", tmpid))
-
-			if tmp["subscopeclass"]!=nil {
-				new_data["subscopeclass"] = tmp["subscopeclass"]
+			new_p_list := matchUsefulMrBidProduct(p_list)
+			if len(new_p_list)>0 {
+				isok++
+				s_winner := qu.ObjToString(tmp["s_winner"])
+				new_data["yl_purchasinglist"] = p_list
+				tmpid := BsonTOStringId(tmp["_id"])
+				new_data["info_id"] = tmpid
+				new_data["_id"] = tmp["_id"]
+				new_data["subtype"] = qu.ObjToString(tmp["subtype"])
+				new_data["toptype"] = qu.ObjToString(tmp["toptype"])
+				new_data["projectname"] =qu.ObjToString(tmp["projectname"])
+				new_data["publishtime"] = qu.IntAll(tmp["publishtime"])
+				new_data["buyer"] =qu.ObjToString(tmp["buyer"])
+				new_data["buyerclass"] =qu.ObjToString(tmp["buyerclass"])
+				if tmp["budget"]!=nil {
+					new_data["budget"] =tmp["budget"]
+				}
+				if tmp["bidamount"]!=nil {
+					new_data["bidamount"] =tmp["bidamount"]
+				}
+				new_data["s_winner"] =s_winner
+				new_data["agency"] =qu.ObjToString(tmp["agency"])
+				new_data["area"] =qu.ObjToString(tmp["area"])
+				new_data["city"] =qu.ObjToString(tmp["city"])
+				new_data["district"] =qu.ObjToString(tmp["district"])
+				new_data["jyhref"] = fmt.Sprintf(Url, qu.CommonEncodeArticle("content", tmpid))
+
+				if tmp["subscopeclass"]!=nil {
+					new_data["subscopeclass"] = tmp["subscopeclass"]
+				}
+				save_mgo.Save(o_bidding_coll,new_data)
+				save_mgo.Save("zktest_mysql_bidding_repeat",tmp) //业务判重需要-
 			}
-			save_mgo.Save(o_bidding_coll,new_data)
-
 		}
 		tmp = make(map[string]interface{})
 	}
 
 	log.Debug("is bidding over ",total,isok)
 }
-
-
-
-
-
-
-
-
-
-//调试- 指定数据
-func testMaiRuiData() {
-	query := "SELECT product_name FROM f_product WHERE company_name LIKE CONCAT('%',?,'%')"
-	proinfo := MysqlTool.SelectBySql(query, "迈瑞")
-	for _,v := range *proinfo{
+func matchUsefulMrBidProduct(p_list []map[string]interface{})(new_p_list []map[string]interface{}){
+	for _,v := range p_list {
 		product_name := qu.ObjToString(v["product_name"])
-		if proNameDict[product_name]==""{
-			proNameDict[product_name] = product_name
+		if proNameDict[product_name] != "" {
+			new_p_list =append(new_p_list,v)
 		}
 	}
-	log.Debug("涉及",len(proNameDict),"产品信息")
-
+	return new_p_list
+}
+//根据目标数据,去掉重复 - 重新导出数据
+func exportNewPurBidData()  {
 	sess := save_mgo.GetMgoConn()
 	defer save_mgo.DestoryMongoConn(sess)
 	q,total:=map[string]interface{}{},0
-	it := sess.DB(save_mgo.DbName).C("112233").Find(&q).Iter()
+	isok:=0
+	it := sess.DB(save_mgo.DbName).C("zktest_mysql_bidding_repeat").Find(&q).Sort("_id").Select(map[string]interface{}{
+		"repeat":1,
+	}).Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
 		if total%1000==0 {
-			log.Debug("curent index ",total)
+			log.Debug("curent index ",total,tmp["_id"],"~",isok)
+		}
+		tmpid := BsonTOStringId(tmp["_id"])
+		repeat := tmp["repeat"].(bool)
+		if repeat {
+			isok++
+			save_mgo.UpdateById(o_bidding_coll,tmpid, map[string]interface{}{
+				"$set": map[string]interface{}{
+					"repeat":1,
+				},
+			})
 		}
-		info_id := qu.ObjToString(tmp["info_id"])
-		data := save_mgo.FindOne(o_bidding_coll, map[string]interface{}{
-			"info_id":info_id,
-		})
-		dealWithBidData(data,info_id)
-		dealWithPurData(data,info_id)
 		tmp = make(map[string]interface{})
 	}
+	log.Debug("is over ",total,isok)
 }
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//导出指定-迈瑞相关数据-已弃用
 func exportMaiRuiInfoId(){
 	query := "SELECT product_name FROM f_product WHERE company_name LIKE CONCAT('%',?,'%')"
 	proinfo := MysqlTool.SelectBySql(query, "迈瑞")
@@ -343,4 +406,6 @@ func exportMaiRuiInfoId(){
 			}
 		}
 	}
-}
+}
+
+