Ver Fonte

Merge branch 'dev2.1' into dev3.0

* dev2.1:
  no message
  no message

# Conflicts:
#	src/front/group.go
jianghan7 há 1 ano atrás
pai
commit
4a831d03d4
3 ficheiros alterados com 419 adições e 2 exclusões
  1. 25 1
      isrepeat/main.go
  2. 384 0
      isrepeat/task.go
  3. 10 1
      src/front/group.go

+ 25 - 1
isrepeat/main.go

@@ -31,7 +31,8 @@ func ShowTable() {
 	fmt.Println("小工具-数据处理")
 	fmt.Println("1、导入清洗数据")
 	fmt.Println("2、推送数据")
-	fmt.Println("3、数据判重")
+	fmt.Println("3、联通数据判重")
+	fmt.Println("4、业务数据判重")
 	fmt.Println("0、EXIT")
 	fmt.Println("================================")
 }
@@ -62,6 +63,29 @@ func main() {
 			if p != "" {
 				task3()
 			}
+		} else if flag == 4 {
+			fmt.Println("请输入表名,进行数据判重.")
+			var coll, stime, etime, sType string
+			fmt.Scan(&coll)
+			if coll != "" {
+				fmt.Println("请输入开始时间, 格式:2006-01-02")
+				fmt.Scan(&stime)
+				if stime != "" {
+					fmt.Println("请输入结束时间, 格式:2006-01-02")
+					fmt.Scan(&etime)
+					if etime != "" {
+						fmt.Println("请输入排序方式,1正序、-1倒序")
+						fmt.Scan(&sType)
+						if sType != "" {
+							task4(coll, stime, etime, sType)
+						}
+					} else {
+						fmt.Println("请输入结束时间, 格式:2006-01-02")
+					}
+				} else {
+					fmt.Println("请输入开始时间, 格式:2006-01-02")
+				}
+			}
 		}
 	}
 }

+ 384 - 0
isrepeat/task.go

@@ -0,0 +1,384 @@
+package main
+
+import (
+	"go.mongodb.org/mongo-driver/bson"
+	"log"
+	"mongodb"
+	qu "qfw/util"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// zhb_key_list lists the fields that key_list compares, in the order it checks them.
+var zhb_key_list = []string{"budget", "buyer", "agency", "s_winner", "bidamount", "projectcode", "contractcode"}
+
+// packreg matches a package/lot marker: one alphanumeric, circled, Roman or
+// Chinese numeral character followed by 包, 标 or 段 and an optional colon.
+// MustCompile is used so that an invalid pattern fails loudly at start-up
+// instead of leaving a nil *Regexp behind a discarded error.
+var packreg = regexp.MustCompile(`([a-zA-Z0-9①②ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ一二三四五六七八九十](包|标|段)[::]?)`)
+
+// listSource holds the records loaded by task4; dataItem compares its entries pairwise.
+var listSource []*dataSource
+
+// dataSource is the in-memory view of one record taking part in the
+// duplicate check, together with the verdict computed for it.
+type dataSource struct {
+	// Identification. task4 fills _id from the document's "_id" and id from its "id" field.
+	_id   string
+	id    string
+	title string
+
+	// Project attributes used by the comparison rules.
+	projectname  string
+	projectcode  string
+	contractcode string
+
+	// Parties involved.
+	buyer    string
+	agency   string
+	s_winner string
+
+	// Amounts; the rules only treat them as matching when at least one side is positive.
+	budget    float64
+	bidamount float64
+
+	// Verdict fields written by panchong.
+	isrepeat         bool              // true once this record is judged a duplicate
+	repeat_id_source string            // id of the record this one duplicates
+	repeat_id        map[string]string // ids of the records that duplicate this one (values are always "")
+	repeatText       string            // rule that produced the verdict
+}
+
+// task4 runs the business-data duplicate check on collection coll.
+//
+// It loads every record whose publishtime lies between startTime and endTime
+// (both parsed with qu.Date_Short_Layout), ordered by publishtime ascending,
+// or descending when sortType is "-1". It then lets dataItem compare the
+// records pairwise and finally writes the verdict fields (repeatid, repeat,
+// repeatid_ids, repeattext) back to every loaded record.
+//
+// An unparsable date aborts the run; previously the parse error was dropped
+// and the zero time was silently used as the range boundary.
+func task4(coll, startTime, endTime, sortType string) {
+	log.Printf("表名:%s,开始时间:%s,结束时间:%s,排序方式:%s", coll, startTime, endTime, sortType)
+	stime, err := time.Parse(qu.Date_Short_Layout, startTime)
+	if err != nil {
+		log.Printf("开始时间格式错误:%s,%v", startTime, err)
+		return
+	}
+	etime, err := time.Parse(qu.Date_Short_Layout, endTime)
+	if err != nil {
+		log.Printf("结束时间格式错误:%s,%v", endTime, err)
+		return
+	}
+	// NOTE(review): with a date-only layout endTime parses to 00:00 of that
+	// day, so records published later on the end date are presumably
+	// excluded — confirm this is intended.
+	query := bson.M{}
+	query["$and"] = []interface{}{
+		bson.M{"publishtime": bson.M{"$gte": stime.Unix()}},
+		bson.M{"publishtime": bson.M{"$lte": etime.Unix()}},
+	}
+	sort := "publishtime"
+	if sortType == "-1" {
+		sort = "-publishtime"
+	}
+	log.Println(query, sort)
+
+	// Start from an empty list: listSource is package-level, so records left
+	// over from an earlier run would otherwise be compared again and written
+	// into this run's collection.
+	listSource = nil
+
+	sess := Mgo.GetMgoConn()
+	defer Mgo.DestoryMongoConn(sess)
+	it := sess.DB(Mgo.DbName).C(coll).Find(query).Sort(sort).Iter()
+	index := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
+		// Text fields are lower-cased so the comparisons are case-insensitive.
+		d := &dataSource{
+			_id:          mongodb.BsonIdToSId(tmp["_id"]),
+			id:           qu.ObjToString(tmp["id"]),
+			title:        strings.ToLower(qu.ObjToString(tmp["title"])),
+			projectname:  strings.ToLower(qu.ObjToString(tmp["projectname"])),
+			projectcode:  strings.ToLower(qu.ObjToString(tmp["projectcode"])),
+			contractcode: strings.ToLower(qu.ObjToString(tmp["contractcode"])),
+			buyer:        strings.ToLower(qu.ObjToString(tmp["buyer"])),
+			agency:       strings.ToLower(qu.ObjToString(tmp["agency"])),
+			s_winner:     strings.ToLower(qu.ObjToString(tmp["s_winner"])),
+			budget:       qu.Float64All(tmp["budget"]),
+			bidamount:    qu.Float64All(tmp["bidamount"]),
+			repeat_id:    map[string]string{},
+		}
+		if index%10000 == 0 {
+			log.Println("加载数据:", index)
+		}
+		listSource = append(listSource, d)
+		// Fresh map per document so fields of the previous one cannot leak
+		// into the next decode.
+		tmp = map[string]interface{}{}
+	}
+	log.Println("数据加载完成")
+
+	dataItem()
+
+	dd := 0 // number of records judged to be duplicates
+	for i, a := range listSource {
+		if a.isrepeat {
+			dd++
+		}
+		// Write the verdict back to the record.
+		Mgo.UpdateById(coll, a._id,
+			map[string]interface{}{"$set": map[string]interface{}{
+				"repeatid":     a.repeat_id_source, // id of the record this one duplicates
+				"repeat":       a.isrepeat,         // whether this record was judged a duplicate
+				"repeatid_ids": a.repeat_id,        // ids of the records that duplicate this one
+				"repeattext":   a.repeatText,       // rule that produced the verdict
+			}})
+		if i%1000 == 0 {
+			log.Println("已更新", i)
+		}
+	}
+	log.Println(dd)
+}
+
+// listSize is not referenced by any function in this file.
+// NOTE(review): presumably a batch/window size for the comparison — confirm or remove.
+var listSize = 20000
+
+// dataItem compares every pair of records in listSource (index i against
+// every later index j) with panchong and stores the results back into the
+// list. It logs progress every 500 outer iterations.
+func dataItem() {
+	for i := range listSource {
+		for j := i + 1; j < len(listSource); j++ {
+			// panchong works on copies and returns pointers to the (possibly
+			// updated) copies, so both slots have to be stored back.
+			listSource[i], listSource[j] = panchong(*listSource[i], *listSource[j])
+		}
+		if i%500 == 0 {
+			log.Println("已处理:", i)
+		}
+	}
+}
+// panchong decides whether b is a duplicate of a.
+//
+// When a rule matches, b is marked (isrepeat, repeat_id_source = a.id,
+// repeatText = the rule's name) and b.id is recorded in a.repeat_id. Both
+// parameters are copies; the returned pointers refer to those copies, so the
+// caller must store them to keep the verdict.
+//
+// Rules, in order:
+//   - two non-empty but different contract codes never match;
+//   - equal titles: compare budget, then project code, then bid amount;
+//   - different titles: project names must be equal or contain one another,
+//     and the remaining checks depend on whether a's title carries a
+//     package/lot marker (packreg) and on the bid amounts.
+//
+// Fix: the last rule used to test a.budget == a.budget, which holds for any
+// non-NaN value, so the budgets were never actually compared.
+func panchong(a, b dataSource) (c, d *dataSource) {
+	// sameText: both values are non-empty and equal.
+	sameText := func(x, y string) bool {
+		return pankong(x) && pankong(y) && x == y
+	}
+	// sameAmount: the amounts are equal and positive.
+	sameAmount := func(x, y float64) bool {
+		return x == y && (x > 0 || y > 0)
+	}
+	// mark records b as a duplicate of a for the given reason.
+	mark := func(reason string) {
+		if a.repeat_id == nil {
+			// Writing to a nil map would panic.
+			a.repeat_id = map[string]string{}
+		}
+		b.repeat_id_source = a.id
+		a.repeat_id[b.id] = ""
+		b.isrepeat = true
+		b.repeatText = reason
+	}
+
+	// Every rule path starts with this guard, so it is checked once up front.
+	if pankong(a.contractcode) && pankong(b.contractcode) && a.contractcode != b.contractcode {
+		return &a, &b
+	}
+
+	sameBudget := sameAmount(a.budget, b.budget)
+	sameBid := sameAmount(a.bidamount, b.bidamount)
+
+	// Equal titles.
+	if a.title == b.title {
+		// NOTE(review): with the default zhb_key_list, key_list also requires
+		// matching budget and buyer, which the branches calling it have just
+		// ruled out, so those calls apparently cannot succeed — confirm intent.
+		switch {
+		case sameBudget:
+			if sameText(a.buyer, b.buyer) {
+				if sameText(a.s_winner, b.s_winner) {
+					mark("标题相等 && buyer && s_winner")
+				}
+			} else if key_list(a, b) {
+				mark("标题相等 && budget && key_list")
+			}
+		case sameText(a.projectcode, b.projectcode):
+			if key_list(a, b) {
+				mark("标题相等 && projectcode && key_list")
+			}
+		case sameBid:
+			if sameText(a.buyer, b.buyer) {
+				if sameText(a.s_winner, b.s_winner) {
+					mark("标题相等 && bidamount && buyer && s_winner")
+				}
+			} else if key_list(a, b) {
+				mark("标题相等 && bidamount && key_list")
+			}
+		}
+		return &a, &b
+	}
+
+	// Different titles: the project names must be equal or contain one another.
+	// NOTE(review): strings.Contains reports true for an empty substring, so a
+	// record with an empty projectname passes this gate against any other
+	// record — confirm that this is intended.
+	if !strings.Contains(a.projectname, b.projectname) && !strings.Contains(b.projectname, a.projectname) {
+		return &a, &b
+	}
+
+	hasPack := packreg.MatchString(a.title) // a's title names a package/lot
+	sameName := a.projectname == b.projectname
+	// bidUnset: the bid amounts are equal but neither is positive.
+	bidUnset := !sameBid && a.bidamount == b.bidamount
+
+	switch {
+	case hasPack && sameName:
+		if sameBid {
+			mark("标题不相等-->有分包 && projectname && bidamount")
+		} else if bidUnset && sameText(a.s_winner, b.s_winner) && sameBudget {
+			mark("标题不相等-->有分包 && projectname && s_winner && budget")
+		}
+	case hasPack: // project name contained, not equal
+		if sameBid {
+			switch {
+			case sameText(a.projectcode, b.projectcode):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && projectcode")
+			case sameText(a.s_winner, b.s_winner):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && s_winner")
+			case sameBudget && sameText(a.buyer, b.buyer):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && budget && buyer")
+			case sameBudget && sameText(a.agency, b.agency):
+				mark("标题不相等-->有分包 && projectname包含 && bidamount && budget && agency")
+			}
+		} else if bidUnset && sameText(a.s_winner, b.s_winner) && sameBudget {
+			mark("标题不相等-->有分包 && projectname包含 && s_winner && budget")
+		}
+	case sameName: // no package marker, project names equal
+		if sameBid {
+			mark("标题不相等-->无分包 && projectname && bidamount")
+		} else if bidUnset {
+			switch {
+			case sameText(a.projectcode, b.projectcode):
+				mark("标题不相等-->无分包 && projectname && projectcode")
+			case sameText(a.s_winner, b.s_winner):
+				mark("标题不相等-->无分包 && projectname && s_winner")
+			case sameBudget && sameText(a.buyer, b.buyer):
+				mark("标题不相等-->无分包 && projectname && budget && buyer")
+			case sameBudget && sameText(a.agency, b.agency):
+				mark("标题不相等-->无分包 && projectname && budget && agency")
+			}
+		}
+	default: // no package marker, project name contained
+		if sameBid {
+			switch {
+			case sameText(a.projectcode, b.projectcode):
+				mark("标题不相等-->无分包 && projectname包含 && bidamount")
+			case sameText(a.s_winner, b.s_winner):
+				mark("标题不相等-->无分包 && projectname包含 && s_winner")
+			case sameBudget && sameText(a.buyer, b.buyer):
+				mark("标题不相等-->无分包 && projectname包含 && budget && buyer")
+			case sameBudget && sameText(a.agency, b.agency):
+				mark("标题不相等-->无分包 && projectname包含 && budget && agency")
+			}
+		} else if bidUnset && sameText(a.s_winner, b.s_winner) && sameBudget {
+			// The budgets are now really compared (was a.budget == a.budget).
+			mark("标题不相等-->无分包 && projectname包含 && s_winner && budget")
+		}
+	}
+	return &a, &b
+}
+
+// key_list reports whether a and b agree on every field named in
+// zhb_key_list ("budget", "buyer", "agency", "s_winner", "bidamount",
+// "projectcode", "contractcode" by default).
+//
+// Text fields agree when both are non-empty and equal; amounts agree when
+// they are equal and positive. Names without a case below are ignored.
+func key_list(a, b dataSource) bool {
+	for _, field := range zhb_key_list {
+		switch field {
+		case "budget":
+			if !(a.budget == b.budget && (a.budget > 0 || b.budget > 0)) {
+				return false
+			}
+		case "buyer":
+			if !(a.buyer == b.buyer && pankong(a.buyer) && pankong(b.buyer)) {
+				return false
+			}
+		case "agency":
+			if !(a.agency == b.agency && pankong(a.agency) && pankong(b.agency)) {
+				return false
+			}
+		case "s_winner":
+			if !(a.s_winner == b.s_winner && pankong(a.s_winner) && pankong(b.s_winner)) {
+				return false
+			}
+		case "bidamount":
+			if !(a.bidamount == b.bidamount && (a.bidamount > 0 || b.bidamount > 0)) {
+				return false
+			}
+		case "projectcode":
+			if !(a.projectcode == b.projectcode && pankong(a.projectcode) && pankong(b.projectcode)) {
+				return false
+			}
+		case "contractcode":
+			if !(a.contractcode == b.contractcode && pankong(a.contractcode) && pankong(b.contractcode)) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// pankong reports whether a is a non-empty string.
+func pankong(a string) bool {
+	return a != ""
+}

+ 10 - 1
src/front/group.go

@@ -923,6 +923,7 @@ func (f *Front) GroupImportData() {
 	c1 := 0 // 多包第n条数据
 	for rn, row := range rows {
 		update := make(map[string]interface{})
+		del := make(map[string]interface{})
 		if rn == 0 {
 			for index, cell := range row.Cells {
 				if cell.Value == "唯一标识" || cell.Value == "信息标识" { //id所在列
@@ -950,6 +951,10 @@ func (f *Front) GroupImportData() {
 					} else {
 						update[fmt.Sprintf("v_baseinfo.%s", f)] = val
 					}
+				} else {
+					if f != "is_push" {
+						del[fmt.Sprintf("v_baseinfo.%s", f)] = "1"
+					}
 				}
 			}
 			if qu.IntAll(update["v_baseinfo.multipackage"]) == 1 {
@@ -1006,7 +1011,11 @@ func (f *Front) GroupImportData() {
 			// 临时
 			update["tag"] = "临时"
 			lastid = id
-			util.Mgo.UpdateById(util.DATACOLLNAME, id, bson.M{"$set": update})
+			if len(del) > 0 {
+				util.Mgo.UpdateById(util.DATACOLLNAME, id, bson.M{"$set": update, "$unset": del})
+			} else {
+				util.Mgo.UpdateById(util.DATACOLLNAME, id, bson.M{"$set": update})
+			}
 		}
 	}
 	f.ServeJson(map[string]interface{}{"success": true, "msg": "数据导入成功", "count": count})