zhengkun 2 år sedan
förälder
incheckning
3d4690cea4

+ 289 - 0
data_quality/src/figure/figure.go

@@ -0,0 +1,289 @@
+package figure
+
+import (
+	"fmt"
+	log "github.com/donnie4w/go-logger/logger"
+	"go.mongodb.org/mongo-driver/bson"
+	qu "qfw/util"
+	"strings"
+	"time"
+	u "ul"
+	"unicode/utf8"
+)
+
+func FigureUnLevelScreenNumber() {
+	dataArr, _ := u.Ext_Mgo.Find("zktest_marked_info", map[string]interface{}{
+		"qua_rejecty": 1,
+	}, nil, map[string]interface{}{
+		"subtype": 1,
+		"qua_res": 1,
+		"qua_tag": 1,
+	})
+	n1, n2, n3, n4 := 0, 0, 0, 0
+	n5, n6, n7, n8 := 0, 0, 0, 0
+	for _, v := range dataArr {
+		subtype := qu.ObjToString(v["subtype"])
+		qua_tag := qu.IntAll(v["qua_tag"])
+		qua_res := *qu.ObjToMap(v["qua_res"])
+		qua_level := qu.IntAll(qua_res["qua_level"])
+		if subtype == "中标" || subtype == "成交" || subtype == "合同" {
+			if qua_level == -1 {
+				if qua_tag == 1 {
+					n3++
+				} else {
+					n4++
+				}
+			}
+			if qua_level == 0 {
+				if qua_tag == 1 {
+					n7++
+				} else {
+					n8++
+				}
+			}
+		} else {
+			if qua_level == -1 {
+				if qua_tag == 1 {
+					n1++
+				} else {
+					n2++
+				}
+			}
+			if qua_level == 0 {
+				if qua_tag == 1 {
+					n5++
+				} else {
+					n6++
+				}
+			}
+		}
+	}
+	log.Debug("不可信级别招标~", n1, "~", n2)
+	log.Debug("不可信级别结果~", n3, "~", n4)
+	log.Debug("边缘级别招标~", n5, "~", n6)
+	log.Debug("边缘级别结果~", n7, "~", n8)
+
+}
+
+// FigureUnLevelScreenInfo re-screens low-rated records and marks the ones
+// that still look usable.
+//
+// Pass 1 walks zktest_marked_source and remembers the id of every document
+// whose detail is shorter than 100 runes, whose filetext is empty and whose
+// detail does not contain "详情请访问原网页".
+//
+// Pass 2 walks zktest_marked_info. A remembered document whose
+// qua_res.qua_level is 0 or -1 gets qua_rejecty set to 1 when its buyer is
+// non-empty and, for the result-type subtypes 中标/成交/合同, its s_winner is
+// non-empty as well. Marked documents are counted separately for
+// qua_tag == 2 and for every other qua_tag.
+func FigureUnLevelScreenInfo() {
+	// Pass 1: load the body-text information.
+	query := map[string]interface{}{}
+	shortIDs := map[string]string{}
+	scanned := 0
+	srcSess := u.Ext_Mgo.GetMgoConn()
+	defer u.Ext_Mgo.DestoryMongoConn(srcSess)
+	srcFields := map[string]interface{}{
+		"detail":   1,
+		"filetext": 1,
+	}
+	srcIt := srcSess.DB(u.Ext_Mgo.DbName).C("zktest_marked_source").Find(&query).Sort("_id").Select(srcFields).Iter()
+	for doc := make(map[string]interface{}); srcIt.Next(&doc); scanned++ {
+		if scanned%10000 == 0 {
+			log.Debug("cur index ", scanned, "~", len(shortIDs))
+		}
+		detail := qu.ObjToString(doc["detail"])
+		filetext := qu.ObjToString(doc["filetext"])
+		id := u.BsonTOStringId(doc["_id"])
+
+		shortDetail := utf8.RuneCountInString(detail) < 100
+		noFileText := filetext == ""
+		pointsToOrigin := strings.Contains(detail, "详情请访问原网页")
+		if shortDetail && noFileText && !pointsToOrigin {
+			shortIDs[id] = id
+		}
+		// Start the next iteration with a fresh map.
+		doc = make(map[string]interface{})
+	}
+	log.Debug("is over ...", scanned, "~", len(shortIDs))
+
+	// Pass 2: mark the qualifying records.
+	visited, markedOther, markedTag2 := 0, 0, 0
+	infoSess := u.Ext_Mgo.GetMgoConn()
+	defer u.Ext_Mgo.DestoryMongoConn(infoSess)
+	infoFields := map[string]interface{}{
+		"qua_res":  1,
+		"qua_tag":  1,
+		"buyer":    1,
+		"s_winner": 1,
+		"subtype":  1,
+	}
+	infoIt := infoSess.DB(u.Ext_Mgo.DbName).C("zktest_marked_info").Find(&query).Sort("_id").Select(infoFields).Iter()
+	for doc := make(map[string]interface{}); infoIt.Next(&doc); visited++ {
+		if visited%10000 == 0 {
+			log.Debug("cur index ", visited, "~", markedOther, "~", markedTag2)
+		}
+		id := u.BsonTOStringId(doc["_id"])
+		buyer := qu.ObjToString(doc["buyer"])
+		winner := qu.ObjToString(doc["s_winner"])
+		subtype := qu.ObjToString(doc["subtype"])
+		res := *qu.ObjToMap(doc["qua_res"])
+		level := qu.IntAll(res["qua_level"])
+		tag := qu.IntAll(doc["qua_tag"])
+
+		isResult := subtype == "中标" || subtype == "成交" || subtype == "合同"
+		lowRated := level == 0 || level == -1
+		remembered := shortIDs[id] != ""
+		// Result-type notices additionally need a winner.
+		complete := buyer != "" && (!isResult || winner != "")
+
+		if lowRated && remembered && complete {
+			if tag == 2 {
+				markedTag2++
+			} else {
+				markedOther++
+			}
+			u.Ext_Mgo.UpdateById("zktest_marked_info", id, map[string]interface{}{
+				"$set": map[string]interface{}{
+					"qua_rejecty": 1,
+				},
+			})
+		}
+		doc = make(map[string]interface{})
+	}
+	log.Debug("is over ... ", visited, "~", markedOther, "~", markedTag2)
+	time.Sleep(2 * time.Second)
+}
+
+//评估占比
+func FigurelQualityLevelFirst() {
+	n1, n2, n3, n4 := 0, 0, 0, 0
+	dataArr, _ := u.Ext_Mgo.Find("zktest_marked_source", nil, nil, bson.M{"qua_res": 1, "subtype": 1})
+	log.Debug("查询...", len(dataArr))
+	for _, v := range dataArr {
+		qua_res := qu.IntAll(v["qua_res"])
+		subtype := qu.ObjToString(v["subtype"])
+		if subtype == "中标" || subtype == "成交" || subtype == "合同" {
+			if qua_res == 1 {
+				n3++
+			} else {
+				n4++
+			}
+		} else {
+			if qua_res == 1 {
+				n1++
+			} else {
+				n2++
+			}
+		}
+	}
+	log.Debug("招标类:", n1, "~", n2)
+	log.Debug("结果类:", n3, "~", n4)
+}
+
+// FigurelQualityLevelSecond evaluates proportions over zktest_marked_info.
+// Documents are grouped by notice category (result-type subtypes
+// 中标/成交/合同 versus every other subtype), then by qua_res.qua_level and
+// by qua_tag, both rendered as decimal strings. For levels "1", "0" and "-1"
+// it logs the sum of the qua_tag "1" and "2" counts followed by each count.
+func FigurelQualityLevelSecond() {
+	query := map[string]interface{}{}
+	sess := u.Ext_Mgo.GetMgoConn()
+	defer u.Ext_Mgo.DestoryMongoConn(sess)
+	it := sess.DB(u.Ext_Mgo.DbName).C("zktest_marked_info").Find(&query).Sort("_id").Iter()
+
+	// level key -> qua_tag key -> number of documents
+	tenderStats := map[string]map[string]int{}
+	resultStats := map[string]map[string]int{}
+	total := 0
+	for doc := make(map[string]interface{}); it.Next(&doc); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total)
+		}
+		subtype := qu.ObjToString(doc["subtype"])
+		tagKey := fmt.Sprintf("%d", qu.IntAll(doc["qua_tag"]))
+		res := *qu.ObjToMap(doc["qua_res"])
+		levelKey := fmt.Sprintf("%d", qu.IntAll(res["qua_level"]))
+
+		// Pick the statistics table for this document's category.
+		stats := tenderStats
+		switch subtype {
+		case "中标", "成交", "合同":
+			stats = resultStats
+		}
+
+		bucket := stats[levelKey]
+		if bucket == nil {
+			bucket = map[string]int{}
+		}
+		bucket[tagKey] = qu.IntAll(bucket[tagKey]) + 1
+		stats[levelKey] = bucket
+
+		doc = make(map[string]interface{})
+	}
+	log.Debug("可信招标类:", tenderStats["1"]["1"]+tenderStats["1"]["2"], "~", tenderStats["1"]["1"], "~", tenderStats["1"]["2"])
+	log.Debug("模糊招标类:", tenderStats["0"]["1"]+tenderStats["0"]["2"], "~", tenderStats["0"]["1"], "~", tenderStats["0"]["2"])
+	log.Debug("不可信招类:", tenderStats["-1"]["1"]+tenderStats["-1"]["2"], "~", tenderStats["-1"]["1"], "~", tenderStats["-1"]["2"])
+	log.Debug("可信结果类:", resultStats["1"]["1"]+resultStats["1"]["2"], "~", resultStats["1"]["1"], "~", resultStats["1"]["2"])
+	log.Debug("模糊结果类:", resultStats["0"]["1"]+resultStats["0"]["2"], "~", resultStats["0"]["1"], "~", resultStats["0"]["2"])
+	log.Debug("不可信结类:", resultStats["-1"]["1"]+resultStats["-1"]["2"], "~", resultStats["-1"]["1"], "~", resultStats["-1"]["2"])
+}
+
+// FigurelSourceInfoQuantity computes the modified rate without looking at
+// area or city: only the projected ck_bidamount, ck_budget, ck_buyer,
+// ck_s_winner, ck_projectcode and ck_projectname fields are inspected.
+//
+// For each zktest_marked_source document, qua_tag is 1 when none of the
+// inspected ck_* values is greater than 1, and 2 otherwise. For subtypes
+// other than 中标/成交/合同, ck_bidamount and ck_s_winner are ignored. The
+// tag is written by id to both zktest_marked_source and zktest_marked_info.
+func FigurelSourceInfoQuantity() {
+	query := map[string]interface{}{}
+	sess := u.Ext_Mgo.GetMgoConn()
+	defer u.Ext_Mgo.DestoryMongoConn(sess)
+	fields := map[string]interface{}{
+		"ck_bidamount":   1,
+		"ck_budget":      1,
+		"ck_buyer":       1,
+		"ck_s_winner":    1,
+		"ck_projectcode": 1,
+		"ck_projectname": 1,
+		"subtype":        1,
+	}
+	it := sess.DB(u.Ext_Mgo.DbName).C("zktest_marked_source").Find(&query).Sort("_id").Select(fields).Iter()
+
+	total, untouched, modified := 0, 0, 0
+	for doc := make(map[string]interface{}); it.Next(&doc); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total, "~", untouched, "~", modified)
+		}
+		subtype := qu.ObjToString(doc["subtype"])
+		isResult := subtype == "中标" || subtype == "成交" || subtype == "合同"
+
+		clean := true
+		for key, val := range doc {
+			if !strings.Contains(key, "ck_") {
+				continue
+			}
+			// Winner and bid amount are not considered for tender-type notices.
+			if !isResult && (key == "ck_bidamount" || key == "ck_s_winner") {
+				continue
+			}
+			if qu.IntAll(val) > 1 {
+				clean = false
+				break
+			}
+		}
+
+		id := u.BsonTOStringId(doc["_id"])
+		tag := 2
+		if clean {
+			tag = 1
+			untouched++
+		} else {
+			modified++
+		}
+		// Same write order as before: source collection first, then info.
+		for _, coll := range []string{"zktest_marked_source", "zktest_marked_info"} {
+			u.Ext_Mgo.UpdateById(coll, id, map[string]interface{}{
+				"$set": map[string]interface{}{
+					"qua_tag": tag,
+				},
+			})
+		}
+		doc = make(map[string]interface{})
+	}
+	log.Debug("is over ...", total, "~", untouched, "~", modified)
+}

+ 8 - 0
data_quality/src/figure/govalueate.go

@@ -0,0 +1,8 @@
+package figure
+
+// GoValueateTest is a smoke test for the govaluate expression engine. It
+// parses a fixed boolean expression that mixes numeric and string
+// comparisons with `in` membership tests, evaluates it without parameters
+// and prints the outcome.
+//
+// The parsed expression and the error were previously assigned but never
+// used, which the Go compiler rejects; both are now consumed. The builtin
+// println is used for output so this block needs no additional imports.
+//
+// NOTE(review): no import block is visible for this file; the govaluate
+// package must be imported for the package to build — confirm.
+func GoValueateTest() {
+	expr, err := govaluate.NewEvaluableExpression("(10 > 0) && (2.1 == 2.1) && 'service is ok' == 'service is ok'" +
+		" && 1 in (1,2) && 'code1' in ('code3','code2',1)")
+	if err != nil {
+		println("govaluate: parse expression:", err.Error())
+		return
+	}
+
+	// The expression references no variables, so no parameters are supplied.
+	result, err := expr.Evaluate(nil)
+	if err != nil {
+		println("govaluate: evaluate expression:", err.Error())
+		return
+	}
+	if ok, isBool := result.(bool); isBool {
+		println("govaluate: result =", ok)
+	}
+}

+ 10 - 5
data_quality/src/main.go

@@ -1,19 +1,24 @@
 package main
 
 import (
-	log "github.com/donnie4w/go-logger/logger"
-	"qua"
+	"figure"
 	u "ul"
 )
 
 func init() {
-	u.IsLocal = true
+	//u.IsLocal = true
 	u.InitClass()
 }
 
 func main() {
-	log.Debug("main ...")
-	qua.QuaFieldScore([]string{"budget", "bidamount"}, "zktest_marked_info")
+	//qua.QuaFieldScore([]string{"budget", "bidamount", "buyer", "s_winner", "projectname", "projectcode"}, "zktest_marked_info")
+	//figure.FigurelQualityLevelFirst()
+	//figure.FigurelQualityLevelSecond()
+	//figure.FigurelSourceInfoQuantity()
+
+	figure.FigureUnLevelScreenInfo()
+	figure.FigureUnLevelScreenNumber()
+
 	//保持活性
 	lock := make(chan bool)
 	<-lock

+ 3 - 5
data_quality/src/qua/method.go

@@ -1,6 +1,7 @@
 package qua
 
 import (
+	log "github.com/donnie4w/go-logger/logger"
 	"github.com/go-ego/gse"
 	qu "qfw/util"
 	"regexp"
@@ -28,7 +29,7 @@ var codeUnLenReg *regexp.Regexp = regexp.MustCompile("([\u4e00-\u9fa5]{9,})")
 var classMoneyScope map[string]map[string]interface{}
 
 func init() {
-	GSE.LoadDict("./web/qua_res/dictionary.txt")
+	GSE.LoadDict("dictionary.txt")
 	//t>d>p
 	classMoneyScope = map[string]map[string]interface{}{
 		"建筑工程": {"min": 10000, "max": 10000000000},
@@ -44,6 +45,7 @@ func init() {
 		"市政设施": {"min": 1000, "max": 10000000000},
 		"农林牧渔": {"min": 100, "max": 10000000},
 	}
+	log.Debug("切词加载完毕...")
 }
 
 //行业金额校验
@@ -178,14 +180,10 @@ func FieldSourceScore(source map[string]interface{}) map[string]int64 {
 			ext_from = normalizedExtFromName(ext_from)
 			if ext_from == "winnerorder" || ext_from == "package" ||
 				ext_from == "jsondata" || ext_type == "" {
-				u.QyLock.Lock()
 				score[key] = qu.Int64All(u.Ext_From[ext_from])
-				u.QyLock.Unlock()
 			} else {
-				u.QyLock.Lock()
 				s := qu.Int64All(u.Ext_From[ext_from]) + qu.Int64All(u.Ext_Type[ext_type])
 				score[key] = s / 2
-				u.QyLock.Unlock()
 			}
 		}
 	}

+ 53 - 100
data_quality/src/qua/quality.go

@@ -2,11 +2,12 @@ package qua
 
 import (
 	log "github.com/donnie4w/go-logger/logger"
-	"go.mongodb.org/mongo-driver/bson/primitive"
 	qu "qfw/util"
 	u "ul"
 )
 
+var QuaScore = int64(98)
+
 func QuaFieldScore(field_tag []string, coll_name string) {
 	if coll_name == "" || len(field_tag) <= 0 {
 		return
@@ -16,14 +17,23 @@ func QuaFieldScore(field_tag []string, coll_name string) {
 	defer u.Ext_Mgo.DestoryMongoConn(sess)
 	it := sess.DB(u.Ext_Mgo.DbName).C(coll_name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
+		if total%10000 == 0 {
 			log.Debug("当前数量:", total)
 		}
-		update := calculateFieldScore(tmp, field_tag)
-		u.Ext_Mgo.UpdateById(coll_name, u.BsonTOStringId(tmp["_id"]), update)
+		u.Pool_Mgo <- true
+		u.Wg_Mgo.Add(1)
+		go func(tmp map[string]interface{}, field_tag []string) {
+			defer func() {
+				<-u.Pool_Mgo
+				u.Wg_Mgo.Done()
+			}()
+			update := calculateFieldScore(tmp, field_tag)
+			u.Ext_Mgo.UpdateById(coll_name, u.BsonTOStringId(tmp["_id"]), update)
+		}(tmp, field_tag)
 		tmp = make(map[string]interface{})
 	}
-	log.Debug("is over ......")
+	u.Wg_Mgo.Wait()
+	log.Debug("is over ...", total)
 }
 
 //计算字段分
@@ -31,123 +41,66 @@ func calculateFieldScore(tmp map[string]interface{}, field_tag []string) map[str
 	//所有字段
 	source := *qu.ObjToMap(tmp["field_source"])
 	f_s := FieldSourceScore(source) //打初始分
-	update_dict := make(map[string]interface{}, 0)
+	update := make(map[string]interface{}, 0)
 	buyer_s := BuyerFieldScore(tmp, f_s["buyer"])
 	budget_s := BudgetFieldScore(tmp, f_s["budget"])
 	projectname_s := ProjectnameFieldScore(tmp, f_s["projectname"])
 	projectcode_s := ProjectcodeFieldScore(tmp, f_s["projectcode"])
-
-	update_dict["buyer"] = buyer_s
-	update_dict["budget"] = budget_s
-	update_dict["projectname"] = projectname_s
-	update_dict["projectcode"] = projectcode_s
+	update["buyer"] = buyer_s
+	update["budget"] = budget_s
+	update["projectname"] = projectname_s
+	update["projectcode"] = projectcode_s
 
 	subtype := qu.ObjToString(tmp["subtype"])
 	if subtype == "中标" || subtype == "成交" || subtype == "合同" {
 		s_winner_s := WinnerFieldScore(tmp, f_s["s_winner"])
-		update_dict["s_winner"] = s_winner_s
+		update["s_winner"] = s_winner_s
 		bidamount_s := BidamountFieldScore(tmp, f_s["bidamount"])
-		update_dict["bidamount"] = bidamount_s
+		update["bidamount"] = bidamount_s
 	}
 
-	isUse := true
-	for _, key := range field_tag {
-		value := *qu.ObjToMap(update_dict[key])
-		score := qu.Int64All(value["score"])
-		if score < 98 {
-			isUse = false //需要标注
-			break
-		}
-	}
+	level := valuateTheRating(subtype, field_tag, update)
 	return map[string]interface{}{
 		"$set": map[string]interface{}{
 			"qua_res": map[string]interface{}{
-				"qua_score": update_dict,
-				"qua_used":  isUse,
+				"qua_score": update,
+				"qua_level": level,
 				"qua_rule":  field_tag,
 			},
 		},
 	}
 }
 
-//标的物-计算
-func calculatePrchasinglist(tmp map[string]interface{}) bool {
-	b_ispurchasing := true //默认需要标注
-	if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok {
-		if len(purchasinglist) <= 0 {
-			return b_ispurchasing
-		}
-		isListUserArr := []bool{}
-		for _, data := range purchasinglist {
-			isUse := isUsePurchasing(*qu.ObjToMap(data))
-			isListUserArr = append(isListUserArr, isUse)
-		}
-		b_ispurchasing = !isUseAllPurchasing(isListUserArr)
-		return b_ispurchasing
-	}
-	if purchasinglistA, okA := tmp["purchasinglist"].(primitive.A); okA {
-		if len(purchasinglistA) <= 0 {
-			return b_ispurchasing
-		}
-		isListUserArr := []bool{}
-		for _, data := range purchasinglistA {
-			isUse := isUsePurchasing(*qu.ObjToMap(data))
-			isListUserArr = append(isListUserArr, isUse)
-		}
-		b_ispurchasing = !isUseAllPurchasing(isListUserArr)
-	}
-	return b_ispurchasing
-}
-
-//判断各个标的物是否可用  默认不可用
-func isUsePurchasing(data map[string]interface{}) bool {
-	isUse := false
-	num := 1
-	total_score := qu.Float64All(data["score"])
-	if total_score <= float64(0) {
-		return isUse
-	}
-	ner_data := *qu.ObjToMap(data["ner_data"])
-	NerGoods := *qu.ObjToMap(ner_data["NerGoods"])
-	for _, v := range NerGoods {
-		num++
-		total_score += qu.Float64All(v)
-	}
-	//计算总平均分输
-	if total_score/float64(num) > float64(0.3) {
-		isUse = true
-	}
-	return isUse
-}
-
-//是否整体可用 - 默认不可用
-func isUseAllPurchasing(isListUserArr []bool) bool {
-	isUse := false
-	total_num := len(isListUserArr)
-	ok_num := 0
-	for _, v := range isListUserArr {
-		if v {
-			ok_num++
+//评定整体定级
+func valuateTheRating(subtype string, field_tag []string, update map[string]interface{}) int {
+	isErr, level := 0, -1
+	for _, key := range field_tag {
+		value := *qu.ObjToMap(update[key])
+		score := qu.Int64All(value["score"])
+		if key == "bidamount" || key == "s_winner" {
+			if subtype == "中标" || subtype == "成交" || subtype == "合同" {
+				if score < QuaScore {
+					isErr++
+				}
+			}
+		} else {
+			if score < QuaScore {
+				isErr++
+			}
 		}
 	}
-	if ok_num == 0 {
-		return isUse
-	}
-	correct := float64(ok_num) / float64(total_num)
-	if total_num <= 2 {
-		if correct >= float64(1) {
-			isUse = true
+	if isErr == 0 {
+		level = 1
+	} else if isErr == 1 {
+		if value := *qu.ObjToMap(update["buyer"]); qu.Int64All(value["score"]) >= QuaScore {
+			if subtype == "中标" || subtype == "成交" || subtype == "合同" {
+				if value1 := *qu.ObjToMap(update["s_winner"]); qu.Int64All(value1["score"]) >= QuaScore {
+					level = 0
+				}
+			} else {
+				level = 0
+			}
 		}
-	} else if total_num > 2 && total_num <= 6 {
-		if correct >= float64(0.6) {
-			isUse = true
-		}
-	} else if total_num > 7 {
-		if correct >= float64(0.5) {
-			isUse = true
-		}
-	} else {
-
 	}
-	return isUse
+	return level
 }

+ 1 - 1
data_quality/src/qua/scoreprojectname.go

@@ -33,7 +33,7 @@ func ProjectnameFieldScore(tmp map[string]interface{}, score int64) map[string]i
 	    "中文比例": 2
 	*/
 
-	if utf8.RuneCountInString(projectname) >= 40 {
+	if utf8.RuneCountInString(projectname) >= 50 {
 		desc = "长度过长"
 		reason += "~" + desc + fmt.Sprintf("-%d", qu.Int64All(u.Projectname_Score[desc]))
 		score -= qu.Int64All(u.Projectname_Score[desc])

+ 2 - 2
data_quality/src/ul/initcfg.go

@@ -17,8 +17,8 @@ var (
 	Projectname_Score, Projectcode_Score map[string]interface{}
 )
 
-//var Pool_Mgo = make(chan bool, 10)
-//var Wg_Mgo = &sync.WaitGroup{}
+var Pool_Mgo = make(chan bool, 10)
+var Wg_Mgo = &sync.WaitGroup{}
 
 func InitClass() {
 	initMgoInfo()

+ 2 - 23
data_ylqx/src/main.go

@@ -1,11 +1,11 @@
 package main
 
 import (
+	"figure"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson"
 	qu "qfw/util"
-	"strings"
 	ul "util"
 )
 
@@ -41,28 +41,7 @@ func main() {
 
 	//T.RepairPurchasingJyHref()
 
-	dataArr, _ := ul.Save_Mgo.Find("zktest_marked_info", nil, nil, nil)
-	ok1, ok2 := 0, 0
-	for k, v := range dataArr {
-		if k%10000 == 0 {
-			log.Debug("cur index ", k, "~", ok1, "~", ok2)
-		}
-		isT := true
-		for k1, v1 := range v {
-			if strings.Contains(k1, "ck_") {
-				if qu.IntAll(v1) > 1 {
-					isT = false
-					break
-				}
-			}
-		}
-		if isT {
-			ok1++
-		} else {
-			ok2++
-		}
-	}
-	log.Debug(ok1, "~", ok2)
+	figure.GoValueateTest()
 
 	//保持活性
 	lock := make(chan bool)

+ 0 - 2
listen_data/src/main.go

@@ -160,8 +160,6 @@ func main() {
 	//decodeSEUrl("QltFK2h2OQMIUFwVV0dybyZpVAkFWEdY")
 	//exportErrInfo0515()
 
-	export0613()
-
 	return
 
 	/*