Browse Source

标注对比

zhangjinkun 6 years ago
parent
commit
c010655871
2 changed files with 128 additions and 6 deletions
  1. 4 4
      versioncomparison/config.json
  2. 124 2
      versioncomparison/main.go

+ 4 - 4
versioncomparison/config.json

@@ -1,9 +1,9 @@
 {
-    "premgo": "192.168.3.207:27081",
-    "predb": "qfw",
+    "premgo": "192.168.3.207:27082",
+    "predb": "extract_kf",
     "prec": "result_v3",
-    "newmgo": "192.168.3.207:27081",
-    "newdb": "extract_v3",
+    "newmgo": "192.168.3.207:27082",
+    "newdb": "extract_kf",
     "newc": "result_data",
     "fields": [
         "projectname",

+ 124 - 2
versioncomparison/main.go

@@ -23,6 +23,7 @@ var (
 	Sid, Eid    string
 	Fields      []string
 	FieldsQuery string
+	Url         = "https://www.jianyu360.com/article/content/%s.html"
 )
 
 type Compare struct {
@@ -59,7 +60,9 @@ func init() {
 func main() {
 	getVersionData()
 	createXlsx()
+	//biaozhucompare() // annotation-accuracy report (writes ext_bz.xlsx); currently disabled
 }
+
 func createXlsx() {
 	xf, err := xlsx.OpenFile("template.xlsx")
 	if err != nil {
@@ -87,7 +90,7 @@ func createXlsx() {
 		}
 	}
 	//生成信息sheet
-	url := "https://www.jianyu360.com/article/content/%s.html"
+
 	for _, field := range Fields {
 		sh, _ := xf.AddSheet(field)
 		rowh := sh.AddRow()
@@ -102,7 +105,7 @@ func createXlsx() {
 				row.AddCell().SetString(k)
 				row.AddCell().SetString(v.PreVal)
 				row.AddCell().SetString(v.NewVal)
-				row.AddCell().SetString(fmt.Sprintf(url, qu.CommonEncodeArticle("content", v.Id)))
+				row.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", v.Id)))
 			}
 		}
 	}
@@ -184,3 +187,122 @@ func getVersionData() {
 		Compares[k] = cp
 	}
 }
+
+// Record types used by the annotation-vs-extraction comparison.
+type (
+	// BidData couples one document id with the field values compared for it.
+	BidData struct {
+		id  string                 // document _id rendered through qu.BsonIdToSId
+		key map[string]interface{} // field name -> value taken from the document
+	}
+
+	// BidCom accumulates the comparison outcome for a single field.
+	BidCom struct {
+		Val []int                    // Val[0] counts equal values, Val[1] differing ones
+		Ids []map[string]interface{} // one entry per mismatch, keyed "id", "ext" and "bz"
+	}
+)
+
+// biaozhucompare produces the annotation accuracy statistics: it measures how
+// often extracted field values agree with the manually annotated ones.
+//
+// It loads every document of the "bid_v3" (extracted) and "bid_biaozhuid"
+// (annotated) collections through Newmgo, pairs documents by _id, and for each
+// field whose extracted value is non-empty counts equal and differing values.
+// The result is written to ext_bz.xlsx: a summary sheet plus one sheet per
+// field listing the mismatching documents together with a link built from Url.
+//
+// Failures (no query result, sheet creation or save errors) are logged and the
+// function returns or skips the affected sheet instead of panicking.
+func biaozhucompare() {
+	// Compared fields, in the order their rows and sheets are written. Using a
+	// fixed order keeps the workbook layout stable between runs.
+	fields := []string{
+		"projectname", "projectcode", "buyer", "budget",
+		"bidamount", "agency", "buyerperson", "buyertel",
+	}
+
+	// The second return value of Find is not inspected here; a nil result is
+	// the only condition treated as failure.
+	exts, _ := Newmgo.Find("bid_v3", `{}`, `{"_id":1}`, nil, false, -1, -1)
+	if exts == nil {
+		log.Println("bid_v3 query returned no result")
+		return
+	}
+	extDatas := make([]BidData, 0, len(*exts))
+	for _, v := range *exts {
+		extDatas = append(extDatas, BidData{
+			id: qu.BsonIdToSId(v["_id"]),
+			key: map[string]interface{}{
+				"projectname": v["projectname"],
+				"projectcode": v["projectcode"],
+				"buyer":       v["buyer"],
+				"budget":      qu.Float64All(v["budget"]),
+				"bidamount":   qu.Float64All(v["bidamount"]),
+				"agency":      v["agency"],
+				"buyerperson": v["buyerperson"],
+				"buyertel":    v["buyertel"],
+			},
+		})
+	}
+	log.Println("exts ok")
+
+	bzs, _ := Newmgo.Find("bid_biaozhuid", `{}`, `{"_id":1}`, nil, false, -1, -1)
+	if bzs == nil {
+		log.Println("bid_biaozhuid query returned no result")
+		return
+	}
+	// Annotated values indexed by document id, so each extracted document is
+	// matched with one lookup instead of a scan over all annotations.
+	bzByID := make(map[string]map[string]interface{}, len(*bzs))
+	for _, v := range *bzs {
+		id := qu.BsonIdToSId(v["_id"])
+		if _, seen := bzByID[id]; seen {
+			// Keep the first document for an id, as the original linear scan did.
+			continue
+		}
+		bidamount := float64(0)
+		// Guard against an empty "bigprice" array before reading its first element.
+		if bigprices, ok := v["bigprice"].([]interface{}); ok && len(bigprices) > 0 {
+			bidamount = qu.Float64All(bigprices[0])
+		}
+		bzByID[id] = map[string]interface{}{
+			"projectname": qu.ObjToString(v["projectname"]),
+			"projectcode": qu.ObjToString(v["projectcode"]),           //qu.If(qu.ObjToString(v["t_bidno"]) == "", qu.ObjToString(v["b_projectno"]), qu.ObjToString(v["t_bidno"])),
+			"buyer":       qu.ObjToString(v["buyer"]),                 // qu.If(qu.ObjToString(v["t_buyer"]) == "", qu.ObjToString(v["b_buyer"]), qu.ObjToString(v["t_buyer"])),
+			"budget":      qu.Float64All(qu.ObjToString(v["budget"])), //  qu.Float64All(qu.ObjToString(v["t_budget"])),
+			"bidamount":   bidamount,
+			"agency":      qu.ObjToString(v["agency"]),
+			"buyerperson": qu.ObjToString(v["buyerperson"]),
+			"buyertel":    qu.ObjToString(v["buyertel"]),
+		}
+	}
+	log.Println("bzs ok")
+
+	bcoms := map[string]*BidCom{}
+	for _, ext := range extDatas {
+		bzKey, ok := bzByID[ext.id]
+		if !ok {
+			continue
+		}
+		for key, val := range ext.key {
+			// Only fields for which the extraction produced a value are scored.
+			hasValue := qu.ObjToString(val) != "" || qu.Float64All(val) > 0
+			if !hasValue {
+				continue
+			}
+			bcom := bcoms[key]
+			if bcom == nil {
+				bcom = &BidCom{
+					Val: []int{0, 0},
+					Ids: []map[string]interface{}{},
+				}
+				bcoms[key] = bcom
+			}
+			if val == bzKey[key] {
+				bcom.Val[0]++
+				continue
+			}
+			bcom.Val[1]++
+			bcom.Ids = append(bcom.Ids, map[string]interface{}{
+				"id":  ext.id,
+				"ext": val,
+				"bz":  bzKey[key],
+			})
+		}
+	}
+
+	xl := xlsx.NewFile()
+	sh, err := xl.AddSheet("统计")
+	if err != nil {
+		log.Println("add sheet 统计:", err)
+		return
+	}
+	h := sh.AddRow()
+	h.AddCell().SetString("field")
+	h.AddCell().SetString("相同")
+	h.AddCell().SetString("不同")
+	for _, k := range fields {
+		stat := bcoms[k]
+		if stat == nil {
+			// No document had an extracted value for this field.
+			continue
+		}
+		row := sh.AddRow()
+		row.AddCell().SetString(k)
+		row.AddCell().SetInt(stat.Val[0])
+		row.AddCell().SetInt(stat.Val[1])
+		log.Println(k, stat.Val)
+
+		ksh, err := xl.AddSheet(k)
+		if err != nil {
+			log.Println("add sheet", k, err)
+			continue
+		}
+		rh := ksh.AddRow()
+		rh.AddCell().SetString("id")
+		rh.AddCell().SetString("标注")
+		rh.AddCell().SetString("抽取")
+		rh.AddCell().SetString("url")
+		for _, diff := range stat.Ids {
+			id := qu.ObjToString(diff["id"])
+			rw := ksh.AddRow()
+			rw.AddCell().SetString(id)
+			rw.AddCell().SetString(fmt.Sprint(diff["bz"]))
+			rw.AddCell().SetString(fmt.Sprint(diff["ext"]))
+			rw.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", id)))
+		}
+	}
+	if err := xl.Save("ext_bz.xlsx"); err != nil {
+		log.Println("save ext_bz.xlsx:", err)
+	}
+}