Bläddra i källkod

Merge branch 'dev3.1.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.1.2

fengweiqiang 6 år sedan
förälder
incheckning
19f3112cef

BIN
src/doc/数据字典.xlsx


+ 4 - 0
src/jy/extract/exportask.go

@@ -4,6 +4,7 @@ package extract
 import (
 	"fmt"
 	db "jy/mongodbutil"
+	ju "jy/util"
 	"log"
 	qu "qfw/util"
 	"time"
@@ -55,6 +56,9 @@ func extractAndExport(v string, t map[string]interface{}) {
 	e.InitRuleCore()
 	e.InitTag()
 	e.InitClearFn()
+	//品牌抽取是否开启
+	ju.IsBrandGoods = ju.Config["brandgoods"].(bool)
+
 	query := t["query"]
 	limit := qu.IntAll(t["limit"])
 	list, _ := e.TaskInfo.FDB.Find(e.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)

+ 1 - 1
src/jy/extract/extpackage.go

@@ -14,7 +14,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
 		if len(j.BlockPackage) > 0 {
 			packageResult := map[string]map[string]interface{}{}
-			packagenum := len(j.BlockPackage)
+			//packagenum := len(j.BlockPackage)
 			for pkName, pkg := range j.BlockPackage {
 				//是否清理标记
 				clearmap := map[string]bool{}

+ 54 - 54
src/jy/extract/extract.go

@@ -648,7 +648,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 				tmp := j.Result[in.Field]
 				exts := []interface{}{}
 				for k, v := range tmp {
-					if v.Type == "table" { //table抽取到的数据不清理
+					if v.Type == "table" && v.Field != "projectname" { //table抽取到的数据不清理
 						continue
 					}
 					text := qu.ObjToString(v.Value)
@@ -858,66 +858,66 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 			}
 		}
 
-	//质量审核
-	if ju.Config["qualityaudit"].(bool) {
-		e.QualityAudit(tmp)
-	}
-	if e.IsExtractCity { //城市抽取
-		b, p, c, d := e.TransmitData(tmp, _id) //抽取省份城市
-		//log.Println("省份---", p, "城市---", c, "区---", d)
-		tmp["district"] = d
-		if b {
-			tmp["city"] = c
-			tmp["area"] = p
-		}
-	}
-	//品牌抽取
-	if ju.IsBrandGoods {
-		tmp["checkhas"] = map[string]int{
-			"hastable": j.HasTable,
-			"hasgoods": j.HasGoods,
-			"hasbrand": j.HasBrand,
-			"haskey":   j.HasKey,
+		//质量审核
+		if ju.Config["qualityaudit"].(bool) {
+			e.QualityAudit(tmp)
 		}
-		if len(j.BrandData) > 0 {
-			tmp["brand"] = j.BrandData
+		if e.IsExtractCity { //城市抽取
+			b, p, c, d := e.TransmitData(tmp, _id) //抽取省份城市
+			//log.Println("省份---", p, "城市---", c, "区---", d)
+			tmp["district"] = d
+			if b {
+				tmp["city"] = c
+				tmp["area"] = p
+			}
 		}
-		//log.Println("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
-	}
-	if e.TaskInfo.TestColl == "" {
-		if len(tmp) > 0 { //保存抽取结果
-			tmparr := []map[string]interface{}{
-				map[string]interface{}{
-					"_id": qu.StringTOBsonId(_id),
-				},
-				map[string]interface{}{"$set": tmp},
+		//品牌抽取
+		if ju.IsBrandGoods {
+			tmp["checkhas"] = map[string]int{
+				"hastable": j.HasTable,
+				"hasgoods": j.HasGoods,
+				"hasbrand": j.HasBrand,
+				"haskey":   j.HasKey,
 			}
-			e.BidArr = append(e.BidArr, tmparr)
+			if len(j.BrandData) > 0 {
+				tmp["brand"] = j.BrandData
+			}
+			//log.Println("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
 		}
-		if b, ok := ju.Config["saveresult"].(bool); ok && b {
-			id := tmp["_id"]
-			tmp["result"] = result
+		if e.TaskInfo.TestColl == "" {
+			if len(tmp) > 0 { //保存抽取结果
+				tmparr := []map[string]interface{}{
+					map[string]interface{}{
+						"_id": qu.StringTOBsonId(_id),
+					},
+					map[string]interface{}{"$set": tmp},
+				}
+				e.BidArr = append(e.BidArr, tmparr)
+			}
+			if b, ok := ju.Config["saveresult"].(bool); ok && b {
+				id := tmp["_id"]
+				tmp["result"] = result
+				delete(tmp, "_id")
+				tmparr := []map[string]interface{}{
+					map[string]interface{}{
+						"_id": id,
+					},
+					map[string]interface{}{"$set": tmp},
+				}
+				e.ResultArr = append(e.ResultArr, tmparr)
+			}
+		} else { //测试结果
 			delete(tmp, "_id")
-			tmparr := []map[string]interface{}{
-				map[string]interface{}{
-					"_id": id,
-				},
-				map[string]interface{}{"$set": tmp},
+			if len(j.BlockPackage) > 0 { //分包详情
+				bs, _ := json.Marshal(j.BlockPackage)
+				tmp["epackage"] = string(bs)
+			}
+			tmp["result"] = result
+			b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
+			if !b {
+				log.Println(e.TaskInfo.TestColl, _id)
 			}
-			e.ResultArr = append(e.ResultArr, tmparr)
-		}
-	} else { //测试结果
-		delete(tmp, "_id")
-		if len(j.BlockPackage) > 0 { //分包详情
-			bs, _ := json.Marshal(j.BlockPackage)
-			tmp["epackage"] = string(bs)
-		}
-		tmp["result"] = result
-		b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
-		if !b {
-			log.Println(e.TaskInfo.TestColl, _id)
 		}
-	}
 	}, func(err interface{}) {
 		log.Println("AnalysisSaveResult err", err)
 	})

+ 0 - 23
src/jy/pretreated/analytable.go

@@ -784,7 +784,6 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			table.TdContactFormat(contactFormat)
 			//开始查找kv,核心模块
 			table.FindKV()
-			qutil.Debug(table.SortKV.Map)
 			//table中抽取品牌
 			if u.IsBrandGoods {
 				table.analyBrand1()
@@ -997,20 +996,16 @@ func (table *Table) FindTag() {
 //计算r/c_start_end的概率
 func (table *Table) GetKeyRation() {
 	for _, vn := range table.StartAndEndRationKSort.Keys {
-		qutil.Debug("vn:", vn)
 		v := table.StartAndEndRation[vn]
 		for _, v1 := range v.Poss {
 			count := 0
 			n := 0
-			qutil.Debug("len:", len(v.Tdmap[v1]))
 			for _, td := range v.Tdmap[v1] {
 				n++
 				if td.BH {
-					qutil.Debug("val:", td.Val)
 					count++
 				}
 			}
-			qutil.Debug(float32(count), float32(n), float32(count)/float32(n))
 			v.Rationmap[v1] = float32(count) / float32(n)
 		}
 	}
@@ -1026,15 +1021,11 @@ func (table *Table) ComputeRowColIsKeyRation() {
 		checkCompute := map[string]bool{}
 		for k, tr := range table.TRs {
 			rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
-			qutil.Debug("rk", rk)
 			if k == 0 { //第1行的概率
 				ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
-				qutil.Debug("ck", ck)
 				//u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck])
 				ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
 				ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
-				qutil.Debug("ration1:", ration1, "ration2:", ration2)
-				qutil.Debug(len(tr.TDs) == 2 && ration2 < 0.55, len(tr.TDs) == 2 && ration1 > 0.5)
 				if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { //第一行为key
 					bkeyfirstrow = true
 					ball := true
@@ -1071,7 +1062,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 						}
 					}
 				}
-				qutil.Debug("bkeyfirstrow:", bkeyfirstrow, "bkeyfirstcol:", bkeyfirstcol)
 				if !bkeyfirstrow && !bkeyfirstcol {
 					if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
 						bkeyfirstrow = true
@@ -1102,7 +1092,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 					}
 				}
 			} else {
-				qutil.Debug("bkeyfirstrow", bkeyfirstrow)
 				if bkeyfirstrow {
 					//第一列的概率
 					ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
@@ -1117,7 +1106,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 					} //else {for _, td := range tr.TDs {}}
 				} else {
 					//列在起作用
-					qutil.Debug("bkeyfirstcol", bkeyfirstcol)
 					if bkeyfirstcol {
 						for _, td := range tr.TDs {
 							ck := fmtkey("c", td.StartCol, td.EndCol)
@@ -1159,21 +1147,17 @@ func (table *Table) ComputeRowColIsKeyRation() {
 	if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
 		//断行问题,虽然同列或同行,但中间被跨行截断,表格方向调整
 		for _, k := range table.StartAndEndRationKSort.Keys {
-			qutil.Debug("k:", k)
 			v := table.StartAndEndRation[k]
 			//横向判断,要判断最多的方向,否则会出现不定的情况(map遍历问题)
 			k1 := k[:1]
 			for _, v2 := range v.Poss {
 				lentds := len(v.Tdmap[v2])
-				qutil.Debug(v2.Max, v2.Min, "len", lentds)
 				if v.Rationmap[v2] > checkval {
 					for _, td := range v.Tdmap[v2] {
-						qutil.Debug("td:", td.Val)
 						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) {
 							if k1 == "r" {
 								ck := fmtkey("c", td.StartCol, td.EndCol)
 								rt := table.StartAndEndRation[ck]
-								qutil.Debug("ck:", ck, "rt:", rt)
 								//clen := 0
 								var fv float32
 								var tdn []*TD
@@ -1182,7 +1166,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									//clen = len(tdn)
 								}
 								if lentds > 1 {
-									qutil.Debug((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil)
 									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
 										td.KeyDirect = 1
 										td.KVDirect = 2
@@ -1192,7 +1175,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 							} else {
 								ck := fmtkey("r", td.StartRow, td.EndRow)
 								rt := table.StartAndEndRation[ck]
-								qutil.Debug("ck:", ck, "rt:", rt)
 								var fv float32
 								var tdn []*TD
 								//clen := 0
@@ -1201,7 +1183,6 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									//clen = len(tdn)
 								}
 								if lentds > 1 {
-									qutil.Debug(tdn != nil, v.Rationmap[v2] > fv, tdn == nil)
 									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
 										td.KeyDirect = 2
 										td.KVDirect = 1
@@ -1209,13 +1190,11 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									}
 								}
 							}
-							qutil.Debug(td.Val, td.BH, td.KeyDirect, td.KVDirect)
 						} else {
 							break
 						}
 					}
 				} else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
-					qutil.Debug("================================")
 					for _, td := range v.Tdmap[v2] {
 						//						u.Debug(td.Val, "-----", td.BH)
 						if td.KeyDirect == 0 && td.BH && !td.MustBH {
@@ -3116,11 +3095,9 @@ func (table *Table) analyBrand() {
 						if nameM == "brandname" || nameM == "modal" { //特殊处理brandname
 							if len(endStrMap["brandname"]) == 0 {
 								brand, allNull := hasBrand(table, v1)
-								qutil.Debug("----", nameM, brand, v1)
 								if !allNull {
 									endStrMap["brandname"] = brand[0]
 								}
-								qutil.Debug("endStrMap----", endStrMap)
 							}
 						}
 						if nameM != "brandname" {