Forráskód Böngészése

Merge remote-tracking branch 'origin/dev3.1.2' into dev3.1.2

fengweiqiang 6 éve
szülő
commit
f6502abd08

+ 4 - 4
src/config.json

@@ -8,10 +8,10 @@
     "elasticPoolSize": 30,
 	"mergetable":"projectset",
 	"mergetablealias":"projectset_v1",
-    "saveresult": true,
-    "fieldscore": true,
-    "qualityaudit": true,
-	"iscltlog":true,
+    "saveresult": false,
+    "fieldscore": false,
+    "qualityaudit": false,
+	"iscltlog":false,
 	"brandgoods":true,
     "udptaskid": "5be107e600746bf92debf080",
     "udpip": "127.0.0.1",

+ 10 - 10
src/jy/admin/audit/rulemanager.go

@@ -77,8 +77,8 @@ func init() {
 func GetRecogField(c *gin.Context) {
 	data, _ := Mgo.Find("rc_field", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
 	for _, d := range *data {
-		timeStr := time.Unix(d["l_lasttime"].(int64), 0).Format(Date_Short_Layout)
-		d["l_lasttime"] = timeStr
+		timeStr := time.Unix(d["l_createtime"].(int64), 0).Format(Date_Short_Layout)
+		d["l_createtime"] = timeStr
 	}
 	c.JSON(200, gin.H{"data": data})
 }
@@ -97,7 +97,7 @@ func SaveRecogField(c *gin.Context) {
 		if len(*d) > 0 {
 			c.JSON(200, gin.H{"msg": "已存在!"})
 		} else {
-			data["l_lasttime"] = time.Now().Unix()
+			data["l_createtime"] = time.Now().Unix()
 			//data["l_date"] = time.Now().Unix()
 			data["s_user"] = session.Get("username")
 			data["delete"] = false
@@ -125,8 +125,8 @@ func GetClass(c *gin.Context) {
 	s_fid, _ := c.GetPostForm("fid")
 	data, _ := Mgo.Find("rc_class", `{"s_fid":"`+s_fid+`","delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
 	for _, d := range *data {
-		timeStr := time.Unix(d["l_lasttime"].(int64), 0).Format(Date_Short_Layout)
-		d["l_lasttime"] = timeStr
+		timeStr := time.Unix(d["l_createtime"].(int64), 0).Format(Date_Short_Layout)
+		d["l_createtime"] = timeStr
 	}
 	c.JSON(200, gin.H{"data": data})
 }
@@ -146,7 +146,7 @@ func SaveClass(c *gin.Context) {
 		if len(*class) > 0 {
 			c.JSON(200, gin.H{"rep": false})
 		} else {
-			data["l_lasttime"] = time.Now().Unix()
+			data["l_createtime"] = time.Now().Unix()
 			//data["l_date"] = time.Now().Unix()
 			data["s_user"] = session.Get("username")
 			data["i_order"] = GetOrder("class")
@@ -205,8 +205,8 @@ func GetRule(c *gin.Context) {
 	cid, _ := c.GetPostForm("cid")
 	data, _ := Mgo.Find("rc_rule", `{"s_classid":"`+cid+`","delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
 	for _, d := range *data {
-		timeStr := time.Unix(d["l_lasttime"].(int64), 0).Format(Date_Short_Layout)
-		d["l_lasttime"] = timeStr
+		timeStr := time.Unix(d["l_createtime"].(int64), 0).Format(Date_Short_Layout)
+		d["l_createtime"] = timeStr
 	}
 	c.JSON(200, gin.H{"data": data})
 }
@@ -245,8 +245,8 @@ func SaveRule(c *gin.Context) {
 		if len(*d) > 0 {
 			c.JSON(200, gin.H{"msg": "已存在!"})
 		} else {
-			data["l_lasttime"] = time.Now().Unix()
-			data["l_date"] = time.Now().Unix()
+			data["l_createtime"] = time.Now().Unix()
+			//data["l_date"] = time.Now().Unix()
 			data["s_user"] = session.Get("username")
 			data["i_order"] = GetOrder("rule")
 			data["delete"] = false

+ 14 - 12
src/jy/cluster/distributed.go

@@ -35,6 +35,7 @@ func IdsRange(table, sdate, edate string) int {
 					ids[fmt.Sprint(k)][1],
 					ids[fmt.Sprint(k)][2],
 					qu.ObjToString(v["InstanceId"]),
+					ids[fmt.Sprint(k)][3],
 				},
 			},
 		})
@@ -82,44 +83,45 @@ func RangeIdsByDate(escnum int, start, end time.Time) map[string][]string {
 	total_back := DB.Count("bidding_back", bson.M{"_id": bson.M{"$gte": bson.NewObjectIdWithTime(start), "$lt": bson.NewObjectIdWithTime(end)}})
 	total += total_back
 	pagesize := (total + escnum - 1) / escnum
-	log.Printf("total:%d total_back:%d pagesize:%d escnum:%d", total, total_back, pagesize, escnum)
+	log.Printf("total:%d pagesize:%d escnum:%d", total, pagesize, escnum)
 	nums := 0
+	table := "bidding_back"
 	for i := 0; i < escnum; i++ {
 		log.Println("escnum", i)
 		sid := bson.NewObjectIdWithTime(start)
 		var eid bson.ObjectId
 		var idsnum = 0
-		table := "bidding_back"
 		for {
 			tmpsid := bson.NewObjectIdWithTime(start)
-			end := start.Add(4 * time.Hour)
-			if end.Unix() > end.Unix() {
+			endi := start.Add(4 * time.Hour)
+			if endi.Unix() > end.Unix() {
 				eid = bson.NewObjectIdWithTime(end)
 			} else {
-				eid = bson.NewObjectIdWithTime(end)
+				eid = bson.NewObjectIdWithTime(endi)
 			}
-			start = end
+			start = endi
 			query := bson.M{"_id": bson.M{"$gte": tmpsid, "$lt": eid}}
 			count := DB.Count(table, query)
-			log.Println(count, table, query)
+			//log.Println(count, table, query)
 			if count < 1 { //校验是否切换table
-				tmpnum := DB.Count(table, bson.M{"_id": bson.M{"$gte": tmpsid, "$lt": bson.NewObjectIdWithTime(end.Add(24 * 10 * time.Hour) /*连续10天无数据*/)}})
-				if tmpnum < 1 && table != "bidding" {
+				tmpnum := DB.Count(table, bson.M{"_id": bson.M{"$gte": tmpsid, "$lt": bson.NewObjectIdWithTime(endi.Add(30 * 24 * time.Hour))}})
+				if tmpnum < 1 && table == "bidding_back" {
 					table = "bidding"
 					start = start.Add(-4 * time.Hour)
+					log.Println("切换table,bidding", start)
 					continue
 				}
 			} else {
 				idsnum += count
 			}
-			log.Printf("i:%d count:%d,date:%s", i, idsnum, end.Format(qu.Date_Full_Layout))
+			//log.Printf("i:%d count:%d,date:%s", i, idsnum, end.Format(qu.Date_Full_Layout))
 			if idsnum >= pagesize || start.Unix() > time.Now().Unix() || count > 5000000 { //测试数据count > 5000000
 				break
 			}
 		}
 		nums += idsnum
-		ids[fmt.Sprint(i)] = []string{qu.BsonIdToSId(sid), qu.BsonIdToSId(eid), fmt.Sprint(idsnum)}
-		log.Println("nums", nums)
+		ids[fmt.Sprint(i)] = []string{qu.BsonIdToSId(sid), qu.BsonIdToSId(eid), fmt.Sprint(idsnum), table}
+		log.Println("nums", nums, table)
 	}
 	return ids
 }

+ 111 - 105
src/jy/extract/extpackage.go

@@ -4,138 +4,144 @@ package extract
 import (
 	"jy/clear"
 	ju "jy/util"
+	"log"
 	qu "qfw/util"
 	"reflect"
 )
 
 //处理分包信息
 func PackageDetail(j *ju.Job, e *ExtractTask) {
-	if len(j.BlockPackage) > 0 {
-		packageResult := map[string]map[string]interface{}{}
-		packagenum := len(j.BlockPackage)
-		for pkName, pkg := range j.BlockPackage {
-			//是否清理标记
-			clearmap := map[string]bool{}
-			sonJobResult := map[string]interface{}{}
-			sonJobResult["text"] = pkg.Text
-			sonJobResult["origin"] = pkg.Origin
-			sonJobResult["type"] = pkg.Type
-			sonJobResult["winnerorder"] = pkg.WinnerOrder
-			for k, tags := range e.Tag {
-			L:
-				for _, tag := range tags {
-					if pkg.TableKV != nil {
-						for key, val := range pkg.TableKV.Kv {
-							if tag.Key == key {
-								clearmap[k] = false
-								var tmpval interface{}
-								if len(e.ClearFn[k]) > 0 {
-									data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-									tmpval = data[0]
-								} else {
-									tmpval = val
-								}
-								sonJobResult[k] = tmpval
-								if packagenum == 1 {
-									field := &ju.ExtField{
-										Field:     k,
-										Code:      "package",
-										RuleText:  "package",
-										Type:      "table",
-										MatchType: "tag_string",
-										ExtFrom:   "package",
-										Value:     tmpval,
-										Score:     0,
+	qu.Try(func() {
+		if len(j.BlockPackage) > 0 {
+			packageResult := map[string]map[string]interface{}{}
+			packagenum := len(j.BlockPackage)
+			for pkName, pkg := range j.BlockPackage {
+				//是否清理标记
+				clearmap := map[string]bool{}
+				sonJobResult := map[string]interface{}{}
+				sonJobResult["text"] = pkg.Text
+				sonJobResult["origin"] = pkg.Origin
+				sonJobResult["type"] = pkg.Type
+				sonJobResult["winnerorder"] = pkg.WinnerOrder
+				for k, tags := range e.Tag {
+				L:
+					for _, tag := range tags {
+						if pkg.TableKV != nil {
+							for key, val := range pkg.TableKV.Kv {
+								if tag.Key == key {
+									clearmap[k] = false
+									var tmpval interface{}
+									if len(e.ClearFn[k]) > 0 {
+										data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
+										tmpval = data[0]
+									} else {
+										tmpval = val
+									}
+									sonJobResult[k] = tmpval
+									if packagenum == 1 {
+										field := &ju.ExtField{
+											Field:     k,
+											Code:      "package",
+											RuleText:  "package",
+											Type:      "table",
+											MatchType: "tag_string",
+											ExtFrom:   "package",
+											Value:     tmpval,
+											Score:     0,
+										}
+										j.Result[k] = append(j.Result[k], field)
 									}
-									j.Result[k] = append(j.Result[k], field)
+									break L
 								}
-								break L
 							}
 						}
-					}
-					if pkg.ColonKV != nil {
-						for key, val := range pkg.ColonKV.Kv {
-							if tag.Key == key {
-								clearmap[k] = true
-								var tmpval interface{}
-								if len(e.ClearFn[k]) > 0 {
-									data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-									tmpval = data[0]
-								} else {
-									tmpval = val
-								}
-								sonJobResult[k] = tmpval
-								if packagenum == 1 {
-									field := &ju.ExtField{
-										Field:     k,
-										Code:      "package",
-										RuleText:  "package",
-										Type:      "colon",
-										MatchType: "tag_string",
-										ExtFrom:   "package",
-										Value:     tmpval,
-										Score:     0,
+						if pkg.ColonKV != nil {
+							for key, val := range pkg.ColonKV.Kv {
+								if tag.Key == key {
+									clearmap[k] = true
+									var tmpval interface{}
+									if len(e.ClearFn[k]) > 0 {
+										data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
+										tmpval = data[0]
+									} else {
+										tmpval = val
 									}
-									j.Result[k] = append(j.Result[k], field)
+									sonJobResult[k] = tmpval
+									if packagenum == 1 {
+										field := &ju.ExtField{
+											Field:     k,
+											Code:      "package",
+											RuleText:  "package",
+											Type:      "colon",
+											MatchType: "tag_string",
+											ExtFrom:   "package",
+											Value:     tmpval,
+											Score:     0,
+										}
+										j.Result[k] = append(j.Result[k], field)
+									}
+									break L
 								}
-								break L
 							}
 						}
-					}
-					if pkg.SpaceKV != nil {
-						for key, val := range pkg.SpaceKV.Kv {
-							if tag.Key == key {
-								clearmap[k] = true
-								var tmpval interface{}
-								if len(e.ClearFn[k]) > 0 {
-									data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-									tmpval = data[0]
-								} else {
-									tmpval = val
-								}
-								sonJobResult[k] = tmpval
-								if packagenum == 1 {
-									field := &ju.ExtField{
-										Field:     k,
-										Code:      "package",
-										RuleText:  "package",
-										Type:      "space",
-										MatchType: "tag_string",
-										ExtFrom:   "package",
-										Value:     tmpval,
-										Score:     0,
+						if pkg.SpaceKV != nil {
+							for key, val := range pkg.SpaceKV.Kv {
+								if tag.Key == key {
+									clearmap[k] = true
+									var tmpval interface{}
+									if len(e.ClearFn[k]) > 0 {
+										data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
+										tmpval = data[0]
+									} else {
+										tmpval = val
+									}
+									sonJobResult[k] = tmpval
+									if packagenum == 1 {
+										field := &ju.ExtField{
+											Field:     k,
+											Code:      "package",
+											RuleText:  "package",
+											Type:      "space",
+											MatchType: "tag_string",
+											ExtFrom:   "package",
+											Value:     tmpval,
+											Score:     0,
+										}
+										j.Result[k] = append(j.Result[k], field)
 									}
-									j.Result[k] = append(j.Result[k], field)
+									break L
 								}
-								break L
 							}
 						}
 					}
 				}
-			}
-			//如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
-			if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
-				firstWinnerOrder := pkg.WinnerOrder[0]
-				if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
-					sonJobResult["winner"] = firstWinnerOrder["entname"]
-				}
-				if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
-					sonJobResult["bidamount"] = firstWinnerOrder["price"]
+				//如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
+				if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
+					firstWinnerOrder := pkg.WinnerOrder[0]
+					if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
+						sonJobResult["winner"] = firstWinnerOrder["entname"]
+					}
+					if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
+						sonJobResult["bidamount"] = firstWinnerOrder["price"]
+					}
 				}
+				//log.Println(pkName, sonJobResult)
+				sonJobResult["clear"] = clearmap
+				packageResult[pkName] = sonJobResult
+			}
+			if len(packageResult) > 0 {
+				j.PackageInfo = packageResult
 			}
-			//log.Println(pkName, sonJobResult)
-			sonJobResult["clear"] = clearmap
-			packageResult[pkName] = sonJobResult
-		}
-		if len(packageResult) > 0 {
-			j.PackageInfo = packageResult
 		}
-	}
-	extRegBackPack(j, e)
+		extRegBackPack(j, e)
+	}, func(err interface{}) {
+		log.Println("PackageDetail err", err)
+	})
 }
 
 //清理分包信息
 func extRegBackPack(j *ju.Job, e *ExtractTask) {
+	defer qu.Catch()
 	//正则清理
 	for _, rc := range e.RuleCores {
 		for pk, pack := range j.PackageInfo {

+ 78 - 62
src/jy/extract/extract.go

@@ -89,6 +89,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 
 //启动抽取
 func StartExtractTaskId(taskId string) bool {
+	defer qu.Catch()
 	isgo := false
 	ext := TaskList[taskId]
 	if ext == nil {
@@ -133,6 +134,7 @@ func StartExtractTaskId(taskId string) bool {
 
 //停止抽取
 func StopExtractTaskId(taskId string) bool {
+	defer qu.Catch()
 	ext := TaskList[taskId]
 	if ext != nil {
 		ext.IsRun = false
@@ -145,6 +147,7 @@ func StopExtractTaskId(taskId string) bool {
 
 //开始抽取
 func RunExtractTask(taskId string) {
+	defer qu.Catch()
 	ext := TaskList[taskId]
 	query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
 	count := ext.TaskInfo.FDB.Count(ext.TaskInfo.FromColl, query)
@@ -182,6 +185,7 @@ func RunExtractTask(taskId string) {
 
 //信息预处理
 func PreInfo(doc map[string]interface{}) *ju.Job {
+	defer qu.Catch()
 	detail := ""
 	d1, _ := doc["detail"].(string)
 	d2, _ := doc["contenthtml"].(string)
@@ -312,15 +316,16 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 		//		log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
 		//分析抽取结果并保存 todo
 		AnalysisSaveResult(j, e)
+		<-e.TaskInfo.ProcessPool
 	}, func(err interface{}) {
-		log.Println((*j.Data)["_id"], err)
+		log.Println("ExtractProcess err", err, (*j.Data)["_id"])
 		<-e.TaskInfo.ProcessPool
 	})
-	<-e.TaskInfo.ProcessPool
 }
 
 //前置过滤
 func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} {
+	defer qu.Catch()
 	before := ju.DeepCopy(doc).(map[string]interface{})
 	extinfo := map[string]interface{}{}
 	if in.IsLua {
@@ -345,6 +350,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 
 //抽取-规则
 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
+	defer qu.Catch()
 	//废标、流标、ppp等跳过
 	b := IsExtract(in.Field, j.Title, j.Content)
 	if !b {
@@ -385,6 +391,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 
 //lua脚本根据属性设置提取kv值
 func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
+	defer qu.Catch()
 	kvmap := map[string][]map[string]interface{}{}
 	for fieldname, field := range in.LFields {
 		lock.Lock()
@@ -547,6 +554,7 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
 
 //正则提取结果
 func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} {
+	defer qu.Catch()
 	extinfo := map[string][]map[string]interface{}{}
 	if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
 		apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1)
@@ -614,6 +622,7 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
 
 //后置过滤
 func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
+	defer qu.Catch()
 	if in.IsLua {
 		result := GetResultMapForLua(j)
 		lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
@@ -695,6 +704,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 
 //获取抽取结果map[string][]interface{},lua脚本使用
 func GetResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
+	defer qu.Catch()
 	result := map[string][]map[string]interface{}{}
 	for key, val := range j.Result {
 		if result[key] == nil {
@@ -718,6 +728,7 @@ func GetResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
 
 //抽取日志
 func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *RegLuaInfo, t *TaskInfo) {
+	defer qu.Catch()
 	if !t.IsEtxLog {
 		return
 	}
@@ -742,6 +753,7 @@ func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *Re
 
 //保存抽取日志
 func SaveExtLog() {
+	defer qu.Catch()
 	tmpLogs := map[*TaskInfo][]map[string]interface{}{}
 	lock.Lock()
 	tmpLogs = ExtLogs
@@ -773,77 +785,78 @@ type FieldValue struct {
 
 //分析抽取结果并保存
 func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
-	doc := j.Data
-	result := j.Result
-	_id := qu.BsonIdToSId((*doc)["_id"])
-	iscore, _ := ju.Config["fieldscore"].(bool)
-	if iscore { //打分
-		result = ScoreFields(j)
-	}
-	//结果排序
-	values := map[string][]*ju.SortObject{}
-	for key, val := range result {
-		fieldValue := map[string][]interface{}{}
-		if iscore { //走打分
-			for _, v := range val {
-				if len(fmt.Sprint(v.Value)) < 1 {
-					continue //去除空串
+	qu.Try(func() {
+		doc := j.Data
+		result := j.Result
+		_id := qu.BsonIdToSId((*doc)["_id"])
+		iscore, _ := ju.Config["fieldscore"].(bool)
+		if iscore { //打分
+			result = ScoreFields(j)
+		}
+		//结果排序
+		values := map[string][]*ju.SortObject{}
+		for key, val := range result {
+			fieldValue := map[string][]interface{}{}
+			if iscore { //走打分
+				for _, v := range val {
+					if len(fmt.Sprint(v.Value)) < 1 {
+						continue //去除空串
+					}
+					fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
+				}
+			} else { //不走打分,按出现频次
+				for _, v := range val {
+					if len(fmt.Sprint(v.Value)) < 1 {
+						continue //去除空串
+					}
+					if fieldValue[fmt.Sprint(v.Value)] == nil {
+						fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
+					} else {
+						fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
+					}
 				}
-				fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
 			}
-		} else { //不走打分,按出现频次
-			for _, v := range val {
-				if len(fmt.Sprint(v.Value)) < 1 {
-					continue //去除空串
+			objects := []*ju.SortObject{}
+			for k, v := range fieldValue {
+				ValueStr := "" //第二排序
+				if reflect.TypeOf(v[1]).String() == "string" {
+					ValueStr = qu.ObjToString(v[1])
 				}
-				if fieldValue[fmt.Sprint(v.Value)] == nil {
-					fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
-				} else {
-					fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
+				tmp := &ju.SortObject{
+					Key:      k,
+					Value:    qu.IntAll(v[0]),
+					Object:   v[1],
+					ValueStr: ValueStr,
 				}
+				objects = append(objects, tmp)
 			}
+			values[key] = ju.ExtSort(objects)
 		}
-		objects := []*ju.SortObject{}
-		for k, v := range fieldValue {
-			ValueStr := "" //第二排序
-			if reflect.TypeOf(v[1]).String() == "string" {
-				ValueStr = qu.ObjToString(v[1])
-			}
-			tmp := &ju.SortObject{
-				Key:      k,
-				Value:    qu.IntAll(v[0]),
-				Object:   v[1],
-				ValueStr: ValueStr,
+		//从排序结果中取值
+		tmp := map[string]interface{}{} //抽取值
+		for key, val := range values {
+			for _, v := range val { //取第一个非负数
+				if v.Key != "" && v.Value > -1 {
+					tmp[key] = v.Object
+					break
+				}
 			}
-			objects = append(objects, tmp)
 		}
-		values[key] = ju.ExtSort(objects)
-	}
-	//从排序结果中取值
-	tmp := map[string]interface{}{} //抽取值
-	for key, val := range values {
-		for _, v := range val { //取第一个非负数
-			if v.Key != "" && v.Value > -1 {
-				tmp[key] = v.Object
-				break
-			}
+		if len(j.PackageInfo) > 0 { //分包信息
+			tmp["package"] = j.PackageInfo
 		}
-	}
-	if len(j.PackageInfo) > 0 { //分包信息
-		tmp["package"] = j.PackageInfo
-	}
-	if len(j.Winnerorder) > 0 { //候选人信息
-		tmp["winnerorder"] = j.Winnerorder
-	}
-	for k, v := range *doc {
-		//去重冗余字段
-		if k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" {
-			continue
+		if len(j.Winnerorder) > 0 { //候选人信息
+			tmp["winnerorder"] = j.Winnerorder
 		}
-		if tmp[k] == nil {
-			tmp[k] = v
+		for k, v := range *doc {
+			//去重冗余字段
+			if k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" {
+				continue
+			}
+			if tmp[k] == nil {
+				tmp[k] = v
+			}
 		}
-	}
 
 	//质量审核
 	if ju.Config["qualityaudit"].(bool) {
@@ -905,6 +918,9 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 			log.Println(e.TaskInfo.TestColl, _id)
 		}
 	}
+	}, func(err interface{}) {
+		log.Println("AnalysisSaveResult err", err)
+	})
 }
 
 func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {

+ 1 - 0
src/jy/extract/extractudp.go

@@ -84,6 +84,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
 //根据id区间抽取
 func ExtractByUdp(sid, eid string, instanceId ...string) {
+	defer qu.Catch()
 	ext := &ExtractTask{}
 	ext.Id = qu.ObjToString(ju.Config["udptaskid"])
 	ext.InitTaskInfo()

+ 1 - 0
src/jy/extract/isextract.go

@@ -22,6 +22,7 @@ func init() {
 }
 
 func IsExtract(filed, title, content string) bool {
+	defer qu.Catch()
 	b := true
 	if N_extract[filed] != nil {
 		nregs := N_extract[filed]

+ 97 - 93
src/jy/extract/score.go

@@ -35,113 +35,117 @@ func init() {
 //结果打分
 func ScoreFields(j *ju.Job) map[string][]*ju.ExtField {
 	result := j.Result
-	//打分
-	for field, tmps := range result {
-		scoreRule := SoreConfig[field]
-		if scoreRule == nil {
-			continue
-		}
-		extractype := SoreConfig["extractype"]
-		fieldtype := scoreRule["type"]
-		for _, v := range tmps {
-			if len(fmt.Sprint(v.Value)) < 1 {
-				continue //空串跳过
+	qu.Try(func() {
+		//打分
+		for field, tmps := range result {
+			scoreRule := SoreConfig[field]
+			if scoreRule == nil {
+				continue
 			}
-			//长度超过100个字,直接负分
-			vlen := len([]rune(qu.ObjToString(v.Value)))
-			if vlen > 100 && field != "projectscope" {
-				v.Score = -1
-			} else {
-				//类型打分
-				if v.ExtFrom == "title" {
-					v.Score += qu.IntAll(extractype["title"])
+			extractype := SoreConfig["extractype"]
+			fieldtype := scoreRule["type"]
+			for _, v := range tmps {
+				if len(fmt.Sprint(v.Value)) < 1 {
+					continue //空串跳过
+				}
+				//长度超过100个字,直接负分
+				vlen := len([]rune(qu.ObjToString(v.Value)))
+				if vlen > 100 && field != "projectscope" {
+					v.Score = -1
 				} else {
-					if strings.Contains(v.Type, "table") {
-						v.Score += qu.IntAll(extractype["table"])
-					} else if strings.Contains(v.Type, "colon") {
-						v.Score += qu.IntAll(extractype["colon"])
-					} else if strings.Contains(v.Type, "space") {
-						v.Score += qu.IntAll(extractype["space"])
-					} else if strings.Contains(v.Type, "regexp") {
-						v.Score += qu.IntAll(extractype["regexp"])
-					} else if strings.Contains(v.Type, "winnerorder") {
-						v.Score += qu.IntAll(extractype["winnerorder"])
+					//类型打分
+					if v.ExtFrom == "title" {
+						v.Score += qu.IntAll(extractype["title"])
+					} else {
+						if strings.Contains(v.Type, "table") {
+							v.Score += qu.IntAll(extractype["table"])
+						} else if strings.Contains(v.Type, "colon") {
+							v.Score += qu.IntAll(extractype["colon"])
+						} else if strings.Contains(v.Type, "space") {
+							v.Score += qu.IntAll(extractype["space"])
+						} else if strings.Contains(v.Type, "regexp") {
+							v.Score += qu.IntAll(extractype["regexp"])
+						} else if strings.Contains(v.Type, "winnerorder") {
+							v.Score += qu.IntAll(extractype["winnerorder"])
+						}
 					}
-				}
-				//字符型打分
-				if fieldtype == "string" {
-					//位置打分
-					if positions, ok := scoreRule["position"].([]interface{}); ok {
-						for _, position := range positions {
-							if p, ok := position.(map[string]interface{}); ok {
-								qu.Try(func() {
-									if p["regexp"] != nil {
-										reg := p["regexp"].(*regexp.Regexp)
-										if reg.MatchString(qu.ObjToString(v.Value)) {
-											v.Score += qu.IntAll(p["score"])
+					//字符型打分
+					if fieldtype == "string" {
+						//位置打分
+						if positions, ok := scoreRule["position"].([]interface{}); ok {
+							for _, position := range positions {
+								if p, ok := position.(map[string]interface{}); ok {
+									qu.Try(func() {
+										if p["regexp"] != nil {
+											reg := p["regexp"].(*regexp.Regexp)
+											if reg.MatchString(qu.ObjToString(v.Value)) {
+												v.Score += qu.IntAll(p["score"])
+											}
 										}
-									}
-								}, func(err interface{}) {
-									log.Println(err)
-								})
+									}, func(err interface{}) {
+										log.Println(err)
+									})
+								}
 							}
 						}
-					}
-					//长度打分
-					if lengths, ok := scoreRule["length"].([]interface{}); ok {
-						for _, tmp := range lengths {
-							if length, ok := tmp.(map[string]interface{}); ok {
-								min := qu.IntAll(length["min"])
-								max := qu.IntAll(length["max"])
-								scores, _ := length["score"].([]interface{})
-								if len(scores) < 3 {
-									continue
-								}
-								if vlen < min {
-									v.Score += qu.IntAll(scores[0])
-								} else if vlen > max {
-									v.Score += qu.IntAll(scores[2])
-								} else {
-									v.Score += qu.IntAll(scores[1])
+						//长度打分
+						if lengths, ok := scoreRule["length"].([]interface{}); ok {
+							for _, tmp := range lengths {
+								if length, ok := tmp.(map[string]interface{}); ok {
+									min := qu.IntAll(length["min"])
+									max := qu.IntAll(length["max"])
+									scores, _ := length["score"].([]interface{})
+									if len(scores) < 3 {
+										continue
+									}
+									if vlen < min {
+										v.Score += qu.IntAll(scores[0])
+									} else if vlen > max {
+										v.Score += qu.IntAll(scores[2])
+									} else {
+										v.Score += qu.IntAll(scores[1])
+									}
 								}
 							}
 						}
 					}
-				}
-				//float类型打分
-				if fieldtype == "float" {
-					min := qu.IntAll(scoreRule["min"])
-					max := qu.IntAll(scoreRule["max"])
-					val := qu.IntAll(v.Value)
-					scores, _ := scoreRule["score"].([]interface{})
-					if len(scores) < 3 {
-						continue
-					}
-					if val < min && 0 < val {
-						v.Score += qu.IntAll(scores[0])
-					} else if val > max {
-						v.Score += qu.IntAll(scores[2])
-					} else if val <= max && val >= min {
-						v.Score += qu.IntAll(scores[1])
-					}
-				}
-				//decimal
-				if fieldtype == "decimal" {
-					min := qu.IntAll(scoreRule["min"])
-					max := qu.IntAll(scoreRule["max"])
-					val := qu.IntAll(v.Value)
-					scores, _ := scoreRule["score"].([]interface{})
-					if len(scores) < 3 {
-						continue
+					//float类型打分
+					if fieldtype == "float" {
+						min := qu.IntAll(scoreRule["min"])
+						max := qu.IntAll(scoreRule["max"])
+						val := qu.IntAll(v.Value)
+						scores, _ := scoreRule["score"].([]interface{})
+						if len(scores) < 3 {
+							continue
+						}
+						if val < min && 0 < val {
+							v.Score += qu.IntAll(scores[0])
+						} else if val > max {
+							v.Score += qu.IntAll(scores[2])
+						} else if val <= max && val >= min {
+							v.Score += qu.IntAll(scores[1])
+						}
 					}
-					if val > max {
-						v.Score += qu.IntAll(scores[2])
-					} else if val <= max && val > min {
-						v.Score += qu.IntAll(scores[1])
+					//decimal
+					if fieldtype == "decimal" {
+						min := qu.IntAll(scoreRule["min"])
+						max := qu.IntAll(scoreRule["max"])
+						val := qu.IntAll(v.Value)
+						scores, _ := scoreRule["score"].([]interface{})
+						if len(scores) < 3 {
+							continue
+						}
+						if val > max {
+							v.Score += qu.IntAll(scores[2])
+						} else if val <= max && val > min {
+							v.Score += qu.IntAll(scores[1])
+						}
 					}
 				}
 			}
 		}
-	}
+	}, func(err interface{}) {
+		log.Println("ScoreFields err", err)
+	})
 	return result
 }

+ 45 - 23
src/jy/pretreated/analytable.go

@@ -21,7 +21,7 @@ var (
 	//清理表格中是key中包含的空格或数字等
 	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。、_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
 	//清理表格td中的符号
-	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\附(件|图)]|^*")
+	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、,。、_??;;~\\-#\\\\]*|(详?见)附(件|图)")
 	//判断key是金额,对万元的处理
 	moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
 	//根据表格的内容判断是不是表头,如果含有金额则不是表头
@@ -658,8 +658,6 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 		TR := NewTR(table)
 		tdTextIsNull := true
 		tds.Each(func(m int, selm *goquery.Selection) {
-			//			t, _ := selm.Html()
-			//			fmt.Println("t---------", t)
 			//对隐藏列不处理!!!
 			if IsHide(selm) {
 				return
@@ -667,7 +665,6 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			//进入每一个单元格
 			td := NewTD(selm, TR, table)
 			//num++
-			//fmt.Println("------", td.SortKV.Keys, td.SortKV.Map)
 			TR.AddTD(td)
 			if td.Val != "" { //删除一个tr,tr中所有td是空值的
 				tdTextIsNull = false
@@ -680,6 +677,11 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 	})
 	//重置行列
 	table.ComputeRowColSpan()
+	//	for n, tr := range table.TRs {
+	//		for m, td := range tr.TDs {
+	//			qutil.Debug(td.BH, n, m, td.Text, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
+	//		}
+	//	}
 
 	tm := []map[string]interface{}{}
 	tmk := map[string]bool{}
@@ -738,7 +740,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			//删除尾部空行
 			for len(table.TRs) > 0 {
 				npos := len(table.TRs)
-				tailTR := table.TRs[npos-1]
+				tailTR := table.TRs[npos-1] //最后一个tr
 				bspace := true
 				for _, v := range tailTR.TDs {
 					if v.Val != "" || v.SonTableResult != nil || len(v.SortKV.Keys) > 0 {
@@ -782,6 +784,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 			table.TdContactFormat(contactFormat)
 			//开始查找kv,核心模块
 			table.FindKV()
+			qutil.Debug(table.SortKV.Map)
 			//table中抽取品牌
 			if u.IsBrandGoods {
 				table.analyBrand1()
@@ -827,16 +830,16 @@ func (table *Table) Adjust() {
 	table.RowNum = len(table.TRs)
 	//		for k1, tr := range table.TRs {
 	//			for k2, td := range tr.TDs {
-	//				u.Debug(k1, k2, td.Val, td.Rowspan, td.Colspan, td.ColPos, tr.RowPos)
+	//				qutil.Debug(k1, k2, td.Val, td.Rowspan, td.Colspan, td.ColPos, tr.RowPos)
 	//			}
 	//		}
 	//计算行列起止位置,跨行跨列处理
 	table.ComputeRowColSpan()
-	//		for k1, tr := range table.TRs {
-	//			for k2, td := range tr.TDs {
-	//				u.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
-	//			}
+	//	for k1, tr := range table.TRs {
+	//		for k2, td := range tr.TDs {
+	//			qutil.Debug(k1, k2, td.Val, td.StartRow, td.EndRow, td.StartCol, td.EndCol)
 	//		}
+	//	}
 	//大概计算每个起止行列的概率
 	table.GetKeyRation()
 	/*
@@ -847,7 +850,7 @@ func (table *Table) Adjust() {
 				for _, td := range v.Tdmap[v1] {
 					str += "__" + td.Val + fmt.Sprintf("%d_%d_%d_%d", td.StartRow, td.EndRow, td.StartCol, td.EndCol)
 				}
-				u.Debug(k, k1, string(bs), v.Rationmap[v1], str)
+				qutil.Debug(k, k1, string(bs), v.Rationmap[v1], str)
 			}
 		}
 	*/
@@ -862,7 +865,6 @@ func (table *Table) Adjust() {
 			}
 		}
 	}
-
 	if float32(count)/float32(table.TDNum) < 0.85 {
 		//精确计算起止行列是表头的概率
 		table.ComputeRowColIsKeyRation()
@@ -871,7 +873,7 @@ func (table *Table) Adjust() {
 		for i, tr := range table.TRs {
 			for _, td := range tr.TDs {
 				if td.BH {
-					//u.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1)
+					//qutil.Debug("----=====---", td.Val, len(table.TRs[len(table.TRs)-1].TDs), i, len(table.TRs)-1)
 					if i == len(table.TRs)-1 && len(table.TRs[len(table.TRs)-1].TDs) == 2 {
 						res, _, _, _, _ := CheckCommon(td.Val, "abandontable")
 						if res {
@@ -896,7 +898,7 @@ func (table *Table) ComputeRowColSpan() {
 	for k, v := range table.TRs {
 		nk := 0 //nk列的起始,k行的起始||如果有合并,起始就不是0
 		ball := true
-		rowspans := v.TDs[0].Rowspan
+		rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
 		for _, v1 := range v.TDs {
 			if v1.Rowspan != rowspans {
 				ball = false
@@ -995,16 +997,20 @@ func (table *Table) FindTag() {
 //计算r/c_start_end的概率
 func (table *Table) GetKeyRation() {
 	for _, vn := range table.StartAndEndRationKSort.Keys {
+		qutil.Debug("vn:", vn)
 		v := table.StartAndEndRation[vn]
 		for _, v1 := range v.Poss {
 			count := 0
 			n := 0
+			qutil.Debug("len:", len(v.Tdmap[v1]))
 			for _, td := range v.Tdmap[v1] {
 				n++
 				if td.BH {
+					qutil.Debug("val:", td.Val)
 					count++
 				}
 			}
+			qutil.Debug(float32(count), float32(n), float32(count)/float32(n))
 			v.Rationmap[v1] = float32(count) / float32(n)
 		}
 	}
@@ -1020,11 +1026,15 @@ func (table *Table) ComputeRowColIsKeyRation() {
 		checkCompute := map[string]bool{}
 		for k, tr := range table.TRs {
 			rk := fmtkey("r", tr.TDs[0].StartRow, tr.TDs[0].EndRow)
+			qutil.Debug("rk", rk)
 			if k == 0 { //第1行的概率
 				ck := fmtkey("c", tr.TDs[0].StartCol, tr.TDs[0].EndCol)
+				qutil.Debug("ck", ck)
 				//u.Debug(table.BFirstRow, "--", table.StartAndEndRation[rk], table.StartAndEndRation[ck])
 				ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
 				ration2, _ := table.StartAndEndRation[ck].GetTDRation(tr.TDs[0])
+				qutil.Debug("ration1:", ration1, "ration2:", ration2)
+				qutil.Debug(len(tr.TDs) == 2 && ration2 < 0.55, len(tr.TDs) == 2 && ration1 > 0.5)
 				if (len(tr.TDs) == 2 && ration2 < 0.55) && (len(tr.TDs) == 2 && ration1 > 0.5) { //第一行为key
 					bkeyfirstrow = true
 					ball := true
@@ -1061,6 +1071,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 						}
 					}
 				}
+				qutil.Debug("bkeyfirstrow:", bkeyfirstrow, "bkeyfirstcol:", bkeyfirstcol)
 				if !bkeyfirstrow && !bkeyfirstcol {
 					if len(tr.TDs) > 1 && ration1 > ration2 && ration1 > 0.5 {
 						bkeyfirstrow = true
@@ -1091,6 +1102,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 					}
 				}
 			} else {
+				qutil.Debug("bkeyfirstrow", bkeyfirstrow)
 				if bkeyfirstrow {
 					//第一列的概率
 					ration1, _ := table.StartAndEndRation[rk].GetTDRation(tr.TDs[0])
@@ -1105,6 +1117,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 					} //else {for _, td := range tr.TDs {}}
 				} else {
 					//列在起作用
+					qutil.Debug("bkeyfirstcol", bkeyfirstcol)
 					if bkeyfirstcol {
 						for _, td := range tr.TDs {
 							ck := fmtkey("c", td.StartCol, td.EndCol)
@@ -1142,20 +1155,25 @@ func (table *Table) ComputeRowColIsKeyRation() {
 			}
 		}
 	}
+	//qutil.Debug("table.Brule", table.Brule, !bkeyfirstcol && !bkeyfirstrow)
 	if !table.Brule || (!bkeyfirstcol && !bkeyfirstrow) {
 		//断行问题,虽然同列或同行,但中间被跨行截断,表格方向调整
 		for _, k := range table.StartAndEndRationKSort.Keys {
+			qutil.Debug("k:", k)
 			v := table.StartAndEndRation[k]
 			//横向判断,要判断最多的方向,否则会出现不定的情况(map遍历问题)
 			k1 := k[:1]
 			for _, v2 := range v.Poss {
 				lentds := len(v.Tdmap[v2])
+				qutil.Debug(v2.Max, v2.Min, "len", lentds)
 				if v.Rationmap[v2] > checkval {
 					for _, td := range v.Tdmap[v2] {
+						qutil.Debug("td:", td.Val)
 						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) {
 							if k1 == "r" {
 								ck := fmtkey("c", td.StartCol, td.EndCol)
 								rt := table.StartAndEndRation[ck]
+								qutil.Debug("ck:", ck, "rt:", rt)
 								//clen := 0
 								var fv float32
 								var tdn []*TD
@@ -1164,6 +1182,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									//clen = len(tdn)
 								}
 								if lentds > 1 {
+									qutil.Debug((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil)
 									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
 										td.KeyDirect = 1
 										td.KVDirect = 2
@@ -1173,6 +1192,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 							} else {
 								ck := fmtkey("r", td.StartRow, td.EndRow)
 								rt := table.StartAndEndRation[ck]
+								qutil.Debug("ck:", ck, "rt:", rt)
 								var fv float32
 								var tdn []*TD
 								//clen := 0
@@ -1181,6 +1201,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									//clen = len(tdn)
 								}
 								if lentds > 1 {
+									qutil.Debug(tdn != nil, v.Rationmap[v2] > fv, tdn == nil)
 									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
 										td.KeyDirect = 2
 										td.KVDirect = 1
@@ -1188,12 +1209,13 @@ func (table *Table) ComputeRowColIsKeyRation() {
 									}
 								}
 							}
-
+							qutil.Debug(td.Val, td.BH, td.KeyDirect, td.KVDirect)
 						} else {
 							break
 						}
 					}
 				} else if v.Rationmap[v2] < 0.5 && len(v.Tdmap[v2]) > 3 {
+					qutil.Debug("================================")
 					for _, td := range v.Tdmap[v2] {
 						//						u.Debug(td.Val, "-----", td.BH)
 						if td.KeyDirect == 0 && td.BH && !td.MustBH {
@@ -2755,7 +2777,7 @@ func (table *Table) analyBrand1() {
 			arrcount1 := 0 //记录key是否存在必须title(数组数据)
 			arrcount2 := 0
 			ka := make(map[string][]string) //最终存储数据
-			//qutil.Debug(k, "aMap.Keys----", aMap.Keys)
+			//qutil.Debug("aMap.Keys----", aMap.Keys)
 			for _, k0 := range aMap.Keys {
 				v0 := aMap.Map[k0].([]string)
 				//qutil.Debug("k0:", k0, "v0:", v0)
@@ -3202,13 +3224,13 @@ func assembleData(m interface{}, n int) []map[string]string {
 			datas[i] = data
 		}
 		//end
-		for _, fdv := range datas { //清除空数据和只含特殊符号的数据
-			for fmk, fmv := range fdv {
-				if tabletdclear.ReplaceAllString(fmv, "") == "" {
-					delete(fdv, fmk)
-				}
-			}
-		}
+		//		for _, fdv := range datas { //清除空数据和只含特殊符号的数据
+		//			for fmk, fmv := range fdv {
+		//				if tabletdclear.ReplaceAllString(fmv, "") == "" {
+		//					delete(fdv, fmk)
+		//				}
+		//			}
+		//		}
 	} else { //字符串数据
 		realTypeM := m.(map[string]string)
 		datas = append(datas, realTypeM)

+ 12 - 13
src/jy/pretreated/tablev2.go

@@ -29,10 +29,10 @@ type TableResult struct {
 	SortKV         *SortMap             //全局KVmap值,标准化处理过的
 	SortKVWeight   map[string]int       //全局KVmap值,标准化处理过的
 	WinnerOrder    []map[string]interface{}
-	BrandData      [][]map[string]string
-	HasKey         int //有key
-	HasBrand       int //有品牌
-	HasGoods       int //有商品
+	BrandData      [][]map[string]string //品牌抽取结果
+	HasKey         int                   //有key
+	HasBrand       int                   //有品牌
+	HasGoods       int                   //有商品
 }
 
 //快速创建TableResult对象
@@ -116,6 +116,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	bsontable := false
 	txt := ""
 	if ht.Size() > 0 {
+		//qutil.Debug("有子表格")
 		txt = TextAfterRemoveTable(td.Html)
 		ts := td.TR.Table.TableResult
 		tabs, _ := ComputeConRatio(td.Html, 2)
@@ -194,9 +195,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 						}
 					}
 				}
-
 				//u.Debug(fmt.Sprintf("%v", td.TR.Table.BlockPackage.Map["1"]))
-
 			}
 		}
 	} else {
@@ -471,7 +470,7 @@ type Table struct {
 	Brule                  bool //是否规则
 	TRs                    []*TR
 	BFirstRow              bool
-	RowNum                 int                       //
+	RowNum                 int                       //行
 	ColNum                 int                       //列数
 	TDNum                  int                       //td个数
 	BPackage               bool                      //是否有包
@@ -491,12 +490,12 @@ type Table struct {
 	StartAndEndRation      map[string]*TDRationScope //同行或同列的概率,截断的单独起算
 	StartAndEndRationKSort *SortMap
 	WinnerOrder            []map[string]interface{}
-	BSplit                 bool //是否是有一个表拆分成的多个表
-	BHeader                bool //拆分表是否有表头
-	BrandData              [][]map[string]string
-	HasKey                 int //有key
-	HasBrand               int //有品牌
-	HasGoods               int //有商品
+	BSplit                 bool                  //是否是有一个表拆分成的多个表
+	BHeader                bool                  //拆分表是否有表头
+	BrandData              [][]map[string]string //品牌抽取结果
+	HasKey                 int                   //有key
+	HasBrand               int                   //有品牌
+	HasGoods               int                   //有商品
 }
 
 func NewTable(Html string, TableResult *TableResult, tab *goquery.Selection) *Table {

+ 3 - 3
src/main_test.go

@@ -14,7 +14,7 @@ import (
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")
-	extract.StartExtractTestTask("5c528686698414055c47b115", "5c2a439aa5cb26b9b76405de", "1", "mxs_v2", "mxs_v2")
+	extract.StartExtractTestTask("5c528686698414055c47b115", "5a524c3d40d2d9bbe8e9cef0", "1", "mxs_v2", "mxs_v2")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }
@@ -66,12 +66,12 @@ func Test_reg3(t *testing.T) {
 	text := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)")
 	for i := 1; i <= 2; i++ {
 		if len(text) > 0 {
-			text = aa12(i, text)
+			text = gl(i, text)
 		}
 	}
 	log.Println("finish--", string(text))
 }
-func aa12(i int, text []rune) []rune {
+func gl(i int, text []rune) []rune {
 	pairedIndex := make(map[int]int)
 	surplusMax := -1  //记录多余的反符号最大值
 	positiveMax := -1 //记录多余的正符号最大值

+ 1 - 1
src/res/brandrule.json

@@ -1,6 +1,6 @@
 {
 	"must":{
-		"itemname":"((^(货物|品目|产品|商品|物资|印刷品|物料|材料|采购项目|设备|成交标(的)?)(名称|种类|内容|服务)+|服务产品|(采购|机械)(目录|设备)|^(品名|品目)$)和?)+",
+		"itemname":"((^(货物|品目|产品|标项|商品|物资|印刷品|物料|材料|设备|成交标(的)?)(名称|种类|内容|服务)+|服务产品|(采购|机械)(目录|设备)|^(品名|品目)$)和?)+",
 		"brandname":"^(品牌(名称)?|厂家)",
 		"modal":"^(规格)?(型号|参数)|规格$|技术规格", 
 		"unitprice":"单价|^价格|(预算|采购预算)(金额)?$|(单个商品|包件)最高限价|(中标成交|单次服务|控制)+金额|^金额$"

+ 1 - 1
src/web/templates/admin/audit_classlist.html

@@ -63,7 +63,7 @@ $(function () {
         },
 		"columns": [
             { "data": "s_name"},
-			{ "data": "l_lasttime"},
+			{ "data": "l_createtime"},
 			{ "data": "s_user"},
 			{ "data": "_id",render:function(val,a,row){
 				return '<a class="btn btn-sm btn-info opr" opr="edit">编辑</a>'+

+ 1 - 1
src/web/templates/admin/audit_recogfield.html

@@ -62,7 +62,7 @@ $(function () {
 		"columns": [
             { "data": "s_name"},
 			{ "data": "s_recogfield"},
-			{ "data": "l_lasttime"},
+			{ "data": "l_createtime"},
 			{ "data": "s_user"},
 			{ "data": "_id",render:function(val,a,row){
 				return '<a class="btn btn-sm btn-info opr" opr="edit">编辑</a>'+

+ 1 - 1
src/web/templates/admin/audit_rulelist.html

@@ -65,7 +65,7 @@ $(function () {
         },
 		"columns": [
             { "data": "s_name"},
-			{ "data": "l_lasttime"},
+			{ "data": "l_createtime"},
 			{ "data": "s_user"},
 			{ "data": "_id",render:function(val,a,row,meta){
 				var udhtml = '&nbsp;&nbsp;<a class="btn btn-sm btn-success opr" opr="moveup" num="'+meta.row+'">上移</a>'+