zhangjinkun 6 سال پیش
والد
کامیت
af0bffbb9e

+ 10 - 11
src/jy/admin/version.go

@@ -77,25 +77,24 @@ func init() {
 		c.JSON(200, gin.H{"data": list, "vid": vid})
 	})
 	Admin.POST("/version/infosave", func(c *gin.Context) {
-		s_field, _ := c.GetPostForm("s_field")
-		vid, _ := c.GetPostForm("vid")
+		_id, _ := c.GetPostForm("_id")
 		data := GetPostForm(c)
-		data["l_createtime"] = time.Now().Unix()
-		data["s_username"] = sessions.Default(c).Get("username")
-		data["l_lasttime"] = time.Now().Unix()
-		data["delete"] = false
-		tmp, _ := Mgo.FindOne("versioninfo", `{"s_field":"`+s_field+`","vid":"`+vid+`"}`)
-		if len(*tmp) > 0 {
-			c.JSON(200, gin.H{"rep": false})
+		if _id != "" {
+			Mgo.UpdateById("versioninfo", _id, map[string]interface{}{"$set": data})
 		} else {
+			s_field, _ := c.GetPostForm("s_field")
+			vid, _ := c.GetPostForm("vid")
+			data["l_createtime"] = time.Now().Unix()
+			data["s_username"] = sessions.Default(c).Get("username")
+			data["l_lasttime"] = time.Now().Unix()
+			data["delete"] = false
 			pid := Mgo.Save("versioninfo", data)
 			fromvid, _ := data["s_pversionid"].(string)
 			if fromvid != "" {
 				copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string))
 			}
-			c.JSON(200, gin.H{"rep": true})
 		}
-
+		c.JSON(200, gin.H{"rep": true})
 	})
 	Admin.POST("/version/infouse", func(c *gin.Context) {
 		_id, _ := c.GetPostForm("_id")

+ 27 - 11
src/jy/extract/extract.go

@@ -46,7 +46,7 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitProvince()
 	ext.InitCityAll()
 	ext.InitCitySim()
-	InitDFA()
+	//InitDFA()
 
 	return RunExtractTestTask(ext, startId, num)
 }
@@ -190,7 +190,13 @@ func PreInfo(doc map[string]interface{}) *ju.Job {
 
 //抽取
 func (e *ExtractTask) ExtractProcess(j *ju.Job) {
-	qu.Catch()
+	//	for _, bl := range j.Block {
+	//		log.Println(bl.ColonKV.Kv)
+	//	}
+	//	for k, v := range j.BlockPackage {
+	//		bs, _ := json.Marshal(v.TableKV)
+	//		log.Println(k, string(bs), v.WinnerOrder)
+	//	}
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -287,7 +293,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 				if tmps, ok := v.([]map[string]interface{}); ok {
 					for _, tmp := range tmps {
 						j.Result[k] = append(j.Result[k],
-							&ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]})
+							&ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"], 0})
 					}
 				}
 			}
@@ -490,7 +496,7 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
 						if j.Result[v.Field] == nil {
 							j.Result[k] = [](*ju.ExtField){}
 						}
-						j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val})
+						j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
 					}
 				}
 			}
@@ -522,7 +528,7 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
 			if j.Result[v.Field] == nil {
 				j.Result[v.Field] = [](*ju.ExtField){}
 			}
-			j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val})
+			j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
 		}
 	}
 	return extinfo
@@ -541,7 +547,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 			if tmps, ok := v.([]map[string]interface{}); ok {
 				j.Result[k] = [](*ju.ExtField){}
 				for _, tmp := range tmps {
-					j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]})
+					j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
 				}
 			}
 		}
@@ -555,6 +561,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 				tmp := j.Result[in.Field]
 				exts := []interface{}{}
 				for k, v := range tmp {
+					if v.Type == "table" { //table抽取到的数据不清理
+						continue
+					}
 					text := qu.ObjToString(v.Value)
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
@@ -579,6 +588,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 			for key, tmp := range j.Result {
 				exts := []interface{}{}
 				for k, v := range tmp {
+					if v.Type == "table" { //table抽取到的数据不清理
+						continue
+					}
 					text := qu.ObjToString(v.Value)
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
@@ -684,16 +696,20 @@ type FieldValue struct {
 //分析抽取结果并保存
 func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.ExtField, task *TaskInfo) {
 	_id := qu.BsonIdToSId((*doc)["_id"])
+	result = ScoreFields(result)
 	//结果排序
 	values := map[string][]*ju.SortObject{}
 	for key, val := range result {
 		fieldValue := map[string][]interface{}{}
+		//		for _, v := range val {
+		//			if fieldValue[fmt.Sprint(v.Value)] == nil {
+		//				fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
+		//			} else {
+		//				fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
+		//			}
+		//		}
 		for _, v := range val {
-			if fieldValue[fmt.Sprint(v.Value)] == nil {
-				fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
-			} else {
-				fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
-			}
+			fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
 		}
 		objects := []*ju.SortObject{}
 		for k, v := range fieldValue {

+ 2 - 1
src/jy/extract/extractInit.go

@@ -178,6 +178,7 @@ func (e *ExtractTask) InitRuleCore() {
 		if b, _ := vinfo["isuse"].(bool); !b {
 			continue
 		}
+		s_field := qu.ObjToString(vinfo["s_field"])
 		pid := qu.BsonIdToSId(vinfo["_id"])
 		list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
 		for _, vv := range *list {
@@ -185,7 +186,7 @@ func (e *ExtractTask) InitRuleCore() {
 				continue
 			}
 			rcore := &RuleCore{}
-			rcore.Field = vinfo["s_field"].(string)
+			rcore.Field = s_field
 			rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
 			rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
 			//前置规则

+ 135 - 0
src/jy/extract/score.go

@@ -0,0 +1,135 @@
+// score
+package extract
+
+import (
+	ju "jy/util"
+	"log"
+	qu "qfw/util"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// SoreConfig holds the scoring rules loaded from ./res/fieldscore.json by init.
+// It is indexed by field name; the "extractype" entry holds the scores that
+// ScoreFields adds according to how a value was extracted.
+var SoreConfig map[string]map[string]interface{}
+
+// init reads ./res/fieldscore.json into SoreConfig and pre-compiles the
+// "regstr" pattern of every position rule of string-typed fields, storing the
+// compiled expression under the rule's "regexp" key for ScoreFields to use.
+//
+// A pattern that fails to compile is logged and left without a "regexp" entry,
+// so ScoreFields simply ignores that rule.
+func init() {
+	qu.ReadConfig("./res/fieldscore.json", &SoreConfig)
+	for _, rule := range SoreConfig {
+		if rule["type"] != "string" {
+			continue
+		}
+		positions, ok := rule["position"].([]interface{})
+		if !ok {
+			continue
+		}
+		for _, position := range positions {
+			p, ok := position.(map[string]interface{})
+			if !ok {
+				continue
+			}
+			regstr := qu.ObjToString(p["regstr"])
+			if regstr == "" {
+				// An empty pattern would match every value and apply the
+				// rule's score unconditionally; treat it as "no rule".
+				continue
+			}
+			// Compile returns an error instead of panicking, so no
+			// recover wrapper is needed around it.
+			reg, err := regexp.Compile(regstr)
+			if err != nil {
+				log.Println(err)
+				continue
+			}
+			p["regexp"] = reg
+		}
+	}
+	log.Println(SoreConfig["projectname"])
+}
+
+//结果打分
+func ScoreFields(result map[string][]*ju.ExtField) map[string][]*ju.ExtField {
+	for field, tmps := range result {
+		scoreRule := SoreConfig[field]
+		if scoreRule == nil {
+			continue
+		}
+		extractype := SoreConfig["extractype"]
+		fieldtype := scoreRule["type"]
+		for _, v := range tmps {
+			//类型打分
+			if v.ExtFrom == "title" {
+				v.Score += qu.IntAll(extractype["title"])
+			} else {
+				if strings.Contains(v.Type, "table") {
+					v.Score += qu.IntAll(extractype["table"])
+				} else if strings.Contains(v.Type, "colon") {
+					v.Score += qu.IntAll(extractype["colon"])
+				} else if strings.Contains(v.Type, "space") {
+					v.Score += qu.IntAll(extractype["space"])
+				} else if strings.Contains(v.Type, "regexp") {
+					v.Score += qu.IntAll(extractype["regexp"])
+				}
+			}
+			//字符型打分
+			if fieldtype == "string" {
+				//位置打分
+				if positions, ok := scoreRule["position"].([]interface{}); ok {
+					for _, position := range positions {
+						if p, ok := position.(map[string]interface{}); ok {
+							qu.Try(func() {
+								if p["regexp"] != nil {
+									reg := p["regexp"].(*regexp.Regexp)
+									if reg.MatchString(qu.ObjToString(v.Value)) {
+										v.Score += qu.IntAll(p["score"])
+									}
+								}
+							}, func(err interface{}) {
+								log.Println(err)
+							})
+						}
+					}
+				}
+				//长度打分
+				if lengths, ok := scoreRule["length"].([]interface{}); ok {
+					for _, tmp := range lengths {
+						if length, ok := tmp.(map[string]interface{}); ok {
+							min := qu.IntAll(length["min"])
+							max := qu.IntAll(length["max"])
+							vlen := len([]rune(qu.ObjToString(v.Value)))
+							scores, _ := length["score"].([]interface{})
+							if len(scores) < 3 {
+								continue
+							}
+							if vlen < min {
+								v.Score += qu.IntAll(scores[0])
+							} else if vlen > max {
+								v.Score += qu.IntAll(scores[2])
+							} else {
+								v.Score += qu.IntAll(scores[1])
+							}
+						}
+					}
+				}
+			}
+			//float类型打分
+			if fieldtype == "float" {
+				min := qu.IntAll(scoreRule["min"])
+				max := qu.IntAll(scoreRule["max"])
+				val := qu.IntAll(v.Value)
+				scores, _ := scoreRule["score"].([]interface{})
+				if len(scores) < 3 {
+					continue
+				}
+				if val < min && 0 < val {
+					v.Score += qu.IntAll(scores[0])
+				} else if val > max {
+					v.Score += qu.IntAll(scores[2])
+				} else if val <= max && val >= min {
+					v.Score += qu.IntAll(scores[1])
+				}
+			}
+			//decimal
+			if fieldtype == "decimal" {
+				min := qu.IntAll(scoreRule["min"])
+				max := qu.IntAll(scoreRule["max"])
+				val := qu.IntAll(v.Value)
+				scores, _ := scoreRule["score"].([]interface{})
+				if len(scores) < 3 {
+					continue
+				}
+				if val > max {
+					v.Score += qu.IntAll(scores[2])
+				} else if val <= max && val > min {
+					v.Score += qu.IntAll(scores[1])
+				}
+			}
+		}
+	}
+	return result
+}

+ 120 - 5
src/jy/pretreated/analystep.go

@@ -30,6 +30,8 @@ func AnalyStart(job *util.Job) {
 	}
 	blockArrays, _ := DivideBlock(con, 1)
 	if len(blockArrays) > 0 { //有分块
+		//从块里面找分包
+		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title)
 		for _, bl := range blockArrays {
 			if len([]rune(bl.Text)) > 80 {
 				ba1, _ := DivideBlock(bl.Text, 1)
@@ -47,24 +49,30 @@ func AnalyStart(job *util.Job) {
 			t1, _ := ComputeConRatio(bl.Text, 2)
 			if len(t1) > 0 {
 				tabres := AnalyTableV2(t1, job.Category, bl.Title, bl.Text, 2, job.SourceMid)
-				processTableResult(tabres, bl)
+				processTableResult(tabres, bl, job)
 				if bl.Title == "" && tabres.BlockTag != "" {
 					bl.Title = tabres.BlockTag
 				}
+				//				for k, v := range bl.TableKV.Kv {
+				//					log.Println("bl.TableKV.Kv", k, v)
+				//				}
 			}
 			job.Block = append(job.Block, bl)
 		}
 	} else { //未分块,创建分块
 		bl := &util.Block{}
 		newCon := con
-		if len(tabs) > 0 { //解析表格逻辑(article,处理完把值赋到article)
+		if len(tabs) > 0 { //解析表格逻辑
 			newCon = TextAfterRemoveTable(con)
-			//table中kv覆盖全文正则的kv
+			job.BlockPackage = FindPackageFromText(job.Title, newCon)
 			tabres := AnalyTableV2(tabs, job.Category, "", con, 1, job.SourceMid)
-			processTableResult(tabres, bl)
+			processTableResult(tabres, bl, job)
 			//			for k, v := range bl.TableKV.Kv {
 			//				log.Println("bl.TableKV.Kv", k, v)
 			//			}
+		} else {
+			//从正文里面找分包
+			job.BlockPackage = FindPackageFromText(job.Title, newCon)
 		}
 		//调用kv解析
 		bl.ColonKV = GetKVAll(newCon, "", 1)
@@ -74,7 +82,7 @@ func AnalyStart(job *util.Job) {
 }
 
 //分析table解析结果
-func processTableResult(tabres *TableResult, block *util.Block) {
+func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 	//解析结果中的kv
 	kv := map[string]string{}
 	for k, v := range tabres.SortKV.Map {
@@ -85,6 +93,113 @@ func processTableResult(tabres *TableResult, block *util.Block) {
 		kvIndex[k] = v
 	}
 	block.TableKV = &util.JobKv{Kv: kv, KvIndex: kvIndex}
+
+	//分包
+	tablePackage := map[string]*util.BlockPackage{}
+	if tabres.IsMultiPackage {
+		//分包中的map
+		for k, v := range tabres.PackageMap.Map {
+			blockPackage, ok := v.(*util.BlockPackage)
+			if !ok {
+				continue
+			}
+			//解析kv
+			//找到key是“包1中标单位”这种的key,过滤掉包1,再次到标签库中匹配
+			labelKVs := []*util.Kv{}
+			if blockPackage.TableKV != nil && blockPackage.TableKV.Kv != nil {
+				for tk, tv := range blockPackage.TableKV.Kv {
+					if regReplKey.MatchString(tk) || regSplit.MatchString(tk) {
+						labelKVs = append(labelKVs, &util.Kv{
+							Key:   tk,
+							Value: tv,
+						})
+					}
+				}
+			}
+			labelKV, _ := KvTagsToKV(labelKVs, "", nil, 2)
+			for lk, lv := range labelKV {
+				if blockPackage.TableKV.Kv[lk] != "" {
+					continue
+				}
+				blockPackage.TableKV.Kv[lk] = lv
+			}
+			tablePackage[k] = blockPackage
+		}
+	}
+	//处理中标人排序
+	wror := []map[string]interface{}{}
+	for _, v := range tabres.WinnerOrder {
+		entName, _ := v["entname"].(string)
+		v["entname"] = winnerOrderEntity.clear("中标单位", entName)
+		if price, ok := v["price"].(string); ok {
+			v["price"] = winnerOrderEntity.clear("中标金额", price)
+		}
+		v["type"] = 2
+		wror = append(wror, v)
+	}
+	if len(wror) > 0 {
+		job.Winnerorder = wror
+	}
+	//分包
+	if len(tablePackage) > 0 {
+		pkgMap := map[string]*util.BlockPackage{}
+		for tk, tv := range tablePackage {
+			bv := job.BlockPackage[tk]
+			if bv == nil {
+				pkgMap[tk] = tv
+				continue
+			}
+			bv.Text += "\n" + tv.Text
+			/************table中的分包替换块里面找到的****************/
+			//
+			if tv.ColonKV != nil {
+				if bv.ColonKV == nil {
+					bv.ColonKV = util.NewJobKv()
+				}
+				for k, v := range tv.ColonKV.Kv {
+					if bv.ColonKV.Kv[k] != "" {
+						continue
+					}
+					bv.ColonKV.Kv[k] = v
+				}
+			}
+			//
+			if tv.TableKV != nil {
+				if bv.TableKV == nil {
+					bv.TableKV = util.NewJobKv()
+				}
+				for k, v := range tv.TableKV.Kv {
+					if bv.TableKV.Kv[k] != "" {
+						continue
+					}
+					bv.TableKV.Kv[k] = v
+				}
+			}
+			//
+			if tv.Origin != "" {
+				bv.Origin = tv.Origin
+			}
+			//
+			if tv.Index != "" {
+				bv.Index = tv.Index
+			}
+			//
+			if tv.Type != "" {
+				bv.Type = tv.Type
+			}
+			//
+			if tv.BidStatus != "" {
+				bv.BidStatus = tv.BidStatus
+			}
+			//
+			if tv.WinnerOrder != nil && len(tv.WinnerOrder) > 0 {
+				bv.WinnerOrder = tv.WinnerOrder
+			}
+		}
+		for k, v := range pkgMap {
+			job.BlockPackage[k] = v
+		}
+	}
 }
 
 //一行多列 一列多行,按照分块逻辑处理

+ 420 - 12
src/jy/pretreated/analytable.go

@@ -534,10 +534,14 @@ func AnalyTableV2(tabs []*goquery.Selection, toptype, blockTag, con string, ityp
 //开始解析表格集
 func (ts *TableResult) Analy() {
 	tabs := []*Table{}
+	contactFormat := &u.ContactFormat{
+		IndexMap: map[int]string{},
+		MatchMap: map[string]map[string]bool{},
+	}
 	for _, table := range ts.GoqueryTabs {
 		tn := NewTable(ts.Html, ts, table)
 		//核心模块
-		ts := tn.Analy()
+		ts := tn.Analy(contactFormat)
 		for _, tab := range ts {
 			tabs = append(tabs, tab)
 			//log.Println("tab.SortKV.Map", tab.SortKV.Map)
@@ -621,7 +625,7 @@ func (ts *TableResult) Analy() {
 }
 
 //解析表格
-func (table *Table) Analy() []*Table {
+func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
 	//查找表体中的tr对象
 	trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
 	if trs.Size() == 0 {
@@ -729,7 +733,7 @@ func (table *Table) Analy() []*Table {
 			table.Adjust()
 			//查找表格的标签
 			table.FindTag()
-			//u.Debug(table.TableResult.Id, table.Tag)
+			//log.Println(table.TableResult.Id, table.Html)
 			//分割表格
 			if table.BSplit {
 				if !table.BHeader && n > 0 {
@@ -751,7 +755,7 @@ func (table *Table) Analy() []*Table {
 				table.StandKV["项目名称"] = table.Tag
 				table.StandKVWeight["项目名称"] = -100
 			}
-			//table.TdContactFormat(contactFormat)
+			table.TdContactFormat(contactFormat)
 			//开始查找kv,核心模块
 			table.FindKV()
 			//判断是否是多包,并处理分包的
@@ -1290,15 +1294,17 @@ func (table *Table) FindKV() {
 				}
 				**/
 				if !td.BH && td.KVDirect < 3 {
-					if !table.FindTdVal(td, vdirect, direct) {
-						//都识别不到时,对第一、二中标候选人的处理
-						bo, res := GetBidOrder(td, bodirect, sort)
-						if res {
-							sort++
-							bodirect = bo
+					if !table.FindTdVal(td, direct, vdirect) {
+						if !table.FindTdVal(td, vdirect, direct) {
+							//都识别不到时,对第一、二中标候选人的处理
+							bo, res := GetBidOrder(td, bodirect, sort)
+							if res {
+								sort++
+								bodirect = bo
+							}
 						}
 					}
-					//u.Debug(td.Val, td.BH, td.HeadTd, td.KVDirect)
+					//log.Println("td", td.Val, td.BH, td.HeadTd, td.KVDirect)
 				}
 			}
 		}
@@ -1457,7 +1463,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 	if near != nil && near.BH && (near.KeyDirect == vdirect || near.KeyDirect == 0) && (near.KVDirect == direct || near.KVDirect == 0) && near.KVDirect < 3 {
 		near.KVDirect = direct
 		near.KeyDirect = vdirect
-		//u.Debug(direct, near.KVDirect, near.Val, td.Val)
 		td.KVDirect = direct
 		key := near.Val
 		if near.Val == "" {
@@ -2165,3 +2170,406 @@ func replPkgConfusion(v1 string) string {
 	v1 = PreCon2.ReplaceAllString(v1, "")
 	return v1
 }
+
+// TdContactFormat post-processes the text of the table's td cells to work out
+// which contact type (a key of ContactType) a contact-related key belongs to.
+//
+// The state it tracks is taken from contactFormat on entry (IndexMap and
+// MatchMap) and written back to it on exit, so the caller can carry it from one
+// table to the next. Keys matching ContactInfoMustReg are stored in the cell's
+// SortKV prefixed with the contact type currently tracked in indexMap; for a
+// single key with an empty value the cell's Text and Val are rewritten instead.
+// If nothing was matched at all, both maps are reset to empty before being
+// written back.
+func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
+	// Handle contact information found in the table.
+	indexMap := contactFormat.IndexMap
+	matchMap := contactFormat.MatchMap
+	weightMap := map[string]map[string]interface{}{} // weights
+	mustMatchFirst := len(indexMap) > 0              // the first one must match
+	reCreate := false
+	matchCount := 0
+	contactTypeTagMap := map[string]map[string][]interface{}{}
+	//u.Debug(mustMatchFirst, indexMap, matchMap)
+	notMatchTrCount := 0
+	allAscFind := true
+	// Pre-scan, only when no contact type was carried in: look for a contact
+	// type that appears after the contact keys. If one is found, indexMap[0]
+	// is set and the ascending search below is disabled (allAscFind = false).
+	if len(indexMap) == 0 {
+		isCanAddToIndexMap := false
+		matchPrevFlag := false
+		prevCanAddToIndexMap := false
+	LS:
+		for _, tr := range tn.TRs {
+			for td_index, td := range tr.TDs {
+				thisTdKvs := colonkvEntity.GetKvs(td.Text, "", 2)
+				if len(thisTdKvs) == 0 {
+					// No kv found: treat a short cell text as a key with an empty value.
+					tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
+					if tdValue != "" && len([]rune(tdValue)) < 10 {
+						thisTdKvs = append(thisTdKvs, &u.Kv{
+							Key:   tdValue,
+							Value: "",
+						})
+					}
+				}
+				if len(thisTdKvs) != 1 {
+					continue
+				}
+				// Handle the case where the buyer appears after the contact person / phone.
+				td_k := FilterContactKey(thisTdKvs[0].Key)
+				td_k_length := len([]rune(td_k))
+				if td_k_length < 2 || td_k_length > 15 {
+					continue
+				}
+				isContinue := ContactInfoMustReg.MatchString(td_k)
+				if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
+					if !matchPrevFlag && len(indexMap) > 0 {
+						// Abandon the pre-scan result.
+						indexMap = map[int]string{}
+						break LS
+					}
+					isCanAddToIndexMap = true
+				}
+				if isContinue {
+					continue
+				}
+				for _, k := range HasOrderContactType(td_k) {
+					if !ContactType[k].MatchString(td_k) {
+						continue
+					}
+					if len(indexMap) == 0 {
+						if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) {
+							myPrevTdVal := ""
+							if td_index-2 >= 0 {
+								myPrevTdVal = tr.TDs[td_index-2].Val
+							}
+							if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
+								matchPrevFlag = true
+							}
+							indexMap[0] = k
+							break
+						}
+					} else {
+						// A second contact type was found: abandon the pre-scan result.
+						indexMap = map[int]string{}
+						break LS
+					}
+				}
+			}
+			prevCanAddToIndexMap = isCanAddToIndexMap
+			isCanAddToIndexMap = false
+		}
+		if len(indexMap) > 0 {
+			allAscFind = false
+		}
+	}
+	//////
+	// Main pass over every row and cell.
+L:
+	for tr_index, tr := range tn.TRs {
+		thisTrHasMatch := false
+		jumpNextTd := false
+		for td_index, td := range tr.TDs {
+			// Several terms joined together by 和|以?及|与|、 (matched by regSplit):
+			// split this cell and the next cell's value and pair them up one by one.
+			if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
+				thisTdVals := regSplit.Split(td.Text, -1)
+				nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
+				if len(thisTdVals) == len(nextTdVals) {
+					// NOTE(review): isHandle is never set to true in this function,
+					// so the "!isHandle" check below is always satisfied.
+					isHandle := false
+					for _, k := range HasOrderContactType(td.Text) {
+						if ContactType[k].MatchString(td.Text) {
+							for thisTdVals_k, thisTdVals_v := range thisTdVals {
+								thisTdVals_v = strings.TrimSpace(thisTdVals_v)
+								if ContactType[k].MatchString(thisTdVals_v) {
+									thisTrHasMatch = true
+									tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
+									continue
+								}
+								if !ContactInfoMustReg.MatchString(thisTdVals_v) {
+									continue
+								}
+								jumpNextTd = true
+								thisTrHasMatch = true
+								tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
+							}
+							break
+						}
+					}
+					if !isHandle && len(indexMap) > 0 {
+						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
+						if myContactType, _ := onlyContactType.(string); myContactType != "" {
+							for thisTdVals_k, thisTdVals_v := range thisTdVals {
+								thisTdVals_v = strings.TrimSpace(thisTdVals_v)
+								if ContactInfoMustReg.MatchString(thisTdVals_v) {
+									jumpNextTd = true
+									thisTrHasMatch = true
+									tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
+								}
+							}
+						}
+					}
+				}
+			} else {
+				jumpNextTd = false
+			}
+			///////////////////////////////////////
+			// Collect the kvs of this cell, trying three sources in order.
+			thisTdKvs := kvAfterDivideBlock(td.Text, 3)
+			if len(thisTdKvs) == 0 {
+				thisTdKvs = colonkvEntity.GetKvs(td.Text, "", 2)
+			}
+			if len(thisTdKvs) == 0 {
+				tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
+				if tdValue != "" && len([]rune(tdValue)) < 15 {
+					thisTdKvs = append(thisTdKvs, &u.Kv{
+						Key:   tdValue,
+						Value: "",
+					})
+				}
+			}
+			tdAscFind := true
+			if len(thisTdKvs) == 0 {
+				continue
+			} else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
+				// Handle the case where the buyer appears after the contact person / phone.
+				isCanAddToIndexMap := false
+			LL:
+				for _, td_kv := range thisTdKvs {
+					//u.Debug(td_kv.PrevLine)
+					td_k := FilterContactKey(td_kv.Key)
+					td_k_length := len([]rune(td_k))
+					if td_k_length < 2 || td_k_length > 15 {
+						continue
+					}
+					isContinue := ContactInfoMustReg.MatchString(td_k)
+					if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
+						if len(indexMap) > 0 {
+							indexMap = map[int]string{}
+							break LL
+						}
+						isCanAddToIndexMap = true
+					}
+					if isContinue {
+						continue
+					}
+					if len(indexMap) == 0 {
+						for _, k := range HasOrderContactType(td_k) {
+							if !ContactType[k].MatchString(td_k) {
+								continue
+							}
+							if isCanAddToIndexMap && len(indexMap) == 0 {
+								indexMap[0] = k
+								break
+							}
+						}
+					}
+				}
+				if len(indexMap) > 0 {
+					tdAscFind = false
+				}
+			}
+			prevKey := ""
+			oldIndexMapLength := len(indexMap)
+			thidTdIndex := td_index
+			notmatchCount := 0
+			kvTitle := ""
+			for _, td_kv := range thisTdKvs {
+				//u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
+				iscontinue := false
+				td_v := td_kv.Value
+				td_k := FilterContactKey(td_kv.Key)
+				td_k_length := len([]rune(td_k))
+				//
+				// Ascending search: the contact type is expected before its contact keys.
+				if allAscFind && tdAscFind {
+					for _, k := range HasOrderContactType(td_k) {
+						if td_k_length < 3 || td_k_length > 15 {
+							continue
+						}
+						if !ContactType[k].MatchString(td_k) {
+							matchCount++
+							continue
+						}
+						if weightMap[k] == nil {
+							weightMap[k] = map[string]interface{}{}
+						}
+						if ContactInfoVagueReg.MatchString(td_k) {
+							if matchMap[k] == nil {
+								matchMap[k] = map[string]bool{}
+							}
+							isAddToMatchMap := true
+							if !strings.HasSuffix(td_k, "方式") {
+								_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
+								if len(kTag) == 1 {
+									tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+									if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
+										isAddToMatchMap = false
+									}
+									// NOTE(review): weightVal.(int) below panics if weightVal is
+									// nil or not an int — confirm what FirstKeyValueInMap returns.
+									if td.SortKV.Map[tagVal] != nil {
+										if weightMap[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][tagVal].(int)) {
+											weightMap[k][tagVal] = weightVal.(int)
+											td.SortKV.AddKey(tagVal, td_v)
+											thisTrHasMatch = true
+										}
+									} else {
+										weightMap[k][tagVal] = weightVal.(int)
+									}
+								}
+							}
+							if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
+								matchMap[k][ContactInfoVagueReg.FindString(td_k)] = true
+							}
+						} else if k == "采购单位" { // tag it; a higher weight overrides the previous one
+							_, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
+							tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+							if tagVal == k {
+								if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
+									weightMap[k][k] = weightVal.(int)
+									matchMap[k] = map[string]bool{}
+									indexMap = map[int]string{}
+								}
+							}
+						}
+						if u.IsMapHasValue(k, indexMap) {
+							thisTrHasMatch = true
+							iscontinue = true
+							continue
+						}
+						if reCreate {
+							indexMap = map[int]string{}
+							reCreate = false
+						}
+						indexMap[thidTdIndex] = k
+						iscontinue = true
+						thisTrHasMatch = true
+						thidTdIndex++
+						break
+					}
+					// Still no contact type: try the line preceding this kv.
+					if len(indexMap) == 0 {
+						prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
+						for k, v := range ContactType {
+							if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
+								indexMap[thidTdIndex] = k
+								thisTrHasMatch = true
+								thidTdIndex++
+							}
+						}
+					}
+					// Still no contact type: try the title of this kv.
+					if len(indexMap) == 0 {
+						if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
+							thidTdIndex = 0
+							matchMap = map[string]map[string]bool{}
+							indexMap = map[int]string{1: titleMatchType}
+						}
+					}
+				}
+				if iscontinue {
+					continue
+				}
+				// Not in the same block: the kv title changed, so reset the tracked state.
+				if td_kv.Title != "" && kvTitle != td_kv.Title && len(indexMap) > 0 && !ContactInfoMustReg.MatchString(td_kv.Key) {
+					thidTdIndex = 0
+					matchMap = map[string]map[string]bool{}
+					indexMap = map[int]string{}
+				}
+				kvTitle = td_kv.Title
+				//u.Debug(indexMap, td_k, td_v, matchMap)
+				if len(indexMap) > 0 {
+					if td_k_length < 2 || td_k_length > 10 {
+						continue
+					}
+					// modle: 0 = several kvs in the cell, 1 = a single kv with a
+					// blank value, 2 = a single kv with a value.
+					modle := 0
+					if len(thisTdKvs) == 1 {
+						if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
+							modle = 1
+						} else {
+							modle = 2
+						}
+					}
+					if !ContactInfoMustReg.MatchString(td_k) {
+						notmatchCount++
+						// NOTE(review): "&& false" makes this reset branch unreachable.
+						if notmatchCount < len(indexMap)*2 && false {
+							notmatchCount = 0
+							thidTdIndex = 0
+							indexMap = map[int]string{}
+							matchMap = map[string]map[string]bool{}
+						}
+						if mustMatchFirst {
+							break L
+						}
+						continue
+					}
+					reCreate = true
+					index := td_index
+					if oldIndexMapLength == 0 && len(indexMap) > 1 {
+						if prevKey != td_k {
+							prevKey = td_k
+							index = td_index
+						} else if prevKey == td_k {
+							index++
+						}
+					}
+					if filterValue.MatchString(td_v) {
+						thisTrHasMatch = true
+						continue
+					}
+					//u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
+					// Resolve the contact type for this key; when indexMap holds a
+					// single entry, use it regardless of its index.
+					myContactType := indexMap[index]
+					if myContactType == "" && len(indexMap) == 1 {
+						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
+						myContactType, _ = onlyContactType.(string)
+					}
+					if myContactType == "" {
+						continue
+					}
+					matchCount++
+					if matchMap[myContactType] == nil {
+						matchMap[myContactType] = map[string]bool{}
+					}
+					if IsContactKvHandle(ContactInfoMustReg.FindString(td_k), matchMap[myContactType]) {
+						continue
+					}
+					matchMap[myContactType][ContactInfoMustReg.FindString(td_k)] = true
+					if ContactType[myContactType].MatchString(td_k) {
+						continue
+					}
+					thisTrHasMatch = true
+					if modle == 1 {
+						// Key-only cell: rewrite the cell text with the contact type prefix.
+						td.Text = myContactType + td_k
+						td.Val = td.Text
+					} else {
+						//
+						if !strings.HasSuffix(td_k, "方式") {
+							_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
+							if len(kTag) == 1 {
+								tagVal, _ := u.FirstKeyValueInMap(kTag)
+								if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
+									continue
+								}
+								if contactTypeTagMap[myContactType] == nil {
+									contactTypeTagMap[myContactType] = map[string][]interface{}{}
+								}
+								// If the same tag was already recorded for this contact type,
+								// remove the key stored earlier before recording the new one.
+								myOldKeyArray := contactTypeTagMap[myContactType][tagVal]
+								if myOldKeyArray != nil {
+									tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
+								} else {
+									contactTypeTagMap[myContactType][tagVal] = make([]interface{}, 3)
+								}
+								if weightMap[myContactType] == nil {
+									weightMap[myContactType] = map[string]interface{}{}
+								}
+								weightMap[myContactType][tagVal] = 1
+								contactTypeTagMap[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
+							}
+						}
+						td.SortKV.AddKey(myContactType+td_k, td_v)
+					}
+				}
+			}
+			//u.Debug(td.SortKV.Map)
+		}
+		// In ascending mode, two rows without any match drop the tracked contact type.
+		if allAscFind && !thisTrHasMatch {
+			notMatchTrCount++
+			if notMatchTrCount >= 2 {
+				notMatchTrCount = 0
+				indexMap = map[int]string{}
+			}
+		}
+	}
+	//u.Debug("end", matchCount, indexMap, matchMap)
+	if matchCount == 0 {
+		indexMap = map[int]string{}
+		matchMap = map[string]map[string]bool{}
+	}
+	// Write the state back so the caller can pass it on.
+	(*contactFormat).IndexMap = indexMap
+	(*contactFormat).MatchMap = matchMap
+	//	for _, tr := range tn.TRs {
+	//		for _, td := range tr.TDs {
+	//			log.Println(td.SortKV.Map)
+	//		}
+	//	}
+}

+ 24 - 13
src/jy/util/article.go

@@ -2,19 +2,21 @@ package util
 
 //
 type Job struct {
-	SourceMid  string                  //数据源的MongoId
-	Category   string                  //类别
-	Content    string                  //正文
-	Title      string                  //标题
-	SpiderCode string                  //爬虫代码
-	Domain     string                  //网站域名
-	Href       string                  //原文链接
-	City       string                  //城市
-	Province   string                  //省份
-	Data       *map[string]interface{} //数据库源数据
-	Block      []*Block                //分块
-	Result     map[string][]*ExtField  //结果
-	BuyerAddr  string                  //采购单位地址
+	SourceMid    string                   //数据源的MongoId
+	Category     string                   //类别
+	Content      string                   //正文
+	Title        string                   //标题
+	SpiderCode   string                   //爬虫代码
+	Domain       string                   //网站域名
+	Href         string                   //原文链接
+	City         string                   //城市
+	Province     string                   //省份
+	Data         *map[string]interface{}  //数据库源数据
+	Block        []*Block                 //分块
+	Result       map[string][]*ExtField   //结果
+	BuyerAddr    string                   //采购单位地址
+	BlockPackage map[string]*BlockPackage //块中的分包
+	Winnerorder  []map[string]interface{} //中标候选人排序
 }
 
 type ExtField struct {
@@ -25,6 +27,7 @@ type ExtField struct {
 	MatchType string      //匹配类型:1:标签库类型(tag_string,tag_regexp),2:全文正则regcontent
 	ExtFrom   string      //抽取来源(title,detail)
 	Value     interface{} //抽取结果
+	Score     int         //得分
 }
 
 //块
@@ -63,6 +66,14 @@ type BlockPackage struct {
 	Accuracy    bool                     //包里面抽取字段的准确性,如果能打上块标签的话,就不用中标候选人中的值覆盖包里面的值
 }
 
+// ContactFormat carries contact-matching state (contact person information).
+// Table.TdContactFormat reads IndexMap and MatchMap on entry and writes them
+// back on exit.
+type ContactFormat struct {
+	// NOTE(review): Direction is not read or written in the code shown here — confirm its use.
+	Direction int
+	// IndexMap maps an index to the contact type tracked at that index.
+	IndexMap  map[int]string
+	// MatchMap maps a contact type to the set of contact keywords already matched for it.
+	MatchMap  map[string]map[string]bool
+	// NOTE(review): WeightMap is not used in the code shown here; TdContactFormat keeps its own local weight map.
+	WeightMap map[string]map[string]interface{}
+}
+
 //kv
 type Kv struct {
 	Key      string

+ 59 - 0
src/res/fieldscore.json

@@ -0,0 +1,59 @@
+{
+    "extractype": {
+        "describe": "抽取类型打分",
+        "title": 3,
+        "table": 5,
+        "colon": 3,
+        "space": 3,
+        "regexp": 2
+    },
+    "projectname": {
+        "type": "string",
+        "position": [
+            {
+                "describe": "以*开头",
+                "regstr": "关于|\\[|【",
+                "score": -1
+            },
+            {
+                "describe": "以*结尾",
+                "regstr": "项目|工程|采购",
+                "score": 3
+            }
+        ],
+        "length": [
+            {
+                "describe": "长度打分min>val:0,min<=val<=max:1,max<val:-1",
+                "min": 4,
+                "max": 20,
+                "score": [
+                    0,
+                    1,
+                    -1
+                ]
+            }
+        ]
+    },
+    "budget": {
+        "type": "float",
+        "describe": "min>val:1,min<=val<=max:3,max<val:1",
+        "min": 1000,
+        "max": 1000000000,
+        "score": [
+            1,
+            3,
+            1
+        ]
+    },
+    "supervisorrate": {
+        "type": "decimal",
+        "describe": "费率min>=val:0,min<val<=max:3,max<val:-3",
+        "min": 0,
+        "max": 1,
+        "score": [
+            0,
+            3,
+            -3
+        ]
+    }
+}

+ 13 - 10
src/web/templates/admin/versioninfo.html

@@ -81,7 +81,7 @@ $(function () {
 				return '<a class="btn btn-sm btn-success" href="/admin/rulelogic?vid={{.vid}}&pid='+val+'">配置逻辑</a>'
 			}},
 			{"data":"_id",render:function(val,a,row){
-				return '<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\')">删除</a>'
+				return '<a class="btn btn-sm btn-primary opr" opr="edit">编辑</a>&nbsp;<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\')">删除</a>'
 			}}
        	]
 	});
@@ -90,21 +90,23 @@ $(function () {
 			var n=$(this).attr("opr")
 			var _tit="",htmlObj={},obj,tag=[]
 			switch(n){
-			case "edit":			
-				obj=ttable.row($(this).closest("tr")).data();
+			case "edit":	
+                obj=ttable.row($(this).closest("tr")).data()		
 			case "new":
-				if(n=="edit"){
-					_tit="编辑-"+obj.s_field
-				}else{
-					_tit="新增字段"
-					tag=[
+                tag=[
 						{label:"属性名称",s_label:"s_field",type:"tpl_list_local",url:"/admin/getfields",must:true},
 						{label:"描述",s_label:"s_descrip",placeholder:"描述信息"},
 						{label:"克隆版本",s_label:"s_pversionid",type:"tpl_list_local",url:"/admin/getversions"},
-						{s_label:"_id",type:"tpl_hidden"},
+                        {s_label:"_id",type:"tpl_hidden"},
 						{s_label:"vid",type:"tpl_hidden"},
 						{s_label:"isuse",type:"tpl_hidden"},
 					]
+				if(n=="edit"){
+					_tit="编辑-"+obj.s_field
+                    tag[0]={label:"属性名称",s_label:"s_field",type:"tpl_list_local",url:"/admin/getfields",must:true,disabled:true}
+                    tag[2]={label:"克隆版本",s_label:"s_pversionid",type:"tpl_list_local",url:"/admin/getversions",disabled:true}
+				}else{
+					_tit="新增字段"
 					obj={"vid":"{{.vid}}","isuse":false}
 				}
 				htmlObj={
@@ -123,7 +125,8 @@ $(function () {
 										return false
 									}
 								})
-								if (bcon){								
+								if (bcon){	
+                                    console.log(obj)							
 									$.post("/admin/version/infosave",obj,function(data){
 										if(data&&data.rep){
 											window.location.href="/admin/version/info?vid={{.vid}}"