Procházet zdrojové kódy

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

maxiaoshan před 4 roky
rodič
revize
952488fa15

+ 55 - 39
src/jy/extract/extract.go

@@ -216,9 +216,7 @@ func RunExtractTask(taskId string) {
 			//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 			//	continue
 			//}
-			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
-				continue
-			}
+			//根据标题判断是否抽取
 			b := IsExtract("title", qu.ObjToString(v["title"]), "")
 			if !b {
 				continue
@@ -328,6 +326,17 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		if (*toMap)["jsoncontent"] != nil {
 			delete(*toMap, "jsoncontent")
 		}
+		for k,v := range *toMap{
+			if _,ok := v.(float64);ok{
+				continue
+			}else if _,ok := v.(int64);ok{
+				continue
+			}else if _,ok2 := v.(string);ok2{
+				continue
+			}else {
+				delete(*toMap,k)
+			}
+		}
 	}
 	j = &ju.Job{
 		SourceMid:      qu.BsonIdToSId(doc["_id"]),
@@ -459,12 +468,21 @@ func file2text(doc *map[string]interface{}) {
 
 //抽取
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
+
 	e.ExtractDetail(j, isSite, j.SpiderCode)
 	if jf != nil && jf.IsFile {
-		e.ExtractFile(jf, isSite, j.SpiderCode)
-		for tmpk, _ := range jf.Result {
+		e.ExtractDetail(jf, isSite, j.SpiderCode)
+		for tmpk, xs := range jf.Result {
 			if len(j.Result[tmpk]) == 0 {
+				if tmpk == "budget" || tmpk == "bidamount" {
+					for _, v := range xs {
+						if fv, ok := v.Value.(float64); ok && fv > 100 && fv < 50000000000 {
+							j.Result[tmpk] = append(j.Result[tmpk], v)
+						}
+					}
+				} else {
 					j.Result[tmpk] = append(j.Result[tmpk], jf.Result[tmpk]...)
+				}
 			}
 		}
 		if len(j.Winnerorder) == 0 && jf.Winnerorder != nil && len(jf.Winnerorder) > 0 {
@@ -1455,6 +1473,12 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
 						return
 					}
 					text := qu.ObjToString(v.Value)
+					if v.Field == "bidamount" || v.Field == "budget" {
+						if strings.Contains(qu.ObjToString(v.SourceValue), "费率") {
+							j.Result[in.Field][k].IsTrue = false
+							continue
+						}
+					}
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
 					}
@@ -1825,7 +1849,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(j.Winnerorder) > 0 { //候选人信息
 			for i, v := range j.Winnerorder {
 				if v["price"] != nil {
-					j.Winnerorder[i]["price"] = clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode)[0]				}
+					tmpPrice := clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode, j.IsClearnMoney)
+					if tmpPrice[len(tmpPrice)-1].(bool) {
+						j.Winnerorder[i]["price"] = tmpPrice[0]
+					} else {
+						delete(j.Winnerorder[i], "price")
+					}
+				}
 			}
 			tmp["winnerorder"] = j.Winnerorder
 		}
@@ -1839,12 +1869,9 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					if v.Score > -1 {
 						ffield[v.Field] = v.Value
 						if tmp[v.Field] == nil {
-							if v.Field == "budget" || v.Field == "bidamount" {
-								if fv, ok := v.Value.(float64); ok && fv > 100 && fv < 50000000000 {
-									tmp[v.Field] = v.Value
-								}
-							} else {
+							if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
 								tmp[v.Field] = v.Value
+								break
 							}
 						}
 						break
@@ -1946,7 +1973,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 						kvtext.WriteString(jv_k)
 						kvtext.WriteString(":")
 						kvtext.WriteString(jv_vv.Value)
-						kvtext.WriteString(" ")
+						kvtext.WriteString("\n")
 					}
 				}
 			}
@@ -1975,21 +2002,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			tmp["projectname"] = j.Title
 		}
 		tmp["repeat"] = 0
+		if ju.Ffield {
+			if len(ffield) > 0 {
+				tmp["ffield"] = ffield
+			}
+		}
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
-				/*	if len(e.SiteFields) <= 0 {
-						//for field, _ := range e.Fields {
-						//	if tmp[field] == nil &&  {
-						//		tmp[field] = "" //覆盖之前版本数据
-						//	}
-						//}
-					} else {
-						//for field, _ := range e.SiteFields {
-						//	if tmp[field] == nil &&{
-						//		tmp[field] = "" //覆盖之前版本数据
-						//	}
-						//}
-					}*/
 				tmparr := []map[string]interface{}{
 					map[string]interface{}{
 						"_id": qu.StringTOBsonId(_id),
@@ -2018,19 +2037,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		} else { //测试结果
 			delete(tmp, "_id")
-			//auxinfo := auxInfo(j)
-			//auxinfof := auxInfo(jf)
-			//if len(auxinfo) > 0 {
-			//	tmp["fieldall"] = auxinfo
-			//}
-			//if len(auxinfof) > 0 {
-			//	tmp["fieldallf"] = auxinfof
-			//}
-			if ju.Ffield {
-				if len(ffield) > 0 {
-					tmp["ffield"] = ffield
-				}
-			}
 			delete(tmp, "fieldall")
 			if len(j.BlockPackage) > 0 { //分包详情
 				if len(j.BlockPackage) > 10 {
@@ -2410,7 +2416,17 @@ func resetWinnerorder(j *ju.Job) {
 	} else if len(bidamounts) > 0 {
 		j.Result["bidamount"] = append(j.Result["bidamount"], bidamounts...)
 	}
-	
+	if j.Result["winner"] == nil && len(j.Winnerorder) > 0 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
+		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
+		j.Result["winner"] = winners
+		if j.Winnerorder[0]["price"] != nil {
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
+			if tmpPrice[len(tmpPrice)-1].(bool) {
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
+			}
+			j.Result["bidamount"] = bidamounts
+		}
+	}
 }
 func RemoveReplicaSliceString(slc []string) []string {
 	result := make([]string, 0)

+ 1 - 1
src/jy/extract/extractudp.go

@@ -161,7 +161,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				//	log.Debug(index, qu.BsonIdToSId(v["_id"]), "//去除含敏感词数据")
 				//	continue
 				//}
-				if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时开标记录
+				if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" || "a_hbszbtbggfwpt_kbjl" == qu.ObjToString(v["spidercode"]) { //临时开标记录
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}

+ 19 - 18
src/jy/extract/score_jsondata.go

@@ -24,7 +24,7 @@ var endOfPunctuationClrear = regexp.MustCompile("[,,.。??;;]+$")
 var keysClrear = regexp.MustCompile("(详见|公告|X|内文|某单位|某部|文件|\\*|暂无|?|\\?)")
 
 //jsondata清理
-func clearJd(jd *map[string]interface{}, e *ExtractTask, spiderCode,IsClearnMoney string) {
+func clearJd(jd *map[string]interface{}, e *ExtractTask, spiderCode string, isclearnMoney string) {
 	for k, v := range *jd {
 		if k == "buyer" || k == "winner" || k == "agency" || k == "projectcode" || k == "projectname" {
 			vstring := util2.ObjToString(v)
@@ -37,7 +37,7 @@ func clearJd(jd *map[string]interface{}, e *ExtractTask, spiderCode,IsClearnMone
 			cfn := e.ClearFn[k]
 			lockclear.Unlock()
 			if len(cfn) > 0 {
-				data := clear.DoClearFn(cfn, []interface{}{vstring, ""}, spiderCode,IsClearnMoney)
+				data := clear.DoClearFn(cfn, []interface{}{vstring, ""}, spiderCode, isclearnMoney)
 				lockclear.Lock()
 				if clear.AsyField[k] != nil || clear.SymField[k] != nil || clear.MesField[k] != nil {
 					vstring = clear.OtherClean(k, util2.ObjToString(data[0]))
@@ -47,7 +47,6 @@ func clearJd(jd *map[string]interface{}, e *ExtractTask, spiderCode,IsClearnMone
 			vstring = htmlclrear.ReplaceAllString(vstring, "")
 			vstring = endOfParenthesesClrear.ReplaceAllString(vstring, "")
 			vstring = endOfPunctuationClrear.ReplaceAllString(vstring, "")
-			vstring = keysClrear.ReplaceAllString(vstring, "")
 			if utf8.RuneCountInString(vstring) < 5 {
 				delete(*jd, k)
 				continue
@@ -85,7 +84,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				if len(cfn) == 0 {
 					continue
 				}
-				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode,j.IsClearnMoney)
+				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode, j.IsClearnMoney)
 				//if util2.IntAll(newNum[0]) != 0 {
 				extFields := make([]*util.ExtField, 0)
 				extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1, IsTrue: newNum[len(newNum)-1].(bool)})
@@ -105,7 +104,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				if bt, ok := (*j.Jsondata)[v].(float64); ok && bt > 0 {
 					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: bt, Score: 0.1})
 				} else {
-					newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode)
+					newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode, j.IsClearnMoney)
 					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
 				}
 				j.Result[v] = extFields
@@ -203,21 +202,23 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				if len(cfn) == 0 {
 					continue
 				}
-				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode,j.IsClearnMoney)
-				//if util2.IntAll(newNum[0]) != 0 {
-				extFields := make([]*util.ExtField, 0)
-				if jdextweight > 1 {
-					if oneScore < 0 {
-						oneScore = 0.1
-					}
-					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore + 1, IsTrue: newNum[len(newNum)-1].(bool)})
-				} else {
-					if oneScore < 0 {
-						oneScore = 0.1
+				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""}, j.SpiderCode, j.IsClearnMoney)
+				if newNum[len(newNum)-1].(bool) {
+					//if util2.IntAll(newNum[0]) != 0 {
+					extFields := make([]*util.ExtField, 0)
+					if jdextweight > 1 {
+						if oneScore < 0 {
+							oneScore = 0.1
+						}
+						extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore + 1, IsTrue: newNum[len(newNum)-1].(bool)})
+					} else {
+						if oneScore < 0 {
+							oneScore = 0.1
+						}
+						extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore, IsTrue: newNum[len(newNum)-1].(bool)})
 					}
-					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: oneScore, IsTrue: newNum[len(newNum)-1].(bool)})
+					j.Result[v] = append(j.Result[v], extFields...)
 				}
-				j.Result[v] = append(j.Result[v], extFields...)
 				continue
 			}
 

+ 29 - 11
src/jy/pretreated/analystep.go

@@ -14,25 +14,33 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|主要人员相关资料|投标文件格式|项目业绩|否决投标的?情况说明")
+var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|投标人业绩|主要人员相关资料|投标文件格式|唱标记录|项目业绩|否决投标的?情况说明")
+var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
+var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
+var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
 
 func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
 	//格式化正文
+	con = hisReg.ReplaceAllString(con, "")
+	con = hisReg2.ReplaceAllString(con, "${2}")
+	con = formattext.ReplaceAllString(con, "${1}:${2}")
+	con = formattext2.ReplaceAllString(con, "${1}")
 	con = formatText(con, "all")
 	job.Content = con
 	//计算表格占比,返回表格数组、占比
-	tabs, ration := ComputeConRatio(con, 1)
-	if len(tabs) > 0 {
+	tabs, _ := ComputeConRatio(con, 1)
+	/*if len(tabs) > 0 {
 		newcon, newtabs, newration := FindBigText(con, ration, tabs)
 		if newcon != "" {
 			con = newcon
+			con = formatText(con, "all")
 			tabs = newtabs
 			ration = newration
 		}
-	}
+	}*/
 	job.BlockPackage = map[string]*util.BlockPackage{}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite) //分块
 	if len(blockArrays) > 0 {                                                                  //有分块
@@ -66,13 +74,18 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
 			//log.Println(newCon)
-			if newCon!=""{
+			if newCon != "" {
 				job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
 			}
 			for i := 0; i < len(tabs); i++ {
-				//fmt.Println(tabs[i].Html())
+				blockTag := ""
+				if len(tabs[i].Nodes) > 0 {
+					if tabs[i].Nodes[0].PrevSibling != nil {
+						blockTag = tabs[i].Nodes[0].PrevSibling.Data
+					}
+				}
 				//添加标识:文本中有table
-				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
+				tabres := AnalyTableV2(tabs[i], job.Category, blockTag, con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
 				processTableResult(tabres, bl, job, isSite, codeSite)
 			}
 		} else {
@@ -83,11 +96,12 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 		//log.Println(bl.Text)
 		FindProjectCode(bl.Text, job) //匹配项目编号
 		if yjReg.MatchString(bl.Text) {
-			if strings.Index(bl.Text, "业绩")>1{
+			if strings.Index(bl.Text, "业绩") > 1 {
 				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
 			}
 		}
 		//调用kv解析
+		bl.Text = formatText(bl.Text, "all")
 		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
 		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil, isSite, codeSite)
 		//新加 未分块table中未能解析到中标候选人,从正文中解析
@@ -106,7 +120,9 @@ func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite st
 	for _, tab := range tabs {
 		job.HasTable = 1
 		tmptag := ""
-		if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
+		if bl.Title != "" && len(bl.Title) < 20 {
+			tmptag = bl.Title
+		} else if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
 		}
 		//添加标识:文本中有table
@@ -290,8 +306,10 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 						wror = append(wror, v)
 						continue
 					} else if tmpWins[v["entname"].(string)] > 0 && tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] != nil {
-						if tmpWins[v["entname"].(string)]-1 >= 0 {
-							job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
+						if tmpWins[v["entname"].(string)]-1 >= 0 && len(job.Winnerorder) > 0 {
+							if len(job.Winnerorder) > (tmpWins[v["entname"].(string)] - 1) {
+								job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
+							}
 							continue
 						}
 					}

+ 24 - 10
src/jy/pretreated/winnerorder.go

@@ -1,6 +1,8 @@
 package pretreated
 
 import (
+	"jy/clear"
+	//"jy/clear"
 	"jy/util"
 	qutil "qfw/util"
 	"regexp"
@@ -27,14 +29,16 @@ var (
 	numberReg2        = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
 	thisNumberReg     = regexp.MustCompile("第" + numberReg.String())
 	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名)")
-	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?((候|侯)选)?(入围|备选|成交|中(标|选))人?([((]成交[))])?((候|侯)选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
-	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|第[一二三四五六七八九十1-9]+(候|侯)选人)")
+	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
+	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|第[一二三四五六七八九十1-9]+(中标)?[候|侯]选人|中标候选人排名[:]\\d)")
 	//winnerReg2     = regexp.MustCompile("(第[一二三四五六七八九十1-9]+(候|侯)选人)")
 	winnerReg3     = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名)")
-	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中(标|选)候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
+	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
 	winnerReg5     = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
 	winnerReg6     = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
 	winnerReg7     = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
+	winnerReg8     = regexp.MustCompile("(第[一二三四五六七八九十]中标候选人)[::]?\n(1)单位名称:(.*)\n(2)投标报价(含税):(.*)")
+	winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
 	colonEndReg    = regexp.MustCompile("[::]$")
 	toWarpReg      = regexp.MustCompile("[,。,;;]+")
 	findamountReg  = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
@@ -42,8 +46,8 @@ var (
 	companyWarpReg = regexp.MustCompile("(公司)(.+?[::])")
 	findCompanyReg = regexp.MustCompile("[^::]+公司")
 	colonSpaceReg  = regexp.MustCompile("[::]\\s+")
-	findCandidate  = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合(会|体))|工作室)")
-	findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合(会|体)|工作室)$)")
+	findCandidate  = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])|工作室)")
+	findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会|体]|工作室|有限司)$)")
 	clearSpace1    = regexp.MustCompile("([((][\\d一二三四五六七八九十][))][\\s\u3000\u2003\u00a0\\t]*|<[^>].+?>)")
 	clearSpace2    = regexp.MustCompile("</?[^>]+>")
 	offerReg       = regexp.MustCompile("(中标|磋商|投标|报|单|成交)总?(价|金额)")
@@ -55,10 +59,15 @@ var (
  *from 来源
  */
 func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool, codeSite string) []map[string]interface{} {
-	if clearSpace2.MatchString(text) {
-		return  []map[string]interface{}{}
+	if clearSpace2.MatchString(text){
+		text = TextAfterRemoveTable(text)
+	}
+	text = winnerRegclear.ReplaceAllString(text,"")
+	if nswinnertabletag.MatchString(text) {
+		return []map[string]interface{}{}
 	}
 	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
+	text = winnerReg8.ReplaceAllString(text,"\n${1}:${2}\n中标金额:${3}\n")
 	/*
 		"_id" : ObjectId("5c2c6f60a5cb26b9b7b62cd8")
 
@@ -240,7 +249,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				val := wo.clear("中标单位", v)
 				if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5 {
 					count++
-					object["entname"] = strings.TrimSpace(qutil.ObjToString(val))
+					object["entname"] = strings.TrimRight(strings.ReplaceAll(strings.TrimSpace(qutil.ObjToString(val)), "公司", "公司,"), ",")
 					object["sort"] = wo.toNumber(k, count)
 					object["sortstr"] = thisNumberReg.FindString(k)
 					object["type"] = i
@@ -257,9 +266,14 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				}
 				//找到了中标金额
 				if findOfferFlag && object["entname"] != nil {
-					val := wo.clear("中标金额", v)
+					val := wo.clear("中标金额", v+GetMoneyUnit(k, v))
 					if val != nil {
-						object["price"] = val
+						moneys := clear.ObjToMoney([]interface{}{val, ""})
+						if len(moneys) > 0 {
+							if vf, ok := moneys[0].(float64); ok &&  moneys[len(moneys)-1].(bool){
+								object["price"] = float64(vf)
+							}
+						}
 					}
 					winners = append(winners, object)
 					object = map[string]interface{}{}

+ 17 - 15
src/res/fieldscore.json

@@ -209,12 +209,12 @@
             },
             {
                 "describe": "包含负分不再展示",
-                "regstr": "(详见|提出|面向|[^实]施工[^程]|获得|test|认定|一批|项目$|详细请?见?正文)",
+                "regstr": "([^实]施工[^程]|项目$|详细请?见?正文)",
                 "score": -50
             },
             {
-                "describe": "一个字或者两个字不再显示",
-                "regstr": "^[\\s]*[\\u4e00-\\u9fa5]{1,2}[\\s]*$",
+                "describe": "黑名单",
+                "regstr": "(^.{0,4}$|T及分公司|大厦[0-9]+室|东侧路面拓宽|、技术研发中心|钢芯铝绞线)",
                 "score": -50
             }
         ],
@@ -223,16 +223,8 @@
                 "describe": "[gt,lte,score]",
                 "range": [
                     0,
-                    3,
-                    -20
-                ]
-            },
-            {
-                "describe": "[gt,lte,score]",
-                "range": [
-                    3,
                     4,
-                    -1
+                    -20
                 ]
             },
             {
@@ -258,7 +250,7 @@
         "positivewords": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$",
+                "regstr": ".{2,100}(集团|公司|学校|中心|家具城|门诊|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行|联合体)$",
                 "score": 3
             },{
                 "describe": "关键词",
@@ -269,13 +261,18 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(我公司|定标|通知|异议|要求|代理|详细|test|意见|原因|具体|结果|负责|付款|附件|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交[^通]|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|注:|\\\\d[\\\\s]{0,10}(\\\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})\n",
+                "regstr": "((中标|候选|成交|代表|投标|代理)人|我公司|定标|通知|异议|法院|要求|代理|详细|test|意见|原因|具体|结果|负责|付款|附件|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交[^通]|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|注:|\\d[\\s]{0,10}(\\d|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100}\\n)",
                 "score": -20
             },
 			{
                 "describe": "非结尾",
                 "regstr": ".*[^集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行|处]$",
                 "score": -5
+            },
+            {
+                "describe": "黑名单",
+                "regstr": "(^.{0,4}$|[((]或印鉴[))]|(多家|没有)中标商|违法违规行|(中标人|供应商)名称|合同包合计|楼青年公寓装修工程|评标委员会|(投标人|供应商)被人民法院|符合初步评审标准|评标结果公示|供应商评审申报|与营业执照|保管员签字|评审小组名单|单位负责人|^[xX]+$|候选人数量|微型企业且所)",
+                "score": -50
             }
         ],
         "length": [
@@ -339,6 +336,11 @@
                 "describe": "乱码特殊符号",
                 "regstr": "[±??¨êí¤ì×üàóμˉ÷°úéè《》-]",
                 "score": -20
+            },
+            {
+                "describe": "黑名单",
+                "regstr": "(集中代理采购|竞争性谈判|“组组通”工程|提交一份公司|公司组织的|[((]章[))]$|^.{0,4}$)",
+                "score": -50
             }
         ],
         "length": [
@@ -771,7 +773,7 @@
             {
                 "describe": "-结束没有抽取完",
                 "regstr": "-$",
-                "score": -1
+                "score": -1.5
             },
             {
                 "describe": "开始到结束连续字符-0.5",