Эх сурвалжийг харах

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

apple 5 жил өмнө
parent
commit
e378c85129

+ 1 - 1
src/config.json

@@ -11,7 +11,7 @@
     "elasticPoolSize": 10,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "fieldsfind": false,
     "qualityaudit": false,
     "saveblock": false,

+ 12 - 5
src/jy/clear/tonumber.go

@@ -3,9 +3,11 @@ package clear
 
 import (
 	"fmt"
+	"qfw/util"
 	"regexp"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 )
 
 var contentUnit *regexp.Regexp //全文检索单位:万元
@@ -80,6 +82,11 @@ func ObjToFloat(data []interface{}) []interface{} {
 func ObjToMoney(data []interface{}) []interface{} {
 	isfindUnit := true
 	tmpstr :=(data)[0]
+	if utf8.RuneCountInString(util.ObjToString(tmpstr)) > 30 {
+		(data)[0] = 0
+		data = append(data, false)
+		return data
+	}
 	ret := capitalMoney(data)[0]
 	if ret.(float64) < float64(10000) || ret.(float64) > float64(50000000000) {
 		ret2, b := numMoney(data)
@@ -89,9 +96,9 @@ func ObjToMoney(data []interface{}) []interface{} {
 		}
 	}
 	f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64)
-	if f < 1 {
-		f = 0
-	}
+	//if f < 1 {
+	//	f = 0
+	//}
 	//如果金额小于50,全文检索单位:万
 	if f < 50 && f > 0 && isfindUnit {
 		rep := contentUnit.FindAllStringIndex(fmt.Sprint(data[1]), -1)
@@ -99,17 +106,17 @@ func ObjToMoney(data []interface{}) []interface{} {
 			f = f * 10000
 		}
 	}
+	data[0] = f
 	if f == 0 && !moneyUnitRegBool.MatchString(fmt.Sprint(tmpstr)) {
 		data = append(data, false)
 		return data
 	}
 	data = append(data, true)
-	data[0] = f
 	return data
 }
 //["中标金额","成交金额","合同金额","中标价","成交价","成交价格","中标(成交)金额","投标报价","中标标价","成交结果"]
 //["0元","零元","0.0万元","¥0元"]
-var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金额|中标价|成交价|成交价格|中标\(成交\)金额|投标报价|中标标价|成交结果)?[::\s]?(0元|零元|0.0万元|¥0元|0)+[\s]?$`)
+var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金额|中标价|成交价|成交价格|中标\(成交\)金额|投标报价|中标标价|成交结果)?[::\s]?(0|零|0.0|¥0)+(0|\.)*[\s]?(万|元|){0,2}[\s]?((人民币))?$`)
 //数字金额转换
 func numMoney(data []interface{}) ([]interface{}, bool) {
 	tmp := fmt.Sprintf("%f",data[0])

+ 18 - 8
src/jy/extract/extpackage.go

@@ -129,13 +129,19 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 				if pkg != nil {
 					sonJobResult["origin"] = pkg.Origin
 					sonJobResult["text"] = pkg.Text
-					sonJobResult["budget"] = pkg.Budget
-					sonJobResult["bidamount"] = pkg.Bidamount
+					if pkg.IsTrueBudget{
+						sonJobResult["budget"] = pkg.Budget
+					}
+					if pkg.IsTrueBidamount{
+						sonJobResult["bidamount"] = pkg.Bidamount
+					}
 					if pkg.Winner == "" && len(j.Winnerorder) > 0 {
 						if sonJobResult["winnerorder"] == nil {
 							sonJobResult["winnerorder"] = j.Winnerorder
-							if sonJobResult["bidamount"].(float64) <= 0 {
-								sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
+							if  sonJobResult["bidamount"] == nil ||sonJobResult["bidamount"].(float64) <= 0 {
+								if j.Winnerorder[0]["price"] != nil{
+									sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
+								}
 							}
 							if sonJobResult["winner"] == "" {
 								sonJobResult["winner"] = j.Winnerorder[0]["entname"]
@@ -143,7 +149,9 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 						}
 					} else {
 						if len(j.Winnerorder) > 0 {
-							sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
+							if j.Winnerorder[0]["price"] !=nil{
+								sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
+							}
 							sonJobResult["winner"] = j.Winnerorder[0]["entname"]
 						}
 						if len(pkg.WinnerOrder) > 0 {
@@ -158,7 +166,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 
 					sonJobResult["type"] = pkg.Type
 					if len(tmpkeys) == 1 {
-						if qu.Float64All(sonJobResult["budget"]) == 0 {
+						if qu.Float64All(sonJobResult["budget"]) == 0 && pkg.IsTrueBudget {
 							for _, bv := range j.Block {
 								kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
 								kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
@@ -271,8 +279,10 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 					if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
 						sonJobResult["winner"] = firstWinnerOrder["entname"]
 					}
-					if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
-						sonJobResult["bidamount"] = firstWinnerOrder["price"]
+					if (qu.Float64All(sonJobResult["bidamount"]) == 0 && pkg.IsTrueBidamount)|| (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
+						if firstWinnerOrder["price"] != nil{
+							sonJobResult["bidamount"] = firstWinnerOrder["price"]
+						}
 					}
 				}
 				//log.Println(pkName, sonJobResult)

+ 128 - 46
src/jy/extract/extract.go

@@ -11,6 +11,7 @@ import (
 	qu "qfw/util"
 	"qfw/util/redis"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -26,12 +27,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 100                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 100                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -318,6 +319,9 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
 		RuleBlock: e.RuleBlock,
 	}
+	if (j.Jsondata != nil||(*j.Jsondata) != nil)  && (*j.Jsondata)["jsoncontent"]!= nil{
+		delete((*j.Jsondata),"jsoncontent")
+	}
 	if isextFile {
 		jf = &ju.Job{
 			SourceMid:  qu.BsonIdToSId(doc["_id"]),
@@ -335,22 +339,31 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 			RuleBlock:  e.RuleBlock,
 			IsFile:     isextFile,
 		}
+		if (jf.Jsondata != nil||(*jf.Jsondata) != nil)  && (*jf.Jsondata)["jsoncontent"]!= nil{
+			delete((*jf.Jsondata),"jsoncontent")
+		}
 	}
-	//是否配置站点
 	codeSite := j.SpiderCode
-	exp, isSite := e.Luacodes.Load(codeSite)
+	//是否启用站点
+	if value, ok := e.SiteMerge.Load(codeSite); ok {
+		isSite = value.(bool)
+	}
 	if isSite {
-		if exp.(map[string]interface{})["e.SiteClearFn"] != nil {
-			e.SiteClearFn = exp.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)
-		}
-		if exp.(map[string]interface{})["e.SiteTag"] != nil {
-			e.SiteTag = exp.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)
-		}
-		if exp.(map[string]interface{})["e.SiteRuleCores"] != nil {
-			e.SiteRuleCores = exp.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
-		}
-		if exp.(map[string]interface{})["e.SiteRuleBacks"] != nil {
-			e.SiteRuleBacks = exp.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo)
+		//是否配置站点
+		exp, isSite := e.Luacodes.Load(codeSite)
+		if isSite {
+			if exp.(map[string]interface{})["e.SiteClearFn"] != nil {
+				e.SiteClearFn = exp.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)
+			}
+			if exp.(map[string]interface{})["e.SiteTag"] != nil {
+				e.SiteTag = exp.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)
+			}
+			if exp.(map[string]interface{})["e.SiteRuleCores"] != nil {
+				e.SiteRuleCores = exp.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
+			}
+			if exp.(map[string]interface{})["e.SiteRuleBacks"] != nil {
+				e.SiteRuleBacks = exp.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo)
+			}
 		}
 	}
 	qu.Try(func() {
@@ -587,6 +600,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
 						j.Result[key][i].IsTrue = true
 					} else {
+						j.Result[key][i].Value = data[0]
 						continue
 					}
 				}
@@ -654,6 +668,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
 				if value, ok := e.FileFields.Load(vc.Field); ok && qu.IntAllDef(value, 1) > 0 {
 					ExtRuleCore(tmp, e, vc, j, isSite)
 				}
+
 				// log.Debug("抽取-规则", tmp)
 
 				//抽取-后置规则
@@ -757,9 +772,6 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 					for k, v := range []*ju.JobKv{bp.ColonKV, bp.SpaceKV, bp.TableKV} {
 						if k == 0 {
 							tp = "colon"
-							//							for _, vv := range v.Kvs {
-							//								qu.Debug(vv.Key, vv.Value)
-							//							}
 						} else if k == 1 {
 							tp = "space"
 						} else if k == 2 {
@@ -962,14 +974,20 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
 								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
-								j.BlockPackage[k].Budget = qu.Float64All(data[0])
+								if data[len(data)-1].(bool){
+									j.BlockPackage[k].Budget = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueBudget = true
+								}
 								break
 							} else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
 								lock.Lock()
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
 								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
-								j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+								if data[len(data)-1].(bool){
+									j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueBidamount = true
+								}
 								break
 							} else if in.Field == "winner" {
 								if j.BlockPackage[k].Winner == "" {
@@ -1008,7 +1026,10 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
 						data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
-						j.BlockPackage[k].Budget = qu.Float64All(data[0])
+						if data[len(data)-1].(bool){
+							j.BlockPackage[k].Budget = qu.Float64All(data[0])
+							j.BlockPackage[k].IsTrueBudget = true
+						}
 						break
 					}
 					if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
@@ -1016,7 +1037,10 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
 						data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
-						j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+						if data[len(data)-1].(bool){
+							j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+							j.BlockPackage[k].IsTrueBidamount = true
+						}
 						break
 					} else if in.Field == "bidstatus" {
 						if j.BlockPackage[k].BidStatus == "" {
@@ -1047,13 +1071,14 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]ma
 				kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
 					"code":        "winnerorder",
 					"field":       vc.Field,
-					"ruletext":    "中标候选人",
+					"ruletext":    "中标候选人_"+ v["sortstr"].(string),
 					"extfrom":     v["sortstr"],
 					"sourcevalue": v["price"],
 					"value":       v["price"],
 					"type":        "winnerorder",
 					"matchtype":   "winnerorder",
 				})
+				return kvmap, false
 			}
 			//候选人中标金额
 			if price := j.Winnerorder[0]["price"]; price != nil {
@@ -1126,6 +1151,14 @@ func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kv
 				//				}
 			} else if k == 1 {
 				tp = "space"
+				//				for _, vv := range v.Kvs {
+				//					qu.Debug("space-kvs:", vv.Key, vv.Value)
+				//				}
+				//				for kkk, vv := range v.KvTags {
+				//					for _, vvv := range vv {
+				//						qu.Debug("space-tags", kkk, vvv.Key, vvv.Value)
+				//					}
+				//				}
 			} else if k == 2 {
 				tp = "table"
 				//				for _, vv := range v.Kvs {
@@ -1563,6 +1596,7 @@ type FieldValue struct {
 	Value interface{}
 	Count int
 }
+var clearWinnerReg =regexp.MustCompile("名称|施工|拟定供应商名称|:|:")
 
 //分析抽取结果并保存
 func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
@@ -1583,7 +1617,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		for _, val := range result {
 			for _, v := range val { //取第一个非负数,项目名称除外
 				//存0是否有效
-				if v.Field == "bidamount" || v.Field == "budget" && v.IsTrue {
+				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
 					tmp[v.Field] = v.Value
 					break
 				}
@@ -1598,22 +1632,70 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		}
 		if len(j.PackageInfo) > 0 { //分包信息
 			tmp["package"] = j.PackageInfo
+			//包预算,中标金额合并大于抽取就覆盖
 			var tmpBidamount, tmpBudget float64
+			//s_winner逗号分隔拼接,分包中标人
+			var tmpstr,savewinner []string
+			//按包排序
+			for b, v := range j.PackageInfo {
+				if v["winner"]!= nil && v["winner"]!=""{
+					tmpstr = append(tmpstr,b)
+				}
+			}
 			//包预算,中标金额合并大于抽取就覆盖
-			for _, v := range j.PackageInfo {
-				if v["budget"] != nil {
-					tmpBudget += qu.Float64All(v["budget"])
+			if len(j.PackageInfo) >1{
+				//包数大于1累加
+				for _, v := range j.PackageInfo {
+					if v["budget"] != nil {
+						tmpBudget += qu.Float64All(v["budget"])
+					}
+					if v["bidamount"] != nil {
+						tmpBidamount += qu.Float64All(v["bidamount"])
+					}
+				}
+				if qu.Float64All(tmp["budget"]) < tmpBudget {
+					tmp["budget"] = tmpBudget
+				}
+				if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
+					tmp["bidamount"] = tmpBidamount
+				}
+			}else {
+				//包数等于1,tmp没有值取包里的值
+				if tmp["budget"] == nil || tmp["budget"] == 0 {
+					for _,v := range j.PackageInfo {
+						if v["budget"] != nil {
+							tmp["budget"] = v["budget"]
+						}
+					}
+
 				}
-				if v["bidamount"] != nil {
-					tmpBidamount += qu.Float64All(v["bidamount"])
+				if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
+					for _,v := range j.PackageInfo {
+						if v["bidamount"] != nil {
+							tmp["bidamount"] = v["bidamount"]
+						}
+					}
 				}
 			}
-			if qu.Float64All(tmp["budget"]) < tmpBudget {
-				tmp["budget"] = tmpBudget
+			//s_winner逗号分隔拼接,分包中标人
+			sort.Strings(tmpstr)
+			for _,v := range tmpstr{
+				svvvv := qu.ObjToString(j.PackageInfo[v]["winner"])
+				savevvv := clearWinnerReg.ReplaceAllString(svvvv, "")
+				if savevvv == ""{
+					continue
+				}
+				savewinner = append(savewinner,savevvv)
 			}
-			if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
-				tmp["bidamount"] = tmpBidamount
+			if (savewinner  == nil || len(savewinner)==0) && tmp["winner"]!=nil{
+				tmp["s_winner"] = tmp["winner"]
+			}else if savewinner != nil{
+				tmp["s_winner"] = strings.Join(savewinner,",")
 			}
+
+		}else if tmp["winner"]!= nil && tmp["winner"]!=""{
+			//没有分包取winner
+			tmp["s_winner"] = tmp["winner"]
 		}
 		if len(j.Winnerorder) > 0 { //候选人信息
 			tmp["winnerorder"] = j.Winnerorder
@@ -1721,6 +1803,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				}
 			}
 		}
+		//		fmt.Println("=============抽取结果================")
+		//		for k, v := range tmp {
+		//			qu.Debug(k, "---", v)
+		//		}
 		//tmp["extract_content"] = j.Content
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
@@ -1765,13 +1851,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				e.RWMutex.Unlock()
 			}
 		} else { //测试结果
-			//			fmt.Println("=============抽取结果================")
-			//			for k, v := range tmp {
-			//				qu.Debug(k, "---", v)
-			//			}
-			//			for field, _ := range e.Fields {
-			//				qu.Debug(field, "---", tmp[field])
-			//			}
 			delete(tmp, "_id")
 			if len(j.BlockPackage) > 0 { //分包详情
 				bs, _ := json.Marshal(j.BlockPackage)
@@ -1967,7 +2046,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //redis未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库
@@ -2059,7 +2138,10 @@ func resetWinnerorder(j *ju.Job) {
 	if maxlen > 0 {
 		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
 		if j.Winnerorder[0]["price"] != nil {
-			bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["price"], Score: 0.5})
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"],""})
+			if tmpPrice[len(tmpPrice)-1].(bool){
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder",SourceValue:j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 0.5})
+			}
 		}
 	}
 	if j.Result["winner"] == nil && len(winners) > 0 {

+ 5 - 5
src/jy/extract/score_jsondata.go

@@ -12,7 +12,7 @@ import (
 )
 
 func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
-	if len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0 {
+	if len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0 || j.Site =="中国政府采购网"{
 		return j.Result
 	}
 	jdextweight := util2.IntAll((*j.Jsondata)["extweight"])
@@ -30,13 +30,13 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				cfn := e.ClearFn[v]
 				lockclear.Unlock()
 				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""})
-				if util2.IntAll(newNum[0]) != 0 {
+				//if util2.IntAll(newNum[0]) != 0 {
 					extFields := make([]*util.ExtField, 0)
 					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1,IsTrue:newNum[len(newNum)-1].(bool)})
 					j.Result[v] = extFields
 					//AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
 					//AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
-				}
+				//}
 				continue
 			}
 			vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
@@ -126,7 +126,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				cfn := e.ClearFn[v]
 				lockclear.Unlock()
 				newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""})
-				if util2.IntAll(newNum[0]) != 0 {
+				//if util2.IntAll(newNum[0]) != 0 {
 					extFields := make([]*util.ExtField, 0)
 					if jdextweight >1{
 						if oneScore < 0{
@@ -142,7 +142,7 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 					j.Result[v] = append(j.Result[v], extFields...)
 					//AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
 					//AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
-				}
+				//}
 				continue
 			}
 

+ 25 - 8
src/jy/pretreated/analystep.go

@@ -37,7 +37,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			if len([]rune(bl.Text)) > 80 {
 				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock, isSite, codeSite)
 				for _, bl_bl := range bl.Block {
-					processTableInBlock(bl_bl, job,  isSite, codeSite)
+					processTableInBlock(bl_bl, job, isSite, codeSite)
 				}
 			}
 			FindProjectCode(bl.Text, job) //匹配项目编号
@@ -96,7 +96,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 						tmpw := 0
 						if bl.TableKV != nil {
 							for kk, v := range bl.TableKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk){
+								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
 									for _, vv := range v {
 										if winbs[vv.Value] {
 											continue
@@ -115,7 +115,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 						}
 						if bl.SpaceKV != nil {
 							for kk, v := range bl.SpaceKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk){
+								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
 									for _, vv := range v {
 										if winbs[vv.Value] {
 											continue
@@ -134,7 +134,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 						}
 						if bl.ColonKV != nil {
 							for kk, v := range bl.ColonKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk){
+								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
 									for _, vv := range v {
 										if winbs[vv.Value] {
 											continue
@@ -159,7 +159,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	}
 }
 
-func processTableInBlock(bl *util.Block, job *util.Job,  isSite bool, codeSite string) {
+func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite string) {
 	//块中再查找表格(块,处理完把值赋到块)
 	tabs, _ := ComputeConRatio(bl.Text, 2)
 	for _, tab := range tabs {
@@ -283,19 +283,28 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 			tablePackage[v] = blockPackage
 		}
 	}
+	tmpWins := make(map[string]int)
+	for _, v := range job.Winnerorder {
+		if v["entname"] != nil && v["entname"] != "" {
+			tmpWins[v["entname"].(string)] = v["sort"].(int)
+		}
+	}
 	//处理中标人排序
 	wror := []map[string]interface{}{}
-	for i, v := range tabres.WinnerOrder {
+	for _, v := range tabres.WinnerOrder {
 		entName, _ := v["entname"].(string)
 		v["entname"] = winnerOrderEntity.clear("中标单位", entName)
 		if price, ok := v["price"].(string); ok {
 			v["price"] = winnerOrderEntity.clear("中标金额", price)
 		}
-		v["type"] = i
+		v["type"] = len(job.Winnerorder)
+		if tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] == nil {
+			continue
+		}
 		wror = append(wror, v)
 	}
 	if len(wror) > 0 {
-		job.Winnerorder = wror
+		job.Winnerorder = append(job.Winnerorder, wror...)
 	}
 	//分包
 	if len(tablePackage) > 0 {
@@ -342,6 +351,14 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 			if tv.WinnerOrder != nil && len(tv.WinnerOrder) > 0 {
 				bv.WinnerOrder = tv.WinnerOrder
 			}
+			if tv.Bidamount > 0 && bv.Bidamount == 0 {
+				bv.Bidamount = tv.Bidamount
+				bv.IsTrueBidamount = tv.IsTrueBidamount
+			}
+			if tv.Budget >0 && bv.Bidamount == 0{
+				bv.Budget = tv.Budget
+				bv.IsTrueBudget = tv.IsTrueBudget
+			}
 		}
 		for k, v := range pkgMap {
 			job.BlockPackage[k] = v

+ 54 - 5
src/jy/pretreated/analytable.go

@@ -2,7 +2,10 @@ package pretreated
 
 import (
 	"fmt"
+	"jy/clear"
 	u "jy/util"
+
+	//"log"
 	qutil "qfw/util"
 	"regexp"
 	"strings"
@@ -45,7 +48,7 @@ var (
 	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
 	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
 	//判断分包前排除
-	excludeKey = regexp.MustCompile("(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
+	excludeKey = regexp.MustCompile("(涉及包号|分包数量|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
 	//-------------
 
 	cut = u.NewCut()
@@ -70,6 +73,8 @@ var (
 	filter_zbje_k = regexp.MustCompile("(中标|成交|总|拦标|合同|供[应货]商|报)[\\p{Han}、]{0,6}(价|额|[大小]写|[万亿]?元).{0,4}$")
 	//简单判断金额
 	filter_zbje_jd = regexp.MustCompile("^[^(售|保证)]{0,4}(价|额).{0,4}$")
+	//预算金额
+	filter_ysje_jd = regexp.MustCompile("预算")
 	//且排队以下字眼的key
 	filter_zbje_kn = regexp.MustCompile("得分|打分|时间|业绩|须知|分|要求$")
 	//且值包含以下字眼
@@ -83,7 +88,7 @@ var (
 	//简单判断
 	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
 	//且不包含以下字眼
-	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址")
+	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金")
 	//且值包含以下字眼
 	filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
 	//且值包含以下字眼
@@ -96,7 +101,7 @@ var (
 	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
 	MhSpilt        = regexp.MustCompile("[::]")
 	//识别采购单位联系人、联系电话、代理机构联系人、联系电话
-	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式)([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
+	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
 		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
@@ -194,7 +199,11 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
 			if filter_tag_zb.MatchString(tabletag) || filter_tag_zb.MatchString(tabledesc) {
 				//u.Debug(v1, k, "-----", filter_zbdw_jd.MatchString(k), filter_zbdw_v.MatchString(v1))
 				if filter_zbje_jd.MatchString(k) && !filter_zbje_kn.MatchString(k) && filter_zbje_v.MatchString(v1) {
-					kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
+					if filter_ysje_jd.MatchString(k) {
+						kvTags["预算金额"] = append(kvTags["预算金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
+					} else if !filter_zbdw_kn.MatchString(k) {
+						kvTags["中标金额"] = append(kvTags["中标金额"], &u.Tag{Key: k, Value: v1, Weight: -100})
+					}
 
 				} /*else if filter_zbdw_jd.MatchString(k) && filter_zbdw_v.MatchString(v1) {
 					k1 = append(k1, "中标单位")
@@ -324,6 +333,8 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
 	}
 	checkKey := map[int]bool{}
+	//tmpBidmout := []string{}
+	//log.Println(tmpBidmout)
 	for kn, k := range as.Keys { //遍历table.SortKV.value为数组的key
 		v := as.Map[k]
 		if vm, ok := v.([]map[string]interface{}); ok && k == NullTxtBid {
@@ -2119,6 +2130,44 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 			//}
 			for _, vcgdw := range k1tags {
 				if vcgdw.Value == "采购单位" {
+				} else if vcgdw.Value == "预算" && len(val) == len(index) {
+					for bi, bv := range index {
+						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget == 0 {
+							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
+							if len(moneys) > 0 {
+								if vf, ok := moneys[0].(float64); ok {
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget = vf
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBudget = moneys[len(moneys)-1].(bool)
+								} else if vi, ok := moneys[0].(int); ok {
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget = float64(vi)
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBudget = moneys[len(moneys)-1].(bool)
+								}
+							}
+						}
+					}
+				} else if vcgdw.Value == "中标金额" && len(val) == len(index){
+					for bi, bv := range index {
+						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount == 0 {
+							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
+							if len(moneys) > 0 {
+								if vf, ok := moneys[0].(float64); ok {
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount = vf
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBidamount = moneys[len(moneys)-1].(bool)
+								} else if vi, ok := moneys[0].(int); ok {
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount = float64(vi)
+									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBidamount = moneys[len(moneys)-1].(bool)
+								}
+							}
+						}
+					}
+				} else if vcgdw.Value == "中标单位" {
+					for _, bv := range index {
+						if tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder == nil || len(tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder) == 0 {
+							continue
+						} else {
+							tn.BlockPackage.Map[bv].(*u.BlockPackage).Winner = qutil.ObjToString(tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder[0]["entname"])
+						}
+					}
 				}
 			}
 		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
@@ -3271,7 +3320,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 7 - 0
src/jy/pretreated/tablev2.go

@@ -494,6 +494,9 @@ func (tr *TR) AddTD(td *TD) {
 		tr.TDs[len(tr.TDs)-1].RightNode = td
 	}
 	**/
+	if tr==nil|| tr.TDs == nil{
+		return
+	}
 	td.ColPos = len(tr.TDs)
 	tr.TDs = append(tr.TDs, td)
 }
@@ -846,6 +849,7 @@ func CheckHeader(txt string) (res, must bool, stype, reg, repl string) {
 con 文本
 strtype 1全文 2块文本
 **/
+var hisReg =regexp.MustCompile("类似业绩|历史业绩")
 func ComputeConRatio(con string, strtype int) (tabs []*goquery.Selection, ratio float32) {
 	defer qutil.Catch()
 	doc, _ := goquery.NewDocumentFromReader(strings.NewReader(con))
@@ -863,6 +867,9 @@ func ComputeConRatio(con string, strtype int) (tabs []*goquery.Selection, ratio
 				}
 			}
 			if !b {
+				if hisReg.MatchString(tmpt.First().Text()){
+					continue
+				}
 				tabs = append(tabs, tmpt)
 			}
 		}

+ 16 - 14
src/jy/util/article.go

@@ -128,20 +128,22 @@ type Segment struct {
 
 //包
 type BlockPackage struct {
-	Origin      string                   //包的原始值
-	Name        string                   //标段(包)名称
-	Text        string                   //包文 (包对应的正文)
-	Budget      float64                  //标段(包)预算
-	Winner      string                   //标段(包)中标单位
-	Bidamount   float64                  //标段(包)中标价
-	Index       string                   //序号 (转换后编号,只有数字或字母)
-	Type        string                   //类型 (匹配后面的标段、包之类的词)
-	ColonKV     *JobKv                   //冒号kv (分出的对应的KV值)
-	TableKV     *JobKv                   //table kv (分出的对应的KV值)
-	SpaceKV     *JobKv                   //空格 kv (分出的对应的KV值)
-	BidStatus   string                   //成交状态
-	WinnerOrder []map[string]interface{} //中标人排序
-	Accuracy    bool                     //包里面抽取字段的准确性,如果能打上块标签的话,就不用中标候选人中的值覆盖包里面的值
+	Origin          string                   //包的原始值
+	Name            string                   //标段(包)名称
+	Text            string                   //包文 (包对应的正文)
+	Budget          float64                  //标段(包)预算
+	IsTrueBudget    bool                     //标段(包)预算0是否有效
+	Winner          string                   //标段(包)中标单位
+	Bidamount       float64                  //标段(包)中标价
+	IsTrueBidamount bool                     //标段(包)中标价 0是否有效
+	Index           string                   //序号 (转换后编号,只有数字或字母)
+	Type            string                   //类型 (匹配后面的标段、包之类的词)
+	ColonKV         *JobKv                   //冒号kv (分出的对应的KV值)
+	TableKV         *JobKv                   //table kv (分出的对应的KV值)
+	SpaceKV         *JobKv                   //空格 kv (分出的对应的KV值)
+	BidStatus       string                   //成交状态
+	WinnerOrder     []map[string]interface{} //中标人排序
+	Accuracy        bool                     //包里面抽取字段的准确性,如果能打上块标签的话,就不用中标候选人中的值覆盖包里面的值
 }
 
 //联系人

+ 1 - 1
src/main.go

@@ -44,7 +44,7 @@ func init() {
 	//	util.ElasticClient = eClient
 	util.ElasticClientIndex = qu.ObjToString(util.Config["elasticsearch_index"])
 	util.ElasticClientType = qu.ObjToString(util.Config["elasticsearch_type"])
-	util.ElasticClientDB = qu.ObjToString(util.Config["winner_enterprise"])
+	util.ElasticClientDB = qu.ObjToString(util.Config["elasticsearch_db"])
 	//}
 }
 

+ 1 - 1
src/main_test.go

@@ -28,7 +28,7 @@ func Test_han(t *testing.T) {
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_dev32")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5df6e6a6e9d1f601e494b749", "1", "mxs_v1", "mxs_v1")
+	extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5df59ee3e9d1f601e46fc3f9", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5e17e00e85a9271abf0860a6", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)

+ 34 - 9
src/res/fieldscore.json

@@ -77,7 +77,7 @@
             },
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(的|招标|公示|公告|谈判|公开|通知|采购文件|交易中心)$",
+                "regstr": ".{2,100}(的|招标|公示|公告|谈判|公开|通知|采购文件|交易中心|\\d#)$",
                 "score": -5
             },
             {
@@ -147,7 +147,7 @@
         "positivewords": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(委员会|分校|办公室|学校|幼儿园|动物园|管理站|图书馆|殡仪馆|博物馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|初中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司)$",
+                "regstr": ".{2,100}(委员会|中心|分校|办公室|学校|幼儿园|动物园|管理站|图书馆|殡仪馆|博物馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|初中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站)$",
                 "score": 3
             }
         ],
@@ -207,7 +207,7 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "regstr": "(定标|通知|异议|要求|详细|意见|原因|具体|结果|负责|付款|附件|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
                 "score": -10
             },
 			{
@@ -259,8 +259,8 @@
         ],
         "negativewords": [
             {
-                "describe": "不在展示",
-                "regstr": "(详见公告)",
+                "describe": "包含负分",
+                "regstr": "(详见公告|原因|未知|收费|标注|负责人)",
                 "score": -10
             }
         ],
@@ -291,6 +291,18 @@
             }
         ]
     },
+    "agencyaddr": {
+        "type": "string",
+        "positivewords": [
+        ],
+        "negativewords": [
+            {
+                "describe": "包含负分",
+                "regstr": "(详见|公告|原因|未知|收费|标注)",
+                "score": -10
+            }
+        ]
+    },
     "buyerperson": {
         "type": "string",
         "positivewords": [
@@ -389,7 +401,7 @@
             },
 			{
                 "describe": "包含负分",
-                "regstr": "(详见公告)",
+                "regstr": "(详见|公告)",
                 "score": -10
             }
         ],
@@ -402,7 +414,7 @@
                     -10
                 ]
             },
-			 {
+			{
                 "describe": "[gt,∞,score]",
                 "range": [
                     90,
@@ -501,7 +513,20 @@
             }
         ]
     },
-    
+    "contractcode": {
+        "type": "string",
+        "positivewords": [
+        ],
+        "negativewords": [
+            {
+                "describe": "包含负分",
+                "regstr": "(null|无|[//,,。、::“”‘’_\"])",
+                "score": -10
+            }
+        ],
+        "length": [
+        ]
+    },
 	"projectcode": {
         "type": "string",
         "positivewords": [
@@ -524,7 +549,7 @@
         "negativewords": [
             {
                 "describe": "以什么开始的减分",
-                "regstr": "^[【|-]",
+                "regstr": "^[-]",
                 "score": -1
             },
             {

+ 1 - 0
udp_winner/config.json

@@ -5,6 +5,7 @@
   "udpport": "127.0.0.1:12311",
   "mgoinit": "192.168.3.207:27081",
   "mgodb_bidding": "qfw",
+  "mgodb_mgoinit_c": "bidding",
   "mgodb_enterprise": "enterprise",
   "mgodb_enterprise_c": "qyxy",
   "mgourl2": "192.168.3.207:27092",

+ 20 - 9
udp_winner/main.go

@@ -14,6 +14,7 @@ import (
 	"net"
 	elastic "qfw/common/src/qfw/util/elastic"
 	"qfw/util"
+	"regexp"
 
 	"sort"
 	"strings"
@@ -29,6 +30,9 @@ var
 	Addrs                                 = make(map[string]interface{}, 0) //省市县
 	udpclient                             mu.UdpClient                      //udp对象
 	ElasticClientIndex, ElasticClientType string
+	Reg_xing                              = regexp.MustCompile(`\*{1,}`)
+	Reg_person                            = regexp.MustCompile("[\u4E00-\u9FA5\\s]+")
+	Reg_tel                               = regexp.MustCompile(`^[0-9\-\s]*$`)
 )
 /**
 新增
@@ -40,7 +44,7 @@ func init() {
 	log.Println(Config)
 	Fields = []string{"_id", "contact", "partners", "business_scope", "company_address", "capital",
 		"establish_date", "legal_person", "company_type", "district", "city", "province", "area_code", "credit_no",
-		"company_name", "history_name", "topscopeclass",  "wechat_accounts", "alias","website","report_websites"}
+		"company_name", "history_name", "topscopeclass", "wechat_accounts", "alias", "website", "report_websites"}
 	var err error
 	//mongo init
 	SourceClient, err = mongo.NewClient(options.Client().ApplyURI("mongodb://" + Config["mgoinit"]).SetMaxPoolSize(20))
@@ -86,7 +90,7 @@ func init() {
 	log.Println(len(Addrs))
 	//es.NewClient(es.SetURL(addrs...), es.SetMaxRetries(2), es.SetSniff(false))
 	//es init
-	elastic.InitElasticSize(Config["elasticsearch"],10)
+	elastic.InitElasticSize(Config["elasticsearch"], 10)
 	//esConn := elastic.GetEsConn()
 	//defer elastic.DestoryEsConn(esConn)
 	//log.Println(esConn.Index().Index(Config["elasticsearch_index"]).Type(Config["elasticsearch_type"]).Id("123").BodyJson(map[string]interface{}{"testname":"六盘水市钟山开发区亿农科贸有限公司"}).Refresh(true).Do())
@@ -170,7 +174,7 @@ func task(mapinfo *map[string]interface{}) {
 	// topscopeclass项目类型-industry行业类型&&topscopeclass联系人项目类型
 	// (area地区-province省份 city城市-city城市 district区县-district区县)
 	// winneraddr-company_address企业地址
-	cursor, err := SourceClient.Database(Config["mgodb_bidding"]).Collection("bidding").Find(context.TODO(), bson.M{
+	cursor, err := SourceClient.Database(Config["mgodb_bidding"]).Collection(Config["mgodb_mgoinit_c"]).Find(context.TODO(), bson.M{
 		"_id": bson.M{
 			"$gte": GId,
 			"$lte": LtId,
@@ -192,7 +196,7 @@ func task(mapinfo *map[string]interface{}) {
 			defer rdb.Close()
 			if reply, err := redis.String(rdb.Do("GET", tmp["winner"])); err != nil {
 				//redis不存在存到临时表,定时任务处理
-				FClient.Database(Config["mgodb_extract_kf"]).Collection("tmp_winner_qyk").InsertOne(context.TODO(), tmp)
+				FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").InsertOne(context.TODO(), tmp)
 				//log.Println(tmp, err)
 				continue
 			} else {
@@ -251,9 +255,9 @@ func task(mapinfo *map[string]interface{}) {
 				sort.Strings(tmpTopscopeclass)
 				oldTmp["industry"] = tmpTopscopeclass
 				esId := oldTmp["_id"].(primitive.ObjectID).Hex()
-				//联系方式合并
-				if tmp["winnerperson"] == nil || tmp["winnerperson"] == "" {
-					oldTmp["updatatime"] =time.Now().Unix()
+				//更新行业类型
+				if tmp["winnerperson"] == nil || tmp["winnerperson"] == "" || Reg_xing.MatchString(util.ObjToString(tmp["winnerperson"])) {
+					oldTmp["updatatime"] = time.Now().Unix()
 					//mongo更新
 					FClient.Database(Config["mgodb_extract_kf"]).Collection(Config["mgo_qyk_c"]).
 						UpdateOne(context.TODO(), bson.M{"_id": oldTmp["_id"]}, bson.M{"$set": oldTmp})
@@ -265,10 +269,17 @@ func task(mapinfo *map[string]interface{}) {
 					//log.Println( err2,err3)
 					continue
 				}
+				//联系方式合并
 				var tmpperson, winnertel string
 				tmpperson = tmp["winnerperson"].(string)
-				if tmp["winnertel"] == nil {
+				if tmp["winnertel"] == nil || tmp["winnertel"]==""{
 					winnertel = ""
+				}else {
+					if Reg_xing.MatchString(util.ObjToString(tmp["winnertel"]))||!Reg_tel.MatchString(util.ObjToString(tmp["winnertel"])){
+						winnertel = ""
+					}else {
+						winnertel = util.ObjToString(tmp["winnertel"])
+					}
 				}
 				contactMaps := make([]interface{}, 0)
 				if oldTmp["contact"] == nil {
@@ -306,7 +317,7 @@ func task(mapinfo *map[string]interface{}) {
 				}
 				oldTmp["contact"] = contactMaps
 				//mongo更新
-				oldTmp["updatatime"] =time.Now().Unix()
+				oldTmp["updatatime"] = time.Now().Unix()
 				FClient.Database(Config["mgodb_extract_kf"]).Collection(Config["mgo_qyk_c"]).
 					UpdateOne(context.TODO(), bson.M{"_id": oldTmp["_id"]}, bson.M{"$set": oldTmp})
 				//es更新

+ 5 - 7
udp_winner/timedTask.go

@@ -21,7 +21,7 @@ func TimedTask() {
 	t2 := time.NewTimer(time.Second * 5)
 	for range t2.C {
 		tmpLast := map[string]interface{}{}
-		if err := FClient.Database(Config["mgodb_extract_kf"]).Collection("tmp_winner_qyk").FindOne(context.TODO(), bson.M{}, options.FindOne().SetSort(bson.M{"_id": -1})).Decode(&tmpLast); err != nil {
+		if err := FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").FindOne(context.TODO(), bson.M{}, options.FindOne().SetSort(bson.M{"_id": -1})).Decode(&tmpLast); err != nil {
 			//临时表无数据
 			log.Println("临时表无数据:", err)
 			t2.Reset(time.Minute * 5)
@@ -29,7 +29,7 @@ func TimedTask() {
 		} else {
 			//临时表有数据
 			log.Println("临时表有数据:", tmpLast)
-			cursor, err := FClient.Database(Config["mgodb_extract_kf"]).Collection("tmp_winner_qyk").Find(context.TODO(), bson.M{
+			cursor, err := FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").Find(context.TODO(), bson.M{
 				"_id": bson.M{
 					"$lte": tmpLast["_id"],
 				},
@@ -48,8 +48,8 @@ func TimedTask() {
 					if r != nil {
 						//log.Println(r)
 						//匹配不到原始库,存入异常表删除临时表
-						FClient.Database(Config["mgodb_extract_kf"]).Collection("err_winner_qyk").InsertOne(context.TODO(), tmp)
-						FClient.Database(Config["mgodb_extract_kf"]).Collection("tmp_winner_qyk").DeleteOne(context.TODO(), tmp)
+						FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_err").InsertOne(context.TODO(), tmp)
+						FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").DeleteOne(context.TODO(), tmp)
 						continue
 					} else {
 						//log.Println(123)
@@ -84,13 +84,11 @@ func TimedTask() {
 							if err != nil {
 								log.Println("annual_reports err:", err)
 							}
-							//log.Println(2, string(bytes))
 							phonetmp := make([]map[string]interface{}, 0)
 							err = json.Unmarshal(bytes, &phonetmp)
 							if err != nil {
 								log.Println("Unmarshal err:", err)
 							}
-							//log.Println(44, err)
 							for _, vv := range phonetmp {
 								if vv["company_phone"] != nil {
 									if vv["company_phone"] == "" {
@@ -204,7 +202,7 @@ func TimedTask() {
 									log.Println("save es err :", tmp["_id"], savetmp["_id"], err)
 								} else {
 									//删除临时表
-									FClient.Database(Config["mgodb_extract_kf"]).Collection("tmp_winner_qyk").DeleteOne(context.TODO(), tmp)
+									FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").DeleteOne(context.TODO(), tmp)
 								}
 							}
 						} else {