Parcourir la source

备份-修改~物业逻辑 , 城市抽取

zhengkun il y a 3 ans
Parent
commit
edfdb65895

+ 61 - 37
src/jy/extract/extract.go

@@ -33,7 +33,7 @@ var (
 	ClearTaskList map[string]*ClearTask                  //清理任务列表
 	saveLimit     = 100                                  //抽取日志批量保存
 	PageSize      = 5000                                 //查询分页
-	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"review_experts":1,"purchasing":1}`
+	Fields        = `{"jyfb_data":1,"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"review_experts":1,"purchasing":1}`
 	//Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"new_attach_text":1,"createtime":1,"currency":1,"id":1,"company_email":1,"buyerclass":1,"tagname":1,"company_phone":1,"appid":1,"industry":1,"projectscope":1,"item":1,"s_subscopeclass":1,"matchkey":1,"jybxhref":1,"legal_person":1,"matchtype":1,"review_experts":1,"purchasing":1}`
 	Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 	spidercode = map[string]bool{
@@ -2120,17 +2120,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					}
 				}
 				if qu.Float64All(tmp["budget"]) < tmpBudget {
-					if tmpBudget == qu.Float64All(tmp["budget"])*float64(10000) &&
-						tmpBudget>=1000000000 && qu.Float64All(tmp["budget"])>0{
-						tmp["is_dif_ratioMoney"] = true
-					}else {
-						fieldSource["budget"] = map[string]interface{}{
-							"ext_type":"",
-							"ext_from":"package",
-						}
-						tmp["budget"] = tmpBudget
+					fieldSource["budget"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
 					}
+					tmp["budget"] = tmpBudget
 				}
+
 				if qu.Float64All(tmp["agencyfee"]) < tmpAgencyfee {
 					fieldSource["agencyfee"] = map[string]interface{}{
 						"ext_type":"",
@@ -2138,6 +2134,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					}
 					tmp["agencyfee"] = tmpAgencyfee
 				}
+
 				if qu.Float64All(tmp["bidamount"]) > 0 && qu.Float64All(tmp["budget"]) > 0 && (qu.Float64All(tmp["bidamount"])/10 > qu.Float64All(tmp["budget"])) {
 					fieldSource["bidamount"] = map[string]interface{}{
 						"ext_type":"",
@@ -2145,17 +2142,31 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					}
 					tmp["bidamount"] = tmpBidamount
 				} else if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
-					if tmpBidamount == qu.Float64All(tmp["bidamount"])*float64(10000) &&
-						tmpBidamount>=1000000000 && qu.Float64All(tmp["bidamount"])>0{
-						tmp["is_dif_ratioMoney"] = true
-					}else {
-						fieldSource["bidamount"] = map[string]interface{}{
-							"ext_type":"",
-							"ext_from":"package",
-						}
-						tmp["bidamount"] = tmpBidamount
+					fieldSource["bidamount"] = map[string]interface{}{
+						"ext_type": "",
+						"ext_from": "package",
 					}
+					tmp["bidamount"] = tmpBidamount
 				}
+
+				//if qu.Float64All(tmp["bidamount"]) > 0 && qu.Float64All(tmp["budget"]) > 0 && (qu.Float64All(tmp["bidamount"])/10 > qu.Float64All(tmp["budget"])) {
+				//	fieldSource["bidamount"] = map[string]interface{}{
+				//		"ext_type":"",
+				//		"ext_from":"package",
+				//	}
+				//	tmp["bidamount"] = tmpBidamount
+				//} else if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
+				//	if tmpBidamount == qu.Float64All(tmp["bidamount"])*float64(10000) &&
+				//		tmpBidamount>=1000000000 && qu.Float64All(tmp["bidamount"])>0{
+				//		tmp["is_dif_ratioMoney"] = true
+				//	}else {
+				//		fieldSource["bidamount"] = map[string]interface{}{
+				//			"ext_type":"",
+				//			"ext_from":"package",
+				//		}
+				//		tmp["bidamount"] = tmpBidamount
+				//	}
+				//}
 			} else {
 				//包数等于1,tmp没有值取包里的值
 				if tmp["budget"] == nil || tmp["budget"] == 0 {
@@ -2412,7 +2423,12 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		}*/
 		//检查字段
-		tmp = checkFields(tmp)
+		tmp = checkFields(tmp,*j.Data)
+
+
+
+
+
 		if tmp["projectname"] == nil || tmp["projectname"] == "" {
 			tmp["projectname"] = j.Title
 		}
@@ -2477,7 +2493,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 }
 
 //检查字段-
-func checkFields(tmp map[string]interface{}) map[string]interface{} {
+func checkFields(tmp map[string]interface{},j_data map[string]interface{}) map[string]interface{} {
 	delete(tmp, "contenthtml")
 	delete(tmp, "detail")
 	tmp["repeat"] = 0
@@ -2595,8 +2611,6 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 		delete(tmp, "bidway")
 	}
 
-
-
 	//折扣系数
 	discount := dealWithDiscountBid(tmp)
 	if discount >0.0 {
@@ -2607,28 +2621,38 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	delete(tmp, "biddiscount_up")
 	delete(tmp, "biddiscount_down")
 
-
-
 	//临时
-	docstarttime := qu.Int64All(tmp["docstarttime"])
-	docendtime := qu.Int64All(tmp["docendtime"])
-	timeLayout := "2006-01-02 15:04:05"
+	//bidstarttime := qu.Int64All(tmp["bidstarttime"])
+	//docendtime := qu.Int64All(tmp["docendtime"])
+	//timeLayout := "2006-01-02 15:04:05"
 
-	if docstarttime>0 {
-		time_1 := time.Unix(docstarttime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
-		tmp["docstarttime"] = time_1
-	}
-	if docendtime>0 {
-		time_2 := time.Unix(docendtime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
-		tmp["docendtime"] = time_2
-	}
+	//if bidstarttime>0 {
+	//	time_1 := time.Unix(bidstarttime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
+	//	tmp["bidstarttime"] = time_1
+	//}
+	//if docendtime>0 {
+	//	time_2 := time.Unix(docendtime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
+	//	tmp["docendtime"] = time_2
+	//}
 
 	jyhref:= fmt.Sprintf(JYUrl, qu.CommonEncodeArticle("content", qu.BsonIdToSId(tmp["_id"])))
 	tmp["jytest_href"] = jyhref
 
+
+	//检查剑鱼发布-爬虫
+	jyfb_data := *qu.ObjToMap(j_data["jyfb_data"])
+	if jyfb_data!=nil {
+		for k,v := range jyfb_data{
+			if k=="area" {
+				delete(tmp,"district")
+			}
+			tmp[k] = v
+		}
+	}
 	return tmp
 }
-//处理折扣系数
+
+//处理折扣系数-
 func dealWithDiscountBid(tmp map[string]interface{}) float64 {
 	biddiscount := qu.Float64All(tmp["biddiscount"])
 	biddiscount_up := qu.Float64All(tmp["biddiscount_up"])

+ 8 - 1
src/jy/extract/extractInit.go

@@ -2,6 +2,7 @@
 package extract
 
 import (
+	"github.com/sensitive"
 	db "jy/mongodbutil"
 	ju "jy/util"
 	qu "qfw/util"
@@ -128,7 +129,8 @@ type ExtractTask struct {
 	AreaCodeMap map[string]*AreaCode //区号
 
 	XjbtCityArr          []map[string]interface{}  		//新疆兵团相关数据
-
+	SensitiveFullCity  		 *sensitive.Filter
+	SensitiveSimCity		 *sensitive.Filter
 	InfoType []map[string]interface {
 	}
 
@@ -1140,11 +1142,13 @@ func (e *ExtractTask) InitCityInfo() {
 			qc_city := qu.ObjToString(vcity["city"])
 			jc_city := qu.ObjToString(vcity["brief_city"])
 			e.Trie_Full_City.AddWords(qc_city) //加入市全称Trie(k:杭州市)
+			e.SensitiveFullCity.AddWord(qc_city)
 			c := &City{}
 			c.Name = qc_city //市全称:杭州市
 			if jc_city != "" {
 				c.Brief = jc_city                 //市简称:杭州
 				e.Trie_Sim_City.AddWords(c.Brief) //加入市简称Trie(k:杭州)
+				e.SensitiveSimCity.AddWord(c.Brief)
 				e.CityMap[qc_city] = c.Brief      //杭州市:杭州
 				e.CityBriefMap[c.Brief] = c       //杭州:市信息{}
 				e.CityFullMap[qc_city] = c        //杭州市:市信息{}
@@ -1277,6 +1281,9 @@ func (e *ExtractTask) InitVar() {
 		e.XjbtCityArr = make([]map[string]interface{},0)
 	}
 
+	//敏感词-筛选
+	e.SensitiveFullCity = sensitive.New()
+	e.SensitiveSimCity = sensitive.New()
 	//初始化map
 	if e.SiteCityMap == nil {
 		e.SiteCityMap = make(map[string]*SiteCity)

+ 21 - 21
src/jy/extract/extractudp.go

@@ -64,31 +64,31 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					}
 
 					//新版本控制抽取
-					ExtractByUdp(sid, eid, ra)
-					log.Debug("抽取完成udp通知抽取id段-控制台",udpinfo, sid, "~", eid)
-					Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
+					//ExtractByUdp(sid, eid, ra)
+					//log.Debug("抽取完成udp通知抽取id段-控制台",udpinfo, sid, "~", eid)
+					//Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
 
 
 
 					//适配重采抽取-发送udp-必须替换
-					//go Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
-					//log.Debug("udp通知抽取id段", sid, " ", eid)
-					//ExtractByUdp(sid, eid, ra)
-					//for _, m := range nextNodes {
-					//	by, _ := json.Marshal(map[string]interface{}{
-					//		"gtid":  sid,
-					//		"lteid": eid,
-					//		"stype": qu.ObjToString(m["stype"]),
-					//	})
-					//	err := Udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
-					//		IP:   net.ParseIP(m["addr"].(string)),
-					//		Port: qu.IntAll(m["port"]),
-					//	})
-					//	if err != nil {
-					//		log.Debug(err)
-					//	}
-					//}
-					//log.Debug("udp通知抽取完成,eid=", eid)
+					go Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
+					log.Debug("udp通知抽取id段", sid, " ", eid)
+					ExtractByUdp(sid, eid, ra)
+					for _, m := range nextNodes {
+						by, _ := json.Marshal(map[string]interface{}{
+							"gtid":  sid,
+							"lteid": eid,
+							"stype": qu.ObjToString(m["stype"]),
+						})
+						err := Udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
+							IP:   net.ParseIP(m["addr"].(string)),
+							Port: qu.IntAll(m["port"]),
+						})
+						if err != nil {
+							log.Debug(err)
+						}
+					}
+					log.Debug("udp通知抽取完成,eid=", eid)
 				}
 			}
 		}

+ 43 - 1
src/jy/extract/newextractcity.go

@@ -16,6 +16,8 @@ var AgencyReg = []*regexp.Regexp{
 }
 
 var xjbtReg *regexp.Regexp = regexp.MustCompile("^(新疆生产建设兵团|新疆兵团)")
+var sensitiveReg  = regexp.MustCompile("(上一[条篇]|下一[条篇])[::].*")
+
 
 //抽取city
 func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp *map[string]interface{}, id string) {
@@ -182,6 +184,16 @@ func (e *ExtractTask) NewExtractCity(j *ju.Job, resulttmp *map[string]interface{
 			(*resulttmp)["district"] = d
 		}
 	}
+
+
+	//如果-仅有省份-敏感词-校验核对方法
+	if arearesult!="全国" && cityresult=="" {
+		sensitive_city := e.SensitiveCityData(qu.ObjToString((*j.Data)["detail"]),arearesult)
+		if sensitive_city !="" {
+			(*resulttmp)["city"] = sensitive_city
+			(*resulttmp)["is_sensitive"] = 1
+		}
+	}
 }
 
 //jsondata中抽取城市
@@ -966,8 +978,38 @@ func(e *ExtractTask) CheckingXjbtCity(buyer string) (new_a,new_c,new_d string,ok
 
 
 
+//敏感词识别~~~
+func(e *ExtractTask) SensitiveCityData(detail string, area string) string{
+	//全称组
+	detail = sensitiveReg.ReplaceAllString(detail,"")
+	sim_arr := e.SensitiveSimCity.FindAll(detail)
+	full_arr := e.SensitiveFullCity.FindAll(detail)
+	if len(full_arr)<3 {
+		for _,v := range full_arr{
+			if cityMap := e.CityFullMap[v]; cityMap != nil {
+				if cityMap.P.Brief==area {
+					return cityMap.Name
+				}
+			}
+		}
+	}
+	if len(sim_arr)<3 {
+		for _,v := range sim_arr{
+			if cityMap := e.CityBriefMap[v]; cityMap != nil {
+				if cityMap.P.Brief==area {
+					return cityMap.Name
+				}
+			}
+		}
+	}
+	//if len(new_city)==1 { //仅有一个有效城市
+	//	for _,v := range new_city{
+	//		return v
+	//	}
+	//}
 
-
+	return ""
+}
 
 
 

+ 1 - 1
src/jy/extract/score.go

@@ -121,7 +121,7 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 
 		if field == "budget" || field == "bidamount" {
 			for tmpsindex, tmpsvalue := range tmps {
-				if (strings.Contains(tmpsvalue.RuleText, "总")||strings.Contains(tmpsvalue.Code, "总价")) && tmpsvalue.RuleText!="总价(元)" &&(tmpsvalue.Type == "colon"||tmpsvalue.Type == "table" ) {
+				if ((strings.Contains(tmpsvalue.RuleText, "总") && !strings.Contains(tmpsvalue.RuleText, "项目总投资"))||strings.Contains(tmpsvalue.Code, "总价")) && tmpsvalue.RuleText!="总价(元)" &&(tmpsvalue.Type == "colon"||tmpsvalue.Type == "table" ) {
 					tmps[tmpsindex].Score += 1
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含总字+1`, Code: field, Value: tmpsvalue.Value, Score: 1})
 				}else if strings.Contains(qu.ObjToString(tmpsvalue.SourceValue), "㎡"){

+ 11 - 8
src/jy/pretreated/analystep.go

@@ -16,12 +16,14 @@ import (
 )
 //投标文件格式 技术评分明细表
 var yjReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|主要人员相关资料|唱标记录|标的名称|否决投标的?情况说明")
-var blTextReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|主要人员相关资料|唱标记录|否决投标的?情况说明")
+var blTextReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|业绩奖项|主要人员相关资料|唱标记录|否决投标的?情况说明")
 var unblTextReg *regexp.Regexp = regexp.MustCompile("(项目业绩案例|类似项目业绩)")
 
 
+
+var preConReg1 = regexp.MustCompile("(第[一二三1-3]中标候选人)\n(业绩奖项)")
 var hisReg = regexp.MustCompile("(开标记录|开标记录及投标报价|类似业绩|历史业绩|填报项目业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</td>)")
-var hisReg2 = regexp.MustCompile("(开标记录|(中标候选人)?业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(.*原因及其依据.*[::]?)?[\n]?.*?[\n]?(</tr>|</table>|</td>)")
+var hisReg2 = regexp.MustCompile("(开标记录|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(.*原因及其依据.*[::]?)?[\n]?.*?[\n]?(</tr>|</table>|</td>)")
 
 var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
 var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
@@ -38,6 +40,8 @@ var formattext6  = regexp.MustCompile("(投标报价[::][0-9.]+)\n([万元]+)"
 
 var formattext10  = regexp.MustCompile(".*包号\n.*\n.*供应商名称\n.*\n.*(成交|中标)金额\n(.*单位\n)?" +
 	"<td.*>(.*)\n(<td>\n)?.*\n<td.*>[\n]?(.*公司)\n.*\n<td.*>([0-9.,,万元]+)\n")
+var formattext11  = regexp.MustCompile("(项目预算)\n(第[一1](包|标段)[::])([0-9.万元人民币]+)\n" +
+	"(第[二2](包|标段)[::])([0-9.万元人民币]+)\n")
 
 //特殊文本提取-计算
 var formattext50  = regexp.MustCompile("主要标的数量[::]([0-9.]+)\n主要标的单价[::]([0-9.]+)\n合同金额[::].*\n履约期限")
@@ -115,7 +119,8 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
-	//格式化正文 -断点
+	//格式化正文
+	//con = preConReg1.ReplaceAllString(con, "${1}${2}")
 	con = hisReg.ReplaceAllString(con, "${2}")
 	con = hisReg2.ReplaceAllString(con, "${4}")
 
@@ -123,14 +128,12 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con = formattext2.ReplaceAllString(con, "${1}")
 	con = formattext3.ReplaceAllString(con,"")
 	con = formattext4.ReplaceAllString(con,"\n${1}:${2}\n")
-
-
 	//特殊格式-影响分包候选人抽取-替换
 	con = formattext5.ReplaceAllString(con,"中标金额:${2}\n")
 	con = formattext6.ReplaceAllString(con,"$1$2")
-
 	//改变特殊结构
 	con = formattext10.ReplaceAllString(con,"\n分包$3\n中标单位:$5 中标金额:$6\n")
+	con = formattext11.ReplaceAllString(con,"${1}\n${2}\n预算金额:${4}\n${5}\n预算金额:${7}\n${8}\n")
 
 
 	//指定爬虫-特殊结构-计算抽取
@@ -229,14 +232,14 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			//从正文里面找分包
 			job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
 		}
+
 		bl.Text = HtmlToText(con)
-		FindProjectCode(bl.Text, job) //匹配项目编号
+		FindProjectCode(bl.Text, job) //匹配项目编号 ~~ 清洗无效信息文本
 		if blTextReg.MatchString(bl.Text) && !unblTextReg.MatchString(bl.Text)   {
 			if strings.Index(bl.Text, "业绩") > 1 {
 				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
 			}
 		}
-
 		//特殊-指定处理-结构转化formattext100
 		if formattext100.MatchString(bl.Text) {
 			new_str := formattext100.FindString(bl.Text)

+ 32 - 3
src/jy/pretreated/analytable.go

@@ -1604,7 +1604,7 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 		//开始抽取
 		than_once_1,than_once_2 := false,false
 		for tr_index, tr := range table.TRs {
-			if tr_index==0 {
+			if tr_index==7 {
 				//fmt.Println("调试")
 			}
 			bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
@@ -2228,6 +2228,25 @@ func (tn *Table) GetTdByRCNo(row, col int) *TD {
 	return nil
 }
 
+//预算标签-不一定为分包
+func isUnRealBudgetBp(tnv []*u.Tag)bool {
+	if len(tnv)!=2 {
+		return false
+	}
+	key_1,key_2:= tnv[0].Key,tnv[1].Key
+	value_1,value_2 := tnv[0].Value,tnv[1].Value
+	if value_1!=value_2 {
+		if strings.Contains(key_1,"项目总投资")&&strings.Contains(key_2,"项目投资") {
+			return true
+		}
+		if strings.Contains(key_2,"项目总投资")&&strings.Contains(key_1,"项目投资") {
+			return true
+		}
+	}
+	return false
+}
+
+
 //判断表格是否是分包
 func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool, index []string) {
 	pac := 0             //包的数量
@@ -2295,10 +2314,12 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 				if tn.BlockPackage.Map[v] == nil {
 					kv := u.NewJobKv()
 					for tnk, tnv := range tn.StandKV {
-						if nk >= len(tnv) {
+						if nk >= len(tnv){
 							continue
 						} else if len(index) == len(tnv) {
-							//特殊处理-
+							if tnk=="预算" && isUnRealBudgetBp(tnv) {
+								continue
+							}
 							if tnk=="预算"&& codeSite=="ha_zmdszfcgw_cgxx" && len(tnv)>1{
 								isEqErr,budget_v := false,""
 								for bk,bv:=range tnv {
@@ -2745,6 +2766,11 @@ func (tn *Table) assemblePackage(k1, v1, key string, isSite bool, codeSite strin
 						bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
 					}
 				}
+			}else if k3 == "中标单位" && bp.Winner =="" {
+				new_str := qutil.ObjToString(winnerOrderEntity.clear("中标单位", v3[0].Value))
+				if new_str != "" && WinnerOrderStr.MatchString(new_str) {
+					bp.Winner = new_str
+				}
 			}
 		}
 	}
@@ -3105,6 +3131,9 @@ L:
 					//myContactType
 
 					myContactType := indexMap[index]
+					if td_kv.Key=="地址" && strings.Contains(td_kv.PrevLine,"采购代理机构") {
+						myContactType = "代理机构"
+					}
 					//qutil.Debug(indexMap, index, myContactType)
 					if myContactType == "" && len(indexMap) == 1 {
 						_, onlyContactType := u.FirstKeyValueInMap(indexMap)

+ 5 - 7
src/main.go

@@ -76,14 +76,12 @@ func main() {
 
 //验证规则
 func testMain()  {
-	text :=`
-第一成交候选人: 上海英立视数字科技有限公司;投标报含税总价为483000.00元,含税增值税税率为13%,工期为合同签订后7天内完成项目建设、试运行及项目验收。
-第二成交候选人: 广州函粤网络科技有限公司;投标报含税总价为487000.00元,含税增值税税率为13%,工期为合同签订后7天内完成项目建设、试运行及项目验收。
 
-`
-	var winnerReg_3 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五](中标|成交)候选人)[::\\s]+([\u4E00-\u9FA5]{4,20}公司)[,,;]?(报价|投标报价|投标含税总价|投标报含税总价)[为]?[::]?([0-9\\.\\s万元]+)")
-	if 	winnerReg_3.MatchString(text) {
-		text = winnerReg_3.ReplaceAllString(text,"\n${1}:${4} 中标金额:${6}\n")
+	text :=``
+	var formattext11  = regexp.MustCompile("(项目预算)\n(第[一1](包|标段)[::])([0-9.万元人民币]+)\n" +
+		"(第[二2](包|标段)[::])([0-9.万元人民币]+)\n")
+	if 	formattext11.MatchString(text) {
+		text = formattext11.ReplaceAllString(text,"${1}\n${2}\n预算金额:${4}\n${5}\n预算金额:${7}\n${8}\n")
 		log.Debug(text)
 	}else {
 		log.Debug("不匹配")

+ 2 - 33
src/res/tablev1.json

@@ -6,26 +6,7 @@
 		"^(包号|联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
 		"(专家|评委|[打得]分|附件材料)[a-zA-Z0-9]*$__M",
 		"(基本需求.{0,15}|.*联系方式|总计|包组|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
-		"品牌",
-        "姓名",
-		"起讫桩号",
-		"服务期",
-		"限价",
-		"邮编",
-		"面积",
-		"组织形式",
-		"招标方式",
-		"修建宽度",
-        "类别",
-        "备注",
-		"合计",
-        "电话",
-        "评审",
-		"原因",
-		"行业",
-		"价格",
-		"注册资金",
-		"印刷服务",
+		"(品牌|姓名|起讫桩号|服务期|限价|邮编|面积|组织形式|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
 		"[\\d]+标段$__M",
 		"(\\W{2,10}(名称|参数[及]?要求))$__M"
 	],
@@ -35,19 +16,7 @@
 		"^.{0,6}[打得评总](分)$__",
 		"(中标|磋商|投标|报|成交)总?(价|金额)__",
 		"(投标|中标)(人|方|单位|供应商)(名称)?__",
-		"成交",
-		"名次",
-	    "候选",
-		"业绩",
-		"荣誉",
-	    "排名",
-		"中标",
-		"供应商",
-		"详见附件及谈判、报价文件",
-		"折扣系数",
-		"合同期限",
-		"委托方",
-		"项目所在地"
+		"(成交|名次|候选|业绩|荣誉|排名|中标|供应商|详见附件及谈判、报价文件|折扣系数|合同期限|委托方|项目所在地|投标文件递交开始间)__"
 	],
 	"con":[
 		"(是否通过资格|比例|评分结果|中标金额|质量目标|安全目标|承诺工期|推荐顺序|参考对象|工程质量$|工期[((]交货期[))]|合作伙伴|(包名|中标(候选人|供应商).{0,5}|第[一二三四五六七八九十]+中标候选人)[::]?[\\s]*)__M",

+ 1 - 1
src/web/templates/front/login.html

@@ -30,7 +30,7 @@
 <body class="hold-transition login-page">
 <div class="login-box">
   <div class="login-logo">
-    <b>extract</b> 3
+    <b>互联网数据结构化抽取系统 v1.0</b>
   </div>
   <!-- /.login-logo -->
   <div class="login-box-body">

+ 1 - 1
udps/main.go

@@ -24,7 +24,7 @@ func main() {
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
 
 	flag.StringVar(&id1, "gtid", "124ed30f4f7bde5444f1eb84", "gtid")
-	flag.StringVar(&id2, "lteid", "9142e5741a75b8f4467b3276", "lteid")
+	flag.StringVar(&id2, "lteid", "92446f91923488e1724735de", "lteid")
 
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "biddingall", "stype,传递类型")