Bladeren bron

备份-修改-方法整合

zhengkun 2 jaren geleden
bovenliggende
commit
6b5c9ee190

+ 42 - 38
src/jy/clear/tonumber.go

@@ -4,6 +4,7 @@ package clear
 import (
 	"fmt"
 	"github.com/shopspring/decimal"
+	"math"
 	"qfw/util"
 	"regexp"
 	"strconv"
@@ -21,7 +22,7 @@ var numCapitals *regexp.Regexp  //中文大写金额过滤
 var regStrJe *regexp.Regexp     //匹配数字金额
 var regQianw *regexp.Regexp     //部分千万单位
 
-var regPercentMoney *regexp.Regexp  //取出 xx%
+var regPercentMoney *regexp.Regexp //取出 xx%
 
 var moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",壹贰叁肆伍陆柒捌玖
 	"一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
@@ -37,16 +38,17 @@ var moneyUnit = map[string]float64{
 }
 var kxjsReg *regexp.Regexp
 
+var unpkvBidamountReg = regexp.MustCompile("^([Xx]\\+[1-9\\.]+元/每)")
+var specBidamountReg = regexp.MustCompile("^([0-9.]+)E([1-7])$")
+var regUnitMoneyClean = regexp.MustCompile("^(.*单价[0-9.]+元[/][袋|块])[,,](含税总价[0-9.]+[万元]+)[.。]$")
+var blackMoneyClean = regexp.MustCompile("^([0-9.]+以下[万]?|分)$")
+var impactMoneyClean = regexp.MustCompile("(分二串口|分站模块)")
 
-var unpkvBidamountReg =  regexp.MustCompile("^([Xx]\\+[1-9\\.]+元/每)")
-var regUnitMoneyClean =  regexp.MustCompile("^(.*单价[0-9.]+元[/][袋|块])[,,](含税总价[0-9.]+[万元]+)[.。]$")
-var blackMoneyClean =  regexp.MustCompile("^([0-9.]+以下[万]?|分)$")
-var impactMoneyClean =  regexp.MustCompile("(分二串口|分站模块)")
 //大写金额补充
-var impactMoneyeplenish =  regexp.MustCompile("^([壹贰叁肆伍陆柒捌玖]分)")
-//特殊金额-格式-重置
-var resetAamountReg =  regexp.MustCompile("[.](0|00)[.](0|00)")
+var impactMoneyeplenish = regexp.MustCompile("^([壹贰叁肆伍陆柒捌玖]分)")
 
+//特殊金额-格式-重置
+var resetAamountReg = regexp.MustCompile("[.](0|00)[.](0|00)")
 
 func init() {
 	regOperator, _ = regexp.Compile(`[*|+|)*)]`)
@@ -60,8 +62,9 @@ func init() {
 
 	regQianw, _ = regexp.Compile(`\d{1,2}千万`)
 	kxjsReg = regexp.MustCompile("[0-9][E|e]{1}[-—+]{1}[0-9]{1,2}")
-	regPercentMoney,_ = regexp.Compile(`[0-9.]+[((]?[%|%][))]?`)
+	regPercentMoney, _ = regexp.Compile(`[0-9.]+[((]?[%|%][))]?`)
 }
+
 //转int
 func ObjToInt(data []interface{}, spidercode ...string) []interface{} {
 
@@ -72,8 +75,8 @@ func ObjToInt(data []interface{}, spidercode ...string) []interface{} {
 		"两": "2", "俩": "2",
 	}
 	tmp_value := fmt.Sprint(data[0])
-	for k,v:=range ch_num {
-		tmp_value = strings.ReplaceAll(tmp_value,k,v)
+	for k, v := range ch_num {
+		tmp_value = strings.ReplaceAll(tmp_value, k, v)
 	}
 
 	tmp, err := strconv.Atoi(tmp_value)
@@ -137,32 +140,37 @@ func ObjToMoney(data []interface{}, spidercode ...string) []interface{} {
 	//isfindUnit := true
 	tmpstr := (data)[0]
 	totmpstr := ""
-	if _,ok := tmpstr.(float64);ok {
-		totmpstr = fmt.Sprintf("%f",tmpstr)
-	}else {
+	if _, ok := tmpstr.(float64); ok {
+		totmpstr = fmt.Sprintf("%f", tmpstr)
+	} else {
 		totmpstr = util.ObjToString(tmpstr)
 	}
-
+	//特殊转换-科学计数法
+	if specBidamountReg.MatchString(totmpstr) {
+		price := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${1}"))
+		if unit := util.Float64All(specBidamountReg.ReplaceAllString(totmpstr, "${2}")); unit > 0.0 && price > 0.0 {
+			totmpstr = fmt.Sprintf("%f", math.Pow(10, unit)*price)
+			(data)[0] = totmpstr
+		}
+	}
 	//异常替换
 	if unpkvBidamountReg.MatchString(totmpstr) {
-		totmpstr = unpkvBidamountReg.ReplaceAllString(totmpstr,"")
+		totmpstr = unpkvBidamountReg.ReplaceAllString(totmpstr, "")
 		(data)[0] = totmpstr
 	}
-
 	if resetAamountReg.MatchString(totmpstr) {
-		totmpstr = resetAamountReg.ReplaceAllString(totmpstr,".0")
+		totmpstr = resetAamountReg.ReplaceAllString(totmpstr, ".0")
 		(data)[0] = totmpstr
 	}
-
 	//单位指定
 	if regUnitMoneyClean.MatchString(totmpstr) {
-		totmpstr = regUnitMoneyClean.ReplaceAllString(totmpstr,"$2")
+		totmpstr = regUnitMoneyClean.ReplaceAllString(totmpstr, "$2")
 		(data)[0] = totmpstr
 	}
 
 	//特殊替换
 	if impactMoneyClean.MatchString(totmpstr) {
-		totmpstr = impactMoneyClean.ReplaceAllString(totmpstr,"")
+		totmpstr = impactMoneyClean.ReplaceAllString(totmpstr, "")
 		(data)[0] = totmpstr
 	}
 
@@ -179,13 +187,12 @@ func ObjToMoney(data []interface{}, spidercode ...string) []interface{} {
 	}
 
 	//未含税总价1454400.00元,税率6%,含税总价1541664.00元
-	Percent:= regPercentMoney.FindAllString(totmpstr,-1)
-	for _,v:=range Percent{
-		totmpstr = strings.ReplaceAll(totmpstr,v,"")
+	Percent := regPercentMoney.FindAllString(totmpstr, -1)
+	for _, v := range Percent {
+		totmpstr = strings.ReplaceAll(totmpstr, v, "")
 	}
-	totmpstr = strings.ReplaceAll(totmpstr,"_","")
-	(data)[0] = totmpstr  //过滤到%相关数字
-
+	totmpstr = strings.ReplaceAll(totmpstr, "_", "")
+	(data)[0] = totmpstr //过滤到%相关数字
 
 	if kxjsReg.MatchString(totmpstr) {
 		fromString, err := decimal.NewFromString(totmpstr)
@@ -213,8 +220,6 @@ func ObjToMoney(data []interface{}, spidercode ...string) []interface{} {
 		}
 	}
 
-
-
 	ret := capitalMoney(data)[0]
 	if ret.(float64) < float64(10000) || ret.(float64) > float64(50000000000) {
 		ret2, _ := numMoney(data)
@@ -249,8 +254,8 @@ var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金
 
 //数字金额转换
 func numMoney(data []interface{}) ([]interface{}, bool) {
-	tmp := fmt.Sprintf("%f",data[0])
-	tmp = strings.ReplaceAll(tmp,"(不含税)","")
+	tmp := fmt.Sprintf("%f", data[0])
+	tmp = strings.ReplaceAll(tmp, "(不含税)", "")
 	//费率转换% ‰
 	flv := float64(1)
 	if strings.HasSuffix(tmp, "%") {
@@ -358,13 +363,12 @@ func capitalMoney(data []interface{}) []interface{} {
 	str := fmt.Sprint(data[0])
 	//提取第一个大写信息
 
-	if strings.Contains(str,"壹") {
-		str = strings.ReplaceAll(str,"一","壹")
+	if strings.Contains(str, "壹") {
+		str = strings.ReplaceAll(str, "一", "壹")
 	}
 
 	strmatch := numCapitals.FindAllStringSubmatch(str, -1)
 
-
 	if len(strmatch) > 0 {
 		str = strmatch[0][0]
 	}
@@ -487,14 +491,14 @@ func ClearMaxAmount(data []interface{}, spidercode ...string) []interface{} {
 					data[0] = value
 				}
 			}
-			
+
 		}
 	}
 	if value >= 50000000000 {
-		if len(spidercode)>0 {
+		if len(spidercode) > 0 {
 			code := util.ObjToString(spidercode[0])
-			if code == "xz_xzzzqjzscjgycxxxpt_zbtzs"||
-				code == "js_jsszbtbw_zbhxrgs"{
+			if code == "xz_xzzzqjzscjgycxxxpt_zbtzs" ||
+				code == "js_jsszbtbw_zbhxrgs" {
 				return data
 			}
 		}

File diff suppressed because it is too large
+ 78 - 2493
src/jy/extract/extract.go


+ 255 - 0
src/jy/extract/extractcheck.go

@@ -0,0 +1,255 @@
+package extract
+
+import (
+	"fmt"
+	"jy/clear"
+	ju "jy/util"
+	qu "qfw/util"
+	"regexp"
+	"strings"
+	"unicode/utf8"
+)
+
+// delFiled reports whether k is one of the field names listed below; presumably callers use it to drop redundant/bulky fields — confirm against callers.
+func delFiled(k string) bool {
+	return k == "detailfile" || k == "summary" || k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo" || k == "jsondata"
+}
+
+// checkFields post-processes the extracted result map tmp in place (dropping, normalising and deriving fields, with j_data as a secondary source) and returns tmp.
+func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[string]interface{} {
+	delete(tmp, "contenthtml")
+	delete(tmp, "detail")
+	// For "招标" records whose subtype is not "单一", drop all winner-related fields unconditionally (the winner-emptiness condition below is commented out).
+	if qu.ObjToString(tmp["toptype"]) == "招标" &&
+		qu.ObjToString(tmp["subtype"]) != "单一" {
+		delete(tmp, "winner")
+		delete(tmp, "s_winner")
+		delete(tmp, "bidamount")
+		delete(tmp, "winnerorder")
+		//if qu.ObjToString(tmp["winner"])=="" || qu.ObjToString(tmp["winner"])=="有限公司"{
+		//	delete(tmp,"winner")
+		//	delete(tmp,"s_winner")
+		//	delete(tmp,"bidamount")
+		//	delete(tmp,"winnerorder")
+		//}
+	}
+
+	tmp["repeat"] = 0
+	// Spider-specific amount handling: rescale abnormal budget / bid amounts by a factor of 10000.
+	if qu.ObjToString(tmp["spidercode"]) == "xz_xzzzqjzscjgycxxxpt_zbtzs" {
+		if budget, ok := tmp["budget"].(float64); ok && budget > 0 && budget < 1000000 {
+			tmp["budget"] = budget * 10000.0
+		}
+		if bidamount, ok := tmp["bidamount"].(float64); ok && bidamount > 0 && bidamount > 1000000000 {
+			tmp["bidamount"] = bidamount / 10000.0
+		}
+	}
+	if qu.ObjToString(tmp["spidercode"]) == "js_jsszbtbw_zbhxrgs" {
+		if bidamount, ok := tmp["bidamount"].(float64); ok && bidamount > 0 && bidamount > 1000000000 {
+			tmp["bidamount"] = bidamount / 10000.0
+		}
+	}
+
+	// Abnormal amount types: drop amounts whose value is a string rather than a number.
+	if _, ok := tmp["bidamount"].(string); ok {
+		delete(tmp, "bidamount")
+	}
+	if _, ok := tmp["budget"].(string); ok {
+		delete(tmp, "budget")
+	}
+
+	// Threshold for budget / bidamount: values >= 1e9 are moved to the *_threshold key and removed from the main key.
+	if bg, ok := tmp["budget"].(float64); ok && bg >= 1000000000 {
+		tmp["budget_threshold"] = bg
+		delete(tmp, "budget")
+	}
+	if bg, ok := tmp["bidamount"].(float64); ok && bg >= 1000000000 {
+		tmp["bidamount_threshold"] = bg
+		delete(tmp, "bidamount")
+	}
+
+	// Cross-check the bid amount against the candidate list (winnerorder).
+	if bidamount := qu.Float64All(tmp["bidamount"]); bidamount > float64(0) && qu.ObjToString(tmp["winner"]) != "" {
+		if winnerorder := ju.IsMarkInterfaceMap(tmp["winnerorder"]); len(winnerorder) > 0 {
+			// Only when no "package" field is present, so that per-package amounts are not overwritten.
+			if qu.IntAll(winnerorder[0]["sort"]) == 1 && qu.ObjToString(tmp["winner"]) == qu.ObjToString(winnerorder[0]["entname"]) && tmp["package"] == nil {
+				if price := qu.Float64All(winnerorder[0]["price"]); price > float64(0) && price != bidamount {
+					tmp["bidamount"] = price
+				}
+			}
+		}
+	}
+
+	// Quick pass over special fields: coerce flag fields to true or delete them, and delete blank values.
+	for k, v := range tmp {
+		if k == "qualifies" {
+			continue
+		}
+		if k == "contract_guarantee" || k == "bid_guarantee" ||
+			k == "is_acquire_tender" {
+			if len(fmt.Sprint(v)) > 0 {
+				tmp[k] = true
+			} else {
+				delete(tmp, k)
+			}
+		}
+		if k == "is_joint_bidding" || k == "is_payment_deposit" {
+			if fmt.Sprint(v) == "true" {
+				tmp[k] = true
+			} else {
+				delete(tmp, k)
+			}
+		}
+		if v == "" || len(strings.TrimSpace(fmt.Sprint(v))) == 0 { // v is the value read before any rewrite above
+			delete(tmp, k)
+		}
+	}
+	// Special field derived from another field: set is_payment_deposit from the bid_bond text.
+	bid_bond := qu.ObjToString(tmp["bid_bond"])
+	if bid_bond != "" && tmp["is_payment_deposit"] == nil {
+		if strings.Contains(bid_bond, "保证金") && !clearbondReg.MatchString(bid_bond) {
+			tmp["is_payment_deposit"] = true
+		}
+	}
+	// Special field derived from another field: set bidopen_shape when a bid-opening address longer than 5 runes exists.
+	bidopenaddress := qu.ObjToString(tmp["bidopenaddress"])
+	if bidopenaddress != "" && tmp["bidopen_shape"] == nil {
+		if utf8.RuneCountInString(bidopenaddress) > 5 {
+			tmp["bidopen_shape"] = "线下开标"
+		}
+	}
+	// Project period: keep only meaningful values.
+	projectperiod := qu.ObjToString(tmp["projectperiod"])
+	if projectperiod != "" {
+		// Keep the period if it contains a digit, a Chinese numeral, a date unit or "合同"; otherwise delete it.
+		isNeedValueReg := regexp.MustCompile(`([0-9俩两一二三四五六七八九年月日天周]|合同)`)
+		if !isNeedValueReg.MatchString(projectperiod) {
+			delete(tmp, "projectperiod")
+		}
+	}
+	// Validate the duration unit and delete it when invalid.
+	if project_timeunit, ok := tmp["project_timeunit"].(string); ok {
+		dateReg := regexp.MustCompile(`[年|月|日|天|周]`)
+		if !dateReg.MatchString(project_timeunit) || utf8.RuneCountInString(project_timeunit) > 4 {
+			delete(tmp, "project_timeunit")
+		}
+		// Unit "年" with a project_duration of 0 or greater than 5: delete the unit.
+		if project_timeunit == "年" && (qu.Int64All(tmp["project_duration"]) == 0 || qu.Int64All(tmp["project_duration"]) > 5) {
+			delete(tmp, "project_timeunit")
+		}
+	}
+	// Keep winner and s_winner consistent: if s_winner does not contain winner, overwrite it with winner.
+	if tmp["winner"] != nil && tmp["s_winner"] != nil {
+		strwin := qu.ObjToString(tmp["winner"])
+		strwin_s := qu.ObjToString(tmp["s_winner"])
+		if !strings.Contains(strwin_s, strwin) {
+			tmp["s_winner"] = strwin
+		}
+	}
+	// Bid submission method: map the numeric code to its label, or delete the field for any other value.
+	bidway := qu.IntAll(tmp["bidway"])
+	if bidway == 1 {
+		tmp["bidway"] = "纸质投标"
+	} else if bidway == 2 {
+		tmp["bidway"] = "电子投标"
+	} else {
+		delete(tmp, "bidway")
+	}
+	// Discount coefficient: keep only a positive value computed by dealWithDiscountBid.
+	discount := dealWithDiscountBid(tmp)
+	if discount > 0.0 {
+		tmp["biddiscount"] = discount
+	} else {
+		delete(tmp, "biddiscount")
+	}
+	delete(tmp, "biddiscount_up")
+	delete(tmp, "biddiscount_down")
+
+	// Temporary (disabled) timestamp formatting follows.
+	//bidstarttime := qu.Int64All(tmp["bidstarttime"])
+	//docendtime := qu.Int64All(tmp["docendtime"])
+	//timeLayout := "2006-01-02 15:04:05"
+
+	//if bidstarttime>0 {
+	//	time_1 := time.Unix(bidstarttime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
+	//	tmp["bidstarttime"] = time_1
+	//}
+	//if docendtime>0 {
+	//	time_2 := time.Unix(docendtime, 0).Format(timeLayout) //设置时间戳 使用模板格式化为日期字符串
+	//	tmp["docendtime"] = time_2
+	//}
+
+	jyhref := fmt.Sprintf(JYUrl, qu.CommonEncodeArticle("content", qu.BsonIdToSId(tmp["_id"])))
+	tmp["jytest_href"] = jyhref
+
+	// Overlay every key of j_data["jyfb_data"] onto tmp (the original comment labels this the Jianyu-published / crawler data check).
+	jyfb_data := *qu.ObjToMap(j_data["jyfb_data"]) // NOTE(review): dereferences the pointer returned by qu.ObjToMap before the nil check below; presumably panics if that pointer is nil — confirm
+	if jyfb_data != nil {
+		for k, v := range jyfb_data {
+			if k == "area" {
+				delete(tmp, "district")
+			}
+			tmp[k] = v
+		}
+	}
+
+	// For "拟建" records: values from j_data take priority per NiJianField ("type#field" entries); replaced values are recorded in nj_record.
+	if qu.ObjToString(tmp["toptype"]) == "拟建" &&
+		qu.ObjToString(tmp["subtype"]) == "拟建" {
+		nj_record := map[string]interface{}{}
+		for _, v := range NiJianField {
+			arr := strings.Split(v, "#")
+			k_type, k_field := "", ""
+			if len(arr) == 2 {
+				k_type, k_field = arr[0], arr[1]
+			} else {
+				continue
+			}
+			tmpValue := tmp[k_field]
+			is_use := false
+			if k_type == "string" {
+				if qu.ObjToString(j_data[k_field]) != "" {
+					is_use = true
+					tmp[k_field] = qu.ObjToString(j_data[k_field])
+				}
+			} else if k_type == "time" {
+				if j_data[k_field] != nil {
+					tmp["s_"+k_field] = j_data[k_field]
+				}
+				// Start/completion dates collected as strings are converted via clear.ObjToTimestamp.
+				if qu.ObjToString(j_data[k_field]) != "" {
+					new_data := clear.ObjToTimestamp([]interface{}{j_data[k_field]}, "")
+					if len(new_data) > 0 {
+						if qu.Int64All(new_data[0]) > 0 {
+							is_use = true
+							tmp[k_field] = qu.Int64All(new_data[0])
+						}
+					}
+				} else {
+					if qu.Int64All(j_data[k_field]) > int64(0) {
+						is_use = true
+						tmp[k_field] = qu.Int64All(j_data[k_field])
+					}
+				}
+			} else if k_type == "map" {
+				p_info := *qu.ObjToMap(j_data["project_scale_info"]) // NOTE(review): same unchecked dereference as above — confirm qu.ObjToMap never returns nil here
+				if qu.ObjToString(p_info[k_field]) != "" {
+					is_use = true
+					tmp[k_field] = qu.ObjToString(p_info[k_field])
+				}
+			} else {
+
+			}
+			if tmpValue != nil {
+				nj_record[k_field] = map[string]interface{}{
+					k_field:  tmpValue,
+					"is_use": is_use,
+				}
+			}
+		}
+		if len(nj_record) > 0 {
+			tmp["nj_record"] = nj_record
+		}
+	}
+	return tmp
+}

+ 0 - 3
src/jy/extract/extractcity_new.go

@@ -397,9 +397,7 @@ func (e *ExtractTask) NewVerifyXjCorpsInfo(buyer string) (new_a, new_c, new_d st
 func (e *ExtractTask) NewVerifySensitiveInfo(detail string, area *string, city *string, district *string) bool {
 	detail = SensitiveReg.ReplaceAllString(detail, "")
 	detail = TextAfterRemoveTable(detail)
-
 	detail = CleanDetailReg1.ReplaceAllString(detail, "")
-
 	//全称城市
 	fullCityArr := e.SensitiveFullCity.FindAll(detail)
 	if len(fullCityArr) == 1 {
@@ -463,7 +461,6 @@ func (e *ExtractTask) NewVerifySensitiveInfo(detail string, area *string, city *
 			}
 		}
 	}
-
 	return false
 }
 

+ 1 - 1
src/jy/extract/extractInit.go → src/jy/extract/extractinit.go

@@ -666,7 +666,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 		infolist, _ := db.Mgo.Find("infotype", `{}`, `{}`, `{}`, false, -1, -1)
 		for _, v := range *infolist {
 			topclass := qu.ObjToString(v["topclass"])
-			if v["subclass"] == nil {
+			if v["subclass"] == nil { //此位置不会执行
 				e.RuleCores[topclass] = make(map[string][]*RuleCore)
 				for attr, _ := range v["fields"].(map[string]interface{}) {
 					if fieldrules[attr] != nil {

+ 904 - 0
src/jy/extract/extractrule.go

@@ -0,0 +1,904 @@
+package extract
+
+import (
+	"fmt"
+	"jy/clear"
+	"jy/pretreated"
+	ju "jy/util"
+	qu "qfw/util"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// ExtRuleCore runs every rule of vc (Lua rules over the kv map, regex rules when extByReg is true) and appends the extracted fields to j.Result.
+func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju.Job, isSite bool) {
+	// kv candidates gathered by getKvByLuaFields (which also adds winner-candidate amounts); not gathered when extracting from the title.
+	var kvMap map[string][]map[string]interface{}
+	extByReg := true
+	if vc.ExtFrom != "title" {
+		kvMap, extByReg = getKvByLuaFields(vc, j, e)
+	}
+	for _, v := range vc.RuleCores {
+		if v.IsLua {
+			ExtRuleCoreByKv(vc.ExtFrom, doc, j, v, &kvMap, e)
+		} else if extByReg {
+			ExtRuleCoreByReg(vc.ExtFrom, doc, j, v, e, isSite)
+		}
+	}
+	// If there is exactly one package and no budget was extracted, lift the budget found inside that package to the top-level result.
+	if vc.Field == "budget" && len(kvMap) == 0 {
+		if len(j.BlockPackage) == 1 {
+			for _, bp := range j.BlockPackage {
+				for fieldname, field := range vc.LFields {
+					if field != vc.Field {
+						continue
+					}
+					tp := ""
+					for k, v := range []*ju.JobKv{bp.ColonKV, bp.SpaceKV, bp.TableKV} {
+						if k == 0 {
+							tp = "colon"
+						} else if k == 1 {
+							tp = "space"
+						} else if k == 2 {
+							tp = "table"
+						}
+						if v == nil || v.KvTags == nil {
+							continue
+						}
+						for _, vv := range v.KvTags[fieldname] {
+							text := ju.TrimLRSpace(vv.Value, "")
+							if text != "" {
+								tmp := &ju.ExtField{
+									ExtFrom:     "package",
+									Field:       vc.Field,
+									Code:        "CL_分包",
+									Type:        tp,
+									MatchType:   "package",
+									RuleText:    bp.Text,
+									SourceValue: vv.Key,
+									Value:       text,
+								}
+								if isSite {
+									tmp.Score = 1
+								}
+								j.Result[vc.Field] = append(j.Result[vc.Field], tmp)
+							}
+						}
+					}
+				}
+				break
+			}
+		}
+	} else {
+		for k, v := range kvMap {
+			if j.Result[k] == nil {
+				j.Result[k] = [](*ju.ExtField){}
+			}
+			for _, tmp := range v {
+				field := &ju.ExtField{Weight: qu.IntAll(tmp["weight"]),
+					ExtFrom: qu.ObjToString(tmp["extfrom"]), Field: k,
+					Code: qu.ObjToString(tmp["code"]), Type: qu.ObjToString(tmp["type"]),
+					MatchType:   qu.ObjToString(tmp["matchtype"]),
+					RuleText:    qu.ObjToString(tmp["ruletext"]),
+					SourceValue: tmp["sourcevalue"],
+					Value:       tmp["value"]}
+				if k == "bidamount" && field.ExtFrom == "第一候选人" {
+					field.Score = 1
+				}
+				if isSite {
+					field.Score = 1
+				}
+				if (field.Field == "bidamount" || field.Field == "budget") && field.Type == "table" { // table amounts are converted to numbers right away
+					moneys := clear.ObjToMoney([]interface{}{field.Value, ""}, j.SpiderCode, j.IsClearnMoney)
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							field.Value = vf
+							field.IsTrue = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							field.Value = float64(vi)
+							field.IsTrue = moneys[len(moneys)-1].(bool)
+						}
+					}
+				}
+				if tmp["blocktag"] != nil {
+					btag := make(map[string]string)
+					for k := range tmp["blocktag"].(map[string]bool) {
+						blocktag.Lock()
+						if TagConfigDesc[k] != "" {
+							btag[k] = TagConfigDesc[k]
+						}
+						blocktag.Unlock()
+					}
+					field.BlockTag = btag
+				}
+				j.Result[k] = append(j.Result[k], field)
+			}
+		}
+	}
+
+}
+
+// ExtRuleCoreByKv runs the Lua rule in against the kv map and appends the entries it returns for in.Field to (*kvMap)[in.Field]; it does nothing for title extraction or non-Lua rules.
+func ExtRuleCoreByKv(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, kvMap *map[string][]map[string]interface{}, et *ExtractTask) {
+	defer qu.Catch()
+	if extfrom == "title" || !in.IsLua {
+		return
+	}
+	lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
+	lua.KvMap = *kvMap
+	lua.Block = j.Block
+	extinfo := lua.RunScript("core")
+	if tmps, ok := extinfo[in.Field].([]map[string]interface{}); ok {
+		for _, v := range tmps {
+			v["core"] = in.Code
+		}
+		(*kvMap)[in.Field] = append((*kvMap)[in.Field], tmps...)
+	}
+	if len(extinfo) > 0 {
+		AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) // extraction log
+	}
+}
+
+// ExtRuleCoreByReg applies the regex rule in to the title, to the plain text of doc[extfrom] (for "qualifies"), or to each block of j, logging any extraction result.
+func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask, isSite bool) {
+	defer qu.Catch()
+	// Decide from the field configuration whether to extract at all (the original comment gives cancelled / failed tenders as examples that are skipped).
+	b := IsExtract(in.Field, j.Title, j.Content)
+	if !b {
+		return
+	}
+	// Full-text regex extraction (disabled).
+	//text := qu.ObjToString(doc[extfrom])
+	//if in.Field != "" {
+	//	extinfo := extRegCoreToResult(extfrom, text, j, in)
+	//	if len(extinfo) > 0 {
+	//		AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
+	//	}
+	//}
+	// Block-level extraction.
+	if in.Field != "" {
+		if extfrom == "title" {
+			extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]string{}, j, in, isSite)
+			if len(extinfo) > 0 {
+				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) // extraction log
+			}
+		} else if in.Field == "qualifies" {
+			extinfo := extRegCoreToResult(extfrom, pretreated.HtmlToText(qu.ObjToString(doc[extfrom])), &map[string]string{}, j, in, isSite)
+			if len(extinfo) > 0 {
+				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) // extraction log
+			}
+		} else {
+			for _, v := range j.Block {
+				btag := make(map[string]string)
+				for k := range v.Classify {
+					blocktag.Lock()
+					btag[k] = TagConfigDesc[k]
+					blocktag.Unlock()
+				}
+				extinfo := extRegCoreToResult(extfrom, v.Text, &btag, j, in, isSite)
+				if len(extinfo) > 0 {
+					AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) // extraction log
+				}
+			}
+		}
+	}
+}
+
+// ExtRuleCoreByPkgReg applies the regex rule in to the text of every package in j.BlockPackage and fills that package's still-empty fields (budget, agencyfee, bidamount, winner, winnerperson, winnertel, bidstatus, projectname).
+func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
+	defer qu.Catch()
+	// Decide from the field configuration whether to extract at all (the original comment gives cancelled / failed tenders as examples that are skipped).
+	b := IsExtract(in.Field, j.Title, j.Content)
+	if !b {
+		return
+	}
+	// Block-level extraction.
+	if in.Field != "" {
+		// Temporary hook for debugging per-package field extraction.
+		if in.Field == "bidamount" {
+			//log.Debug("分包-调试字段...")
+		}
+		for k, vbpkg := range j.BlockPackage {
+			rep := map[string]string{}
+			if in.RegCore.Bextract { // two-part rule ("pattern__positions") whose capture groups can be extracted directly
+				if in.Field == "budget" && vbpkg.Budget > 0 {
+					continue
+				}
+				if in.Field == "agencyfee" && vbpkg.Agencyfee > 0 {
+					continue
+				}
+				if in.Field == "bidamount" && vbpkg.Bidamount > 0 {
+					continue
+				}
+				if in.Field == "winner" && vbpkg.Winner != "" {
+					continue
+				}
+				if in.Field == "bidstatus" && vbpkg.BidStatus != "" {
+					continue
+				}
+				if in.Field == "projectname" && vbpkg.Name != "" {
+					continue
+				}
+				if in.Field == "winner" && vbpkg.Winner != "" { // duplicate of the winner check above; kept to leave the line count unchanged
+					continue
+				}
+				if in.Field == "winnerperson" {
+					if vbpkg.Winner == "" || len(vbpkg.Winner) < 4 {
+						continue
+					}
+					if !strings.Contains(vbpkg.Text, vbpkg.Winner) {
+						continue
+					}
+				}
+				if in.Field == "winnertel" {
+					if vbpkg.WinnerPerson == "" {
+						continue
+					}
+				}
+				// Sign correction: an optional "#正" / "#负" suffix on the rule text forces the sign of the value.
+				ptmp := strings.Split(in.RuleText, "#")
+				sign := 0
+				if len(ptmp) == 2 {
+					if ptmp[1] == "正" {
+						sign = 1
+					} else if ptmp[1] == "负" {
+						sign = -1
+					}
+				}
+				tmp := strings.Split(ptmp[0], "__")
+				if len(tmp) == 2 {
+					epos := strings.Split(tmp[1], ",")
+					posm := map[string]int{}
+					for _, v := range epos {
+						ks := strings.Split(v, ":")
+						if len(ks) == 2 { // e.g. (.*)招标公告(.*)__2:projectname,4:area
+							posm[ks[1]] = qu.IntAll(ks[0])
+						} else {
+							posm[in.Field] = qu.IntAll(ks[0])
+						}
+					}
+					var pattern string
+					if strings.Contains(tmp[0], "\\u") {
+						tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
+						tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
+						pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
+					} else {
+						pattern = tmp[0]
+					}
+					//log.Debug("pattern", pattern)
+					//fmt.Println(text)
+					reg := regexp.MustCompile(pattern)
+					apos := reg.FindAllStringSubmatchIndex(vbpkg.Text, -1)
+					for i, _ := range apos {
+						pos := apos[i]
+						for k, p := range posm {
+							if len(pos) > p {
+								if pos[p] == -1 || pos[p+1] == -1 {
+									continue
+								}
+								val := vbpkg.Text[pos[p]:pos[p+1]]
+								if string(val) == "" {
+									continue
+								}
+								if sign == -1 {
+									rep[k+"_"+fmt.Sprint(i)] = "-" + val
+								} else {
+									rep[k+"_"+fmt.Sprint(i)] = val
+								}
+							}
+						}
+					}
+					//fmt.Println(text)
+					for i := 0; i < len(apos); i++ {
+						if strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]) != "" {
+							if in.Field == "budget" && vbpkg.Budget <= 0 {
+								lock.Lock()
+								cfn := e.ClearFn[in.Field]
+								lock.Unlock()
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
+								if data[len(data)-1].(bool) {
+									j.BlockPackage[k].Budget = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueBudget = true
+								}
+								break
+							} else if in.Field == "agencyfee" && vbpkg.Agencyfee <= 0 {
+								lock.Lock()
+								cfn := e.ClearFn[in.Field]
+								lock.Unlock()
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
+								if data[len(data)-1].(bool) {
+									j.BlockPackage[k].Agencyfee = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueAgencyfee = true
+								}
+								break
+							} else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
+								lock.Lock()
+								cfn := e.ClearFn[in.Field]
+								lock.Unlock()
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
+								if data[len(data)-1].(bool) {
+									j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueBidamount = true
+								}
+								break
+							} else if in.Field == "winner" {
+								if j.BlockPackage[k].Winner == "" {
+									j.BlockPackage[k].Winner = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "winnertel" {
+								if j.BlockPackage[k].WinnerTel == "" {
+									j.BlockPackage[k].WinnerTel = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "winnerperson" {
+								if j.BlockPackage[k].WinnerPerson == "" {
+									j.BlockPackage[k].WinnerPerson = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "bidstatus" {
+								if j.BlockPackage[k].BidStatus == "" {
+									j.BlockPackage[k].BidStatus = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "projectname" {
+								if j.BlockPackage[k].Name == "" {
+									j.BlockPackage[k].Name = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "winnerperson" { // unreachable: "winnerperson" is already handled earlier in this chain
+								if j.BlockPackage[k].WinnerPerson == "" {
+									j.BlockPackage[k].WinnerPerson = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							} else if in.Field == "winnertel" { // unreachable: "winnertel" is already handled earlier in this chain
+								if j.BlockPackage[k].WinnerTel == "" && j.BlockPackage[k].Winner != "" && j.BlockPackage[k].WinnerPerson != "" {
+									j.BlockPackage[k].WinnerTel = rep[in.Field+"_"+fmt.Sprint(i)]
+									break
+								}
+							}
+						}
+					}
+				}
+			} else {
+				pos := in.RegCore.Reg.FindStringIndex(vbpkg.Text)
+				val := ""
+				if len(pos) == 2 {
+					// Fix: use the package text after the match; the old code sliced and scanned the string literal "text" (panic when pos[1] > 4, otherwise the value was always "text").
+					tail := vbpkg.Text[pos[1]:]
+					rs := regexp.MustCompile("[^\r\n\t]+")
+					tmp := rs.FindAllString(tail, -1)
+					if len(tmp) > 0 {
+						val = tmp[0]
+					}
+				}
+				if val != "" {
+					if in.Field == "budget" && vbpkg.Budget <= 0 {
+						lock.Lock()
+						cfn := e.ClearFn[in.Field]
+						lock.Unlock()
+						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
+						if data[len(data)-1].(bool) {
+							j.BlockPackage[k].Budget = qu.Float64All(data[0])
+							j.BlockPackage[k].IsTrueBudget = true
+						}
+						break // NOTE(review): this exits the loop over packages, unlike the breaks in the branch above which exit the per-match loop — confirm intended
+					}
+					if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
+						lock.Lock()
+						cfn := e.ClearFn[in.Field]
+						lock.Unlock()
+						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
+						if data[len(data)-1].(bool) {
+							j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
+							j.BlockPackage[k].IsTrueBidamount = true
+						}
+						break
+					} else if in.Field == "bidstatus" {
+						if j.BlockPackage[k].BidStatus == "" {
+							j.BlockPackage[k].BidStatus = val
+							break
+						}
+					} else if in.Field == "projectname" {
+						if j.BlockPackage[k].Name == "" {
+							j.BlockPackage[k].Name = val
+							break
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+// getKvByLuaFields collects kv candidates for vc.Field (winner-candidate prices for bidamount, then tagged kv values from j.Block) and returns them with a flag; it returns false when it exits early on a winner-candidate price with fewer than 4 candidates.
+func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]map[string]interface{}, bool) {
+	kvmap := map[string][]map[string]interface{}{}
+	if len(j.Winnerorder) > 1 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
+		if vc.Field == "bidamount" {
+			for k, v := range j.Winnerorder {
+				if v["price"] == nil || k != 0 { // only the first candidate (index 0) with a price is used
+					continue
+				}
+				kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
+					"code":        "winnerorder",
+					"field":       vc.Field,
+					"ruletext":    "中标候选人_" + fmt.Sprint(v["sortstr"]),
+					"extfrom":     v["sortstr"],
+					"sourcevalue": v["price"],
+					"value":       v["price"],
+					"type":        "winnerorder",
+					"matchtype":   "winnerorder",
+				})
+				if len(j.Winnerorder) < 4 {
+					return kvmap, false
+				}
+			}
+			// Bid amount of the first candidate; reached only with 4 or more candidates, so the same price is appended a second time under a different code.
+			if price := j.Winnerorder[0]["price"]; price != nil {
+				kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
+					"code":        "CL_中标候选人",
+					"field":       vc.Field,
+					"ruletext":    "中标候选人",
+					"extfrom":     j.Winnerorder[0]["sortstr"],
+					"sourcevalue": price,
+					"value":       price,
+					"type":        "winnerorder",
+					"matchtype":   "winnerorder",
+				})
+				if len(j.Winnerorder) < 4 { // always false at this point: the loop above already returned in that case
+					return kvmap, false
+				}
+			}
+		}
+	}
+	for fieldname, field := range vc.LFields {
+		if field != vc.Field {
+			continue
+		}
+		extractFromKv(field, fieldname, j.Block, vc, kvmap, j.Category)
+	}
+	AddExtLog("extract", j.SourceMid, nil, kvmap, &RegLuaInfo{Field: vc.Field}, et.TaskInfo) // extraction log
+	return kvmap, true
+}
+
+func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kvmap map[string][]map[string]interface{}, Category string) { // appends to kvmap[field] the values tagged fieldname in blocks, recursing into sub-blocks while nothing has been found
+	//qu.Debug("fieldname+++", fieldname)
+	for _, bl := range blocks {
+		tp := ""
+		if strings.Contains(bl.Title, "保证金") && (field == "bid_bond" || field == "contract_bond") { // bond fields: a block whose title contains "保证金" contributes its whole text
+			if text := ju.TrimLRSpace(bl.Text, ""); text != "" {
+				if Category == "招标" || Category == "拟建" || Category == "预告" {
+					kvmap[field] = append(kvmap[field], map[string]interface{}{
+						"code":        "CL_块内容",
+						"field":       field,
+						"ruletext":    "投标保证金",
+						"extfrom":     "投标保证金_块内容",
+						"sourcevalue": bl.Text,
+						"value":       text,
+						"type":        "投标保证金_块内容",
+						"matchtype":   "tag_string",
+						"blocktag":    bl.Classify,
+						"weight":      0,
+					})
+				} else if Category == "结果" {
+					kvmap[field] = append(kvmap[field], map[string]interface{}{
+						"code":        "CL_",
+						"field":       field,
+						"ruletext":    "履约保证金",
+						"extfrom":     "履约保证金_块内容",
+						"sourcevalue": bl.Text,
+						"value":       text,
+						"type":        "履约保证金_块内容",
+						"matchtype":   "tag_string",
+						"blocktag":    bl.Classify,
+						"weight":      0,
+					})
+				}
+			}
+			return // stops at the first such block; remaining blocks are not examined
+		}
+		for k, v := range []*ju.JobKv{bl.ColonKV, bl.SpaceKV, bl.TableKV} {
+			if k == 0 {
+				tp = "colon"
+			} else if k == 1 {
+				tp = "space"
+			} else if k == 2 {
+				tp = "table"
+			}
+			if v == nil || v.KvTags == nil {
+				continue
+			}
+			for _, vv := range v.KvTags[fieldname] {
+				text := ju.TrimLRSpace(vv.Value, "")
+				if text != "" {
+					kvmap[field] = append(kvmap[field], map[string]interface{}{
+						"code":        "CL_" + vv.Key,
+						"field":       field,
+						"ruletext":    vv.Key,
+						"extfrom":     vc.ExtFrom,
+						"sourcevalue": text,
+						"value":       text,
+						"type":        tp,
+						"matchtype":   "tag_string",
+						"blocktag":    bl.Classify,
+						"weight":      vv.Weight,
+					})
+					//if field != "winnertel" && field != "winnerperson" {
+					//	//break //暂定取第一个
+					//}
+				}
+			}
+		}
+		if len(kvmap[field]) == 0 { // nothing found so far: descend into this block's sub-blocks
+			extractFromKv(field, fieldname, bl.Block, vc, kvmap, Category)
+		}
+	}
+}
+
+// regCoreLineValue matches the first run of characters that contains no CR, LF or TAB.
+// It is compiled once instead of on every extRegCoreToResult call.
+var regCoreLineValue = regexp.MustCompile("[^\r\n\t]+")
+
+// extRegCoreToResult applies one regular-expression rule (vre) to text, appends
+// every extracted value to j.Result[vre.Field] and returns the same values as
+// log-friendly maps keyed by field name.
+//
+// Two rule shapes are handled:
+//   - vre.RegCore.Bextract == true: vre.RuleText has the form
+//     "<pattern>__<positions>[#正|#负]". <positions> is either "<idx>~~<suffix>"
+//     (value of vre.Field taken at idx, suffix appended) or a comma separated
+//     list of "<idx>" / "<idx>:<field>" items. idx is an offset into the slice
+//     returned by FindAllStringSubmatchIndex. A trailing "#负" prefixes every
+//     value with "-".
+//   - otherwise: vre.RegCore.Reg locates a marker in text and the value is the
+//     first run of non CR/LF/TAB characters that follows the marker.
+//
+// When isSite is true the rule score is raised by 1. An invalid pattern yields
+// an empty result instead of a panic.
+func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job, vre *RegLuaInfo, isSite bool) map[string][]map[string]interface{} {
+	defer qu.Catch()
+	score := vre.Score
+	if isSite {
+		score = score + 1.0
+	}
+	// Tolerate a nil tag pointer instead of panicking on the dereference.
+	var blockTag map[string]string
+	if tag != nil {
+		blockTag = *tag
+	}
+
+	extinfo := map[string][]map[string]interface{}{}
+	if vre.RegCore.Bextract { // two-part rule: "<pattern>__<positions>"
+		// Optional sign correction after '#'. Only the negative marker changes the value.
+		ruleParts := strings.Split(vre.RuleText, "#")
+		negative := len(ruleParts) == 2 && ruleParts[1] == "负"
+		tmp := strings.Split(ruleParts[0], "__")
+		if len(tmp) != 2 {
+			return extinfo
+		}
+
+		// posm maps a result key (field name) to an offset into the submatch index slice.
+		posm := map[string]int{}
+		sufVal := ""
+		sufArr := strings.Split(tmp[1], "~~")
+		if len(sufArr) == 2 { // suffix form: "<idx>~~<suffix>"
+			posm[vre.Field] = qu.IntAll(sufArr[0])
+			sufVal = sufArr[1]
+		} else {
+			for _, v := range strings.Split(tmp[1], ",") {
+				ks := strings.Split(v, ":")
+				if len(ks) == 2 { // e.g. (.*)招标公告(.*)__2:projectname,4:area
+					posm[ks[1]] = qu.IntAll(ks[0])
+				} else {
+					posm[vre.Field] = qu.IntAll(ks[0])
+				}
+			}
+		}
+
+		// Patterns containing \uXXXX escapes are unquoted so the escapes become
+		// real characters; every other backslash is doubled first to survive Unquote.
+		pattern := tmp[0]
+		if strings.Contains(pattern, "\\u") {
+			escaped := strings.Replace(pattern, "\\", "\\\\", -1)
+			escaped = strings.Replace(escaped, "\\\\u", "\\u", -1)
+			unquoted, err := strconv.Unquote(`"` + escaped + `"`)
+			if err != nil {
+				return extinfo
+			}
+			pattern = unquoted
+		}
+		// The pattern comes from rule configuration, so compile errors are handled
+		// instead of panicking through MustCompile.
+		reg, err := regexp.Compile(pattern)
+		if err != nil {
+			return extinfo
+		}
+
+		// rep collects "<key>_<matchIndex>" -> extracted value.
+		rep := map[string]string{}
+		apos := reg.FindAllStringSubmatchIndex(text, -1)
+		for i := range apos {
+			pos := apos[i]
+			for k, p := range posm {
+				// Both pos[p] and pos[p+1] are read, so both must be in range.
+				if p < 0 || p+1 >= len(pos) {
+					continue
+				}
+				if pos[p] == -1 || pos[p+1] == -1 {
+					continue
+				}
+				val := text[pos[p]:pos[p+1]]
+				if val == "" {
+					continue
+				}
+				val += sufVal
+				if negative {
+					val = "-" + val
+				}
+				rep[k+"_"+fmt.Sprint(i)] = val
+			}
+		}
+
+		tmps := []map[string]interface{}{}
+		for i := 0; i < len(apos); i++ {
+			val := rep[vre.Field+"_"+fmt.Sprint(i)]
+			if strings.TrimSpace(val) == "" {
+				continue
+			}
+			tmp := map[string]interface{}{
+				"field":     vre.Field,
+				"code":      vre.Code,
+				"ruletext":  vre.RuleText,
+				"extfrom":   text,
+				"value":     val,
+				"type":      "regexp",
+				"matchtype": "regcontent",
+				"blocktag":  blockTag,
+				"score":     score,
+			}
+			exfield := ju.ExtField{
+				BlockTag:    blockTag,
+				Field:       vre.Field,
+				Code:        vre.Code,
+				RuleText:    vre.RuleText,
+				Type:        "regexp",
+				MatchType:   "regcontent",
+				ExtFrom:     extfrom,
+				SourceValue: val,
+				Value:       val,
+				Score:       score,
+			}
+			// For "qualifies" the rule text is replaced by the captured key when
+			// more than one capture was collected.
+			if vre.Field == "qualifies" && len(rep) >= 2 {
+				key := rep[vre.Field+"_key_"+fmt.Sprint(i)]
+				tmp["ruletext"] = key
+				exfield.RuleText = key
+			}
+			tmps = append(tmps, tmp)
+			j.Result[vre.Field] = append(j.Result[vre.Field], &exfield)
+		}
+		if len(tmps) > 0 {
+			extinfo[vre.Field] = tmps
+		}
+	} else {
+		pos := vre.RegCore.Reg.FindStringIndex(text)
+		val := ""
+		if len(pos) == 2 {
+			// Keep only what follows the marker; the value is its first line-like run.
+			text = text[pos[1]:]
+			val = regCoreLineValue.FindString(text)
+		}
+		if val != "" {
+			extinfo[vre.Field] = []map[string]interface{}{
+				{
+					"field":     vre.Field,
+					"code":      vre.Code,
+					"ruletext":  vre.RuleText,
+					"extfrom":   text,
+					"value":     val,
+					"type":      "regexp",
+					"matchtype": "regcontent",
+					"blocktag":  blockTag,
+					"score":     score,
+				},
+			}
+			field := &ju.ExtField{
+				BlockTag:    blockTag,
+				Field:       vre.Field,
+				Code:        vre.Code,
+				RuleText:    vre.RuleText,
+				Type:        "regexp",
+				MatchType:   "regcontent",
+				ExtFrom:     extfrom,
+				SourceValue: text,
+				Value:       val,
+				Score:       score,
+			}
+			j.Result[vre.Field] = append(j.Result[vre.Field], field)
+		}
+	}
+	return extinfo
+}
+
+// ExtRegBack is the post-extraction ("back") filter for the values in j.Result.
+//
+// Lua rules (in.IsLua): the script receives the current results and, for every
+// field it returns as []map[string]interface{}, j.Result[field] is replaced by
+// the returned entries.
+//
+// Regexp rules: in.RegPreBac.Reg / in.RegPreBac.Replace are applied to the
+// values of in.Field, or to the values of every field when in.Field is empty.
+// Only values that actually change are written back and logged via AddExtLog.
+// Values of field "projectname" whose Type is "table" are never cleaned.
+// When vc.ExtFrom is "title" and in.Field is "buyer", only values extracted
+// from the title are cleaned.
+func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
+	defer qu.Catch()
+	if in.IsLua {
+		result := GetResultMapForLua(j)
+		lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
+		if j != nil {
+			lua.Block = j.Block
+		}
+		extinfo := lua.RunScript("back")
+		for k, v := range extinfo {
+			tmps, ok := v.([]map[string]interface{})
+			if !ok {
+				continue
+			}
+			fields := make([]*ju.ExtField, 0, len(tmps))
+			for _, tmp := range tmps {
+				field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]),
+					ExtFrom: qu.ObjToString(tmp["extfrom"]),
+					Value:   tmp["value"]}
+				// Script output is untyped; a block tag of an unexpected type is
+				// ignored rather than aborting the whole replacement with a panic.
+				if blockTag, ok := tmp["blocktag"].(map[string]string); ok {
+					field.BlockTag = blockTag
+				}
+				fields = append(fields, field)
+			}
+			j.Result[k] = fields
+		}
+		if len(extinfo) > 0 {
+			AddExtLog("clear", j.SourceMid, result, extinfo, in, t) // extraction log
+		}
+		return
+	}
+
+	extinfo := map[string]interface{}{}
+	if in.Field != "" {
+		// A buyer extracted from the title is cleaned by the title rules only.
+		clearByTitle := vc != nil && vc.ExtFrom == "title" && in.Field == "buyer"
+		if j.Result[in.Field] != nil {
+			tmp := j.Result[in.Field]
+			exts := []interface{}{}
+			for _, v := range tmp {
+				if clearByTitle && v.ExtFrom != "title" {
+					continue
+				}
+				// Project names extracted from a table are not cleaned. This skips
+				// only the current value: returning here would leave the remaining
+				// values untouched and drop the log of those already changed.
+				if v.Type == "table" && v.Field == "projectname" {
+					continue
+				}
+				text := qu.ObjToString(v.Value)
+				if v.Field == "bidamount" || v.Field == "budget" {
+					source := qu.ObjToString(v.SourceValue)
+					// Amounts whose source text talks about a rate or percentage
+					// are flagged as not trustworthy, unless the text also names a
+					// design fee or a tax-inclusive total.
+					if (strings.Contains(source, "费率") ||
+						strings.Contains(source, "税率") ||
+						strings.Contains(source, "(%)")) &&
+						!strings.Contains(source, "工程设计费") &&
+						!strings.Contains(source, "含税总价") {
+						v.IsTrue = false
+						continue
+					}
+				}
+				if text != "" {
+					text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
+				}
+				if text == qu.ObjToString(v.Value) { // unchanged: nothing to store or log
+					continue
+				}
+				v.Value = text
+				exts = append(exts, map[string]interface{}{
+					"field":     v.Field,
+					"code":      v.Code,
+					"ruletext":  v.RuleText,
+					"type":      v.Type,
+					"matchtype": v.MatchType,
+					"extfrom":   v.ExtFrom,
+					"value":     text,
+				})
+			}
+			if len(exts) > 0 {
+				extinfo[in.Field] = exts
+				AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) // extraction log
+			}
+		}
+		return
+	}
+
+	for key, tmp := range j.Result {
+		exts := []interface{}{}
+		for _, v := range tmp {
+			// Project names extracted from a table are not cleaned. Map iteration
+			// order is random, so returning here would make the set of cleaned
+			// fields nondeterministic; only the current value is skipped.
+			if v.Type == "table" && v.Field == "projectname" {
+				continue
+			}
+			text := qu.ObjToString(v.Value)
+			if text != "" {
+				text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
+			}
+			if text == qu.ObjToString(v.Value) { // unchanged: nothing to store or log
+				continue
+			}
+			v.Value = text
+			exts = append(exts, map[string]interface{}{
+				"field":     v.Field,
+				"code":      v.Code,
+				"ruletext":  v.RuleText,
+				"type":      v.Type,
+				"matchtype": v.MatchType,
+				"extfrom":   v.ExtFrom,
+				"value":     text,
+			})
+		}
+		if len(exts) > 0 {
+			extinfo[key] = exts
+			AddExtLog("clear", j.SourceMid, j.Result, extinfo, in, t) // extraction log
+		}
+	}
+}
+
+// ExtRegBackPkg runs the post-extraction regexp replacement of in on every
+// entry of j.BlockPackage. in.Field selects the attribute that is rewritten:
+// "winner", "bidstatus", "projectname" (stored in Name), "winnerperson",
+// "winnertel", or the package text when in.Field is empty. Any other field
+// name leaves the packages untouched.
+func ExtRegBackPkg(j *ju.Job, in *RegLuaInfo) {
+	defer qu.Catch()
+	// scrub applies the rule's regexp and replacement to one string.
+	scrub := func(s string) string {
+		return in.RegPreBac.Reg.ReplaceAllString(s, in.RegPreBac.Replace)
+	}
+	for name, pkg := range j.BlockPackage {
+		switch in.Field {
+		case "winner":
+			j.BlockPackage[name].Winner = scrub(pkg.Winner)
+		case "bidstatus":
+			j.BlockPackage[name].BidStatus = scrub(pkg.BidStatus)
+		case "":
+			j.BlockPackage[name].Text = scrub(pkg.Text)
+		case "projectname":
+			j.BlockPackage[name].Name = scrub(pkg.Name)
+		case "winnerperson":
+			j.BlockPackage[name].WinnerPerson = scrub(pkg.WinnerPerson)
+		case "winnertel":
+			j.BlockPackage[name].WinnerTel = scrub(pkg.WinnerTel)
+		}
+	}
+}
+
+// ExtRuleKV is the key/value filter: it applies the regexp replacement of in to
+// the values of j.Result[in.Field] whose Type is "table" or contains "colon"
+// or "space". Values that change are written back and reported through
+// AddExtLog. Nothing happens when in.Field is empty or has no results.
+func ExtRuleKV(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
+	defer qu.Catch()
+	if in.Field == "" {
+		return
+	}
+	fields := j.Result[in.Field]
+	if fields == nil {
+		return
+	}
+	changed := []interface{}{}
+	for idx, ef := range fields {
+		fromKv := ef.Type == "table" || strings.Contains(ef.Type, "colon") || strings.Contains(ef.Type, "space")
+		if !fromKv {
+			continue
+		}
+		before := qu.ObjToString(ef.Value)
+		after := before
+		if before != "" {
+			after = in.RegPreBac.Reg.ReplaceAllString(before, in.RegPreBac.Replace)
+		}
+		if after == before { // value unchanged: nothing to store or log
+			continue
+		}
+		fields[idx].Value = after
+		entry := map[string]interface{}{
+			"field":     ef.Field,
+			"code":      ef.Code,
+			"ruletext":  ef.RuleText,
+			"type":      ef.Type,
+			"matchtype": ef.MatchType,
+			"extfrom":   ef.ExtFrom,
+			"value":     after,
+		}
+		changed = append(changed, entry)
+	}
+	if len(changed) == 0 {
+		return
+	}
+	extinfo := map[string]interface{}{in.Field: changed}
+	AddExtLog("clear", j.SourceMid, fields, extinfo, in, t) // extraction log
+}
+
+//前置过滤
+func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} {
+	defer qu.Catch()
+	before := ju.DeepCopy(doc).(map[string]interface{})
+	extinfo := map[string]interface{}{}
+	if in.IsLua {
+		lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
+		if j != nil {
+			lua.Block = j.Block
+		}
+		extinfo = lua.RunScript("pre")
+		for k, v := range extinfo { //结果覆盖原doc
+			doc[k] = v
+		}
+		AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
+	} else {
+		var key string
+		if !j.IsFile {
+			key = qu.If(in.Field == "", "detail", in.Field).(string)
+		} else {
+			key = qu.If(in.Field == "", "detailfile", in.Field).(string)
+		}
+		text := qu.ObjToString(doc[key])
+		extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "")
+		doc[key] = extinfo[key]                                      //结果覆盖原doc
+		AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
+	}
+	return doc
+}

+ 754 - 0
src/jy/extract/extractsave.go

@@ -0,0 +1,754 @@
+package extract
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	log "github.com/donnie4w/go-logger/logger"
+	"jy/clear"
+	db "jy/mongodbutil"
+	ju "jy/util"
+	qu "qfw/util"
+	"sort"
+	"strings"
+	"time"
+	"unicode/utf8"
+)
+
+// AnalysisSaveResult scores the extraction results of j (and of the optional
+// attachment job jf), picks one value per field, merges package, candidate,
+// region, brand and price/number data into a single document and queues that
+// document on e for saving. When e.TaskInfo.TestColl is set the document is
+// upserted into that collection instead (test / result tracking mode).
+//
+// The whole body runs inside qu.Try; the second callback logs whatever error
+// value qu.Try hands to it.
+func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
+	qu.Try(func() {
+		// Tender ("招标") and pre-notice ("预告") announcements that are not
+		// single-source ("单一") must not carry winner information: drop it
+		// from the results, the block packages and the package info.
+		if (j.Category == "招标" || j.Category == "预告") && (len(j.BlockPackage) > 0 || len(j.PackageInfo) > 0 || len(j.Result) > 0) {
+			if j.CategorySecond != "单一" {
+				delete(j.Result, "winner")
+				delete(j.Result, "bidamount")
+				for _, v := range j.BlockPackage {
+					v.Bidamount = 0
+					v.IsTrueBidamount = false
+					if v.Winner != "" {
+						v.Winner = ""
+						if v.SpaceKV != nil {
+							delete(v.SpaceKV.KvTags, "中标单位")
+						}
+						if v.TableKV != nil {
+							delete(v.TableKV.KvTags, "中标单位")
+						}
+						if v.ColonKV != nil {
+							delete(v.ColonKV.KvTags, "中标单位")
+						}
+					}
+				}
+				for _, v := range j.PackageInfo {
+					delete(v, "winner")
+					delete(v, "bidamount")
+				}
+				j.Winnerorder = nil
+				if jf != nil && jf.Winnerorder != nil {
+					jf.Winnerorder = nil
+				}
+			}
+		}
+		// Rebuild the candidate list from the cleaned winner candidates.
+		resetWinnerorder(j)
+		// Scoring.
+		doc, result, _id := funcAnalysis(j, e)
+		// Optionally persist newly seen tags in the background.
+		if ju.IsSaveTag {
+			go otherNeedSave(j, result, e)
+		}
+		// tmp is the document that will be saved; values are taken from the
+		// ordered results.
+		tmp := map[string]interface{}{}
+		tmp["spidercode"] = j.SpiderCode
+		tmp["site"] = j.Site
+		if len(*j.Jsondata) > 0 {
+			tmp["jsondata"] = j.Jsondata
+		}
+		// fieldSource records, per field, where the chosen value came from.
+		fieldSource := map[string]interface{}{}
+		for k, val := range result {
+			if k == "qualifies" {
+				// Keep one entry per rule text: the first positive-score value,
+				// replaced later only by a value with a strictly higher score.
+				squalifies := make([]interface{}, 0)
+				squalifiesMap := make(map[string]*scoreIndex)
+				for _, kv := range val {
+					skey := kv.RuleText
+					if kv.Score <= 0 {
+						continue
+					}
+					seen := squalifiesMap[skey]
+					if seen == nil {
+						// Add to the index instead of replacing the whole map,
+						// otherwise keys seen earlier are forgotten and appended twice.
+						squalifiesMap[skey] = &scoreIndex{
+							Score: kv.Score,
+							Index: len(squalifies),
+						}
+						squalifies = append(squalifies, map[string]interface{}{
+							"key":   skey,
+							"value": kv.Value,
+						})
+					} else if seen.Score < kv.Score {
+						squalifies[seen.Index] = map[string]interface{}{
+							"key":   skey,
+							"value": kv.Value,
+						}
+						// Remember the new best score so a later, lower score
+						// cannot replace it.
+						seen.Score = kv.Score
+					}
+				}
+				tmp[k] = squalifies
+				continue
+			}
+
+			// Budget / bid amount: special handling when calculateAbnormalMoney
+			// reports an abnormal case and the index of the value to use.
+			if k == "bidamount" || k == "budget" {
+				b, index := calculateAbnormalMoney(val)
+				if b {
+					new_v := val[index]
+					tmp[new_v.Field] = new_v.Value
+					fieldSource[new_v.Field] = map[string]interface{}{
+						"ext_type": new_v.Type,
+						"ext_from": new_v.ExtFrom,
+					}
+					tmp["is_dif_ratioMoney"] = true
+					continue
+				}
+			}
+
+			for _, v := range val { // take the first value with a score above -1
+				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Score > -1 {
+					tmp[v.Field] = v.Value
+					fieldSource[v.Field] = map[string]interface{}{
+						"ext_type": v.Type,
+						"ext_from": v.ExtFrom,
+					}
+					break
+				}
+				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
+					tmp[v.Field] = v.Value
+					fieldSource[v.Field] = map[string]interface{}{
+						"ext_type": v.Type,
+						"ext_from": v.ExtFrom,
+					}
+					// Winner / buyer whose source value matches letter_entity:
+					// prefer the raw source value when it exists in the company
+					// collection "qyxy_std".
+					if (v.Field == "winner" || v.Field == "buyer") && letter_entity.MatchString(qu.ObjToString(v.SourceValue)) {
+						qyxy_data := ju.Qyxy_Mgo.FindOne("qyxy_std", map[string]interface{}{
+							"company_name": qu.ObjToString(v.SourceValue),
+						})
+						if qyxy_data != nil && len(qyxy_data) > 0 {
+							tmp[v.Field] = v.SourceValue
+						}
+					}
+
+					break
+				}
+			}
+		}
+		tmp["winner"] = strings.ReplaceAll(qu.ObjToString(tmp["winner"]), ",,", ",")
+		// More than 15 packages: keep a single (arbitrary) package only.
+		if len(j.PackageInfo) > 15 {
+			for k, v := range j.PackageInfo {
+				j.PackageInfo = map[string]map[string]interface{}{}
+				j.PackageInfo[k] = v
+				break
+			}
+		}
+		if len(j.PackageInfo) > 0 { // package information
+			tmp["package"] = j.PackageInfo
+			// Package sums override the extracted value when they are larger.
+			tmpBidamount, tmpBudget, tmpAgencyfee := qu.Float64All(0), qu.Float64All(0), qu.Float64All(0)
+			// s_winner: comma separated winners of the packages.
+			var tmpstr, savewinner []string
+			// Collect the names of packages that have a winner (sorted below).
+			for b, v := range j.PackageInfo {
+				if v["winner"] != nil && v["winner"] != "" {
+					tmpstr = append(tmpstr, b)
+				}
+			}
+			if len(j.PackageInfo) > 1 {
+				// More than one package: accumulate the amounts.
+				for _, v := range j.PackageInfo {
+					if v["budget"] != nil {
+						tmpBudget = precisionAddFloat(tmpBudget, qu.Float64All(v["budget"]))
+					}
+					if v["bidamount"] != nil {
+						tmpBidamount = precisionAddFloat(tmpBidamount, qu.Float64All(v["bidamount"]))
+					}
+					if v["agencyfee"] != nil {
+						tmpAgencyfee = precisionAddFloat(tmpAgencyfee, qu.Float64All(v["agencyfee"]))
+					}
+				}
+				if qu.Float64All(tmp["budget"]) < tmpBudget {
+					fieldSource["budget"] = map[string]interface{}{
+						"ext_type": "",
+						"ext_from": "package",
+					}
+					tmp["budget"] = tmpBudget
+				}
+
+				if qu.Float64All(tmp["agencyfee"]) < tmpAgencyfee {
+					fieldSource["agencyfee"] = map[string]interface{}{
+						"ext_type": "",
+						"ext_from": "package",
+					}
+					tmp["agencyfee"] = tmpAgencyfee
+				}
+
+				if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
+					fieldSource["bidamount"] = map[string]interface{}{
+						"ext_type": "",
+						"ext_from": "package",
+					}
+					tmp["bidamount"] = tmpBidamount
+				}
+			} else {
+				// Exactly one package: use its values where tmp has none.
+				// NOTE(review): `== 0` compares against the int 0 and is false for
+				// float64(0); confirm whether a zero float should also count as "no value".
+				if tmp["budget"] == nil || tmp["budget"] == 0 {
+					for _, v := range j.PackageInfo {
+						if v["budget"] != nil {
+							fieldSource["budget"] = map[string]interface{}{
+								"ext_type": "",
+								"ext_from": "package",
+							}
+							tmp["budget"] = v["budget"]
+						}
+					}
+				}
+
+				if tmp["agencyfee"] == nil || tmp["agencyfee"] == 0 {
+					for _, v := range j.PackageInfo {
+						if v["agencyfee"] != nil {
+							fieldSource["agencyfee"] = map[string]interface{}{
+								"ext_type": "",
+								"ext_from": "package",
+							}
+							tmp["agencyfee"] = v["agencyfee"]
+						}
+					}
+				}
+
+				if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
+					for _, v := range j.PackageInfo {
+						if v["bidamount"] != nil {
+							fieldSource["bidamount"] = map[string]interface{}{
+								"ext_type": "",
+								"ext_from": "package",
+							}
+							tmp["bidamount"] = v["bidamount"]
+						}
+					}
+				}
+			}
+			// s_winner: join the package winners in package-name order.
+			sort.Strings(tmpstr)
+			for _, v := range tmpstr {
+				winner := qu.ObjToString(j.PackageInfo[v]["winner"])
+				new_winner := clearWinnerReg.ReplaceAllString(winner, "")
+				if new_winner == "" {
+					continue
+				}
+				// Name blacklist.
+				if unPackageWinnerReg.MatchString(new_winner) {
+					continue
+				}
+				savewinner = append(savewinner, new_winner)
+			}
+			if len(savewinner) == 0 && tmp["winner"] != nil {
+				tmp["s_winner"] = tmp["winner"]
+				fieldSource["s_winner"] = fieldSource["winner"]
+			} else if savewinner != nil {
+				if len(savewinner) == 1 && tmp["winner"] != nil {
+					tmp["s_winner"] = tmp["winner"]
+					fieldSource["s_winner"] = fieldSource["winner"]
+				} else {
+					savewinner = RemoveReplicaSliceString(savewinner)
+					tmp["s_winner"] = strings.Join(savewinner, ",")
+					fieldSource["s_winner"] = map[string]interface{}{
+						"ext_type": "",
+						"ext_from": "package",
+					}
+				}
+			}
+		} else if tmp["winner"] != nil {
+			// No packages: s_winner is the winner.
+			tmp["s_winner"] = tmp["winner"]
+			fieldSource["s_winner"] = fieldSource["winner"]
+		}
+
+		if len(j.Winnerorder) > 0 { // candidate information
+			for i, v := range j.Winnerorder {
+				if v["price"] != nil {
+					tmpPrice := clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode, j.IsClearnMoney)
+					// The last element is used as the validity flag of the conversion.
+					if tmpPrice[len(tmpPrice)-1].(bool) {
+						j.Winnerorder[i]["price"] = tmpPrice[0]
+					} else {
+						delete(j.Winnerorder[i], "price")
+					}
+				}
+			}
+			tmp["winnerorder"] = j.Winnerorder
+		}
+		// Attachments: values fill fields that the main document left empty.
+		var resultf map[string][]*ju.ExtField
+		ffield := map[string]interface{}{}
+		if jf != nil {
+			_, resultf, _ = funcAnalysis(jf, e)
+			for _, val := range resultf {
+				for _, v := range val { // only the first value with a score above -1 is considered
+					if v.Score > -1 {
+						ffield[v.Field] = v.Value
+						if tmp[v.Field] == nil || tmp[v.Field] == "" {
+							if v.Field == "addressing" {
+								break
+							}
+							if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
+								// Checked assertion: a non-float amount is skipped
+								// instead of panicking and aborting the whole save.
+								if amount, ok := v.Value.(float64); ok && amount > 100 && amount < 50000000000 {
+									tmp[v.Field] = v.Value
+									fieldSource[v.Field] = map[string]interface{}{
+										"ext_type": v.Type,
+										"ext_from": "ff",
+									}
+									break
+								}
+							}
+							if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
+								if v.Field == "winner" && j.Category == "招标" && j.CategorySecond != "单一" {
+									break // per the original author, winner should no longer reach this point
+								}
+								tmp[v.Field] = v.Value
+								fieldSource[v.Field] = map[string]interface{}{
+									"ext_type": v.Type,
+									"ext_from": "ff",
+								}
+								// Winner / buyer matching letter_entity: prefer the
+								// raw source value when found in "qyxy_std".
+								if (v.Field == "winner" || v.Field == "buyer") && letter_entity.MatchString(qu.ObjToString(v.SourceValue)) {
+									qyxy_data := ju.Qyxy_Mgo.FindOne("qyxy_std", map[string]interface{}{
+										"company_name": qu.ObjToString(v.SourceValue),
+									})
+									if qyxy_data != nil && len(qyxy_data) > 0 {
+										tmp[v.Field] = v.SourceValue
+									}
+								}
+
+								break
+							}
+						}
+						break
+					}
+				}
+			}
+			if len(jf.PackageInfo) > 0 { // package information
+				ffield["package"] = jf.PackageInfo
+			}
+			if len(jf.Winnerorder) > 0 { // candidate information
+				ffield["winnerorder"] = jf.Winnerorder
+			}
+		}
+
+		// Attach the field sources.
+		tmp["field_source"] = fieldSource
+		// Flag documents that contain an irregular table.
+		if j.IsUnRulesTab {
+			tmp["is_UnRules_Tab"] = j.IsUnRulesTab
+		}
+		for k, v := range *doc {
+			// NOTE(review): this stores a []rune (not a string) back into doc and
+			// tmp below still receives the untruncated v; confirm the intent.
+			if utf8.RuneCountInString(qu.ObjToString(v)) > 100000 {
+				(*doc)[k] = []rune(qu.ObjToString(v))[:100000]
+			}
+			// Skip redundant fields.
+			if delFiled(k) {
+				continue
+			}
+			if tmp[k] == nil && k != "project_completedate" && k != "project_startdate" {
+				tmp[k] = v
+			}
+		}
+		// Quality audit.
+		if ju.QualityAudit {
+			e.QualityAudit(tmp)
+		}
+		// Signature (inscription) recognition.
+		e.inscribeRecognize(&tmp, *j.Data)
+
+		// Region extraction.
+		if e.IsExtractCity {
+			e.ExtractRegionInfo(j, &tmp, true)
+			e.ExtractRegionClean(&tmp)
+		}
+		// Brand extraction.
+		if ju.IsBrandGoods {
+			tmp["checkhas"] = map[string]int{
+				"hastable": j.HasTable,
+				"hasgoods": j.HasGoods,
+				"hasbrand": j.HasBrand,
+				"haskey":   j.HasKey,
+			}
+			if len(j.BrandData) > 0 {
+				tmp["tablebrand"] = j.BrandData
+			}
+		}
+		// Price and number extraction.
+		if ju.IsPriceNumber {
+			priceNumberLen := len(j.PriceNumberData)
+			if priceNumberLen > 1 { // de-duplicate identical tables
+				tmpPriceNumberData := []map[string]interface{}{}
+				tableStrs := map[string]bool{}
+				for _, tb := range j.PriceNumberData {
+					// The JSON form of a table is its identity. The local is not
+					// named "bytes" so it does not shadow the bytes package.
+					raw, _ := json.Marshal(tb)
+					str := string(raw)
+					if tableStrs[str] {
+						continue
+					}
+					tableStrs[str] = true
+					for _, data := range tb {
+						tmpPriceNumberData = append(tmpPriceNumberData, data)
+					}
+				}
+				tmp["pricenumber"] = tmpPriceNumberData
+			} else if priceNumberLen == 1 {
+				tmp["pricenumber"] = j.PriceNumberData[0]
+			}
+		}
+		// kvtext: every key/value pair of every block as "key:value\n".
+		var kvtext bytes.Buffer
+		blocks := make([]ju.BlockAndTag, 0)
+		for _, v := range j.Block {
+			// Blocks and their tags.
+			if ju.SaveBlock {
+				xx, _ := json.Marshal(v)
+				tmpblock := new(ju.TmpBlock)
+				err := json.Unmarshal(xx, &tmpblock)
+				if err != nil {
+					// Direct conversion failed: convert the parts individually.
+					if v.BPackage != nil {
+						bpb, _ := json.Marshal(v.BPackage)
+						tmpblock.BPackage = string(bpb)
+					}
+					tmpblock = rangeBlockToJson(v, *tmpblock)
+				}
+				blocks = append(blocks, ju.BlockAndTag{v.Tag, tmpblock})
+			}
+			for _, jv := range []*ju.JobKv{v.ColonKV, v.SpaceKV, v.TableKV} {
+				if jv == nil {
+					continue
+				}
+				for jv_k, jv_v := range jv.KvTags {
+					for _, jv_vv := range jv_v {
+						kvtext.WriteString(jv_k)
+						kvtext.WriteString(":")
+						kvtext.WriteString(jv_vv.Value)
+						kvtext.WriteString("\n")
+					}
+				}
+			}
+		}
+		if kvtext.Len() > 0 {
+			tmp["kvtext"] = kvtext.String()
+		}
+		if len(blocks) > 0 {
+			if blocksBytes, err := json.Marshal(blocks); err == nil {
+				if utf8.RuneCount(blocksBytes) < 100000 {
+					tmp["blocks"] = string(blocksBytes)
+				}
+			}
+		}
+		tmp["dataging"] = j.Dataging
+		// Field check.
+		tmp = checkFields(tmp, *j.Data)
+
+		if tmp["projectname"] == nil || tmp["projectname"] == "" {
+			tmp["projectname"] = j.Title
+		}
+		tmp["repeat"] = 0
+		if ju.Ffield {
+			if len(ffield) > 0 {
+				tmp["ffield"] = ffield
+			}
+		}
+
+		if e.TaskInfo.TestColl == "" {
+			// Read the document id before it is deleted from tmp below; reading
+			// it afterwards always yields nil.
+			resultID := tmp["_id"]
+			if len(tmp) > 0 { // queue the extraction result
+				delete(tmp, "_id")
+				tmparr := []map[string]interface{}{
+					map[string]interface{}{
+						"_id": qu.StringTOBsonId(_id),
+					},
+					map[string]interface{}{"$set": tmp},
+				}
+				e.RWMutex.Lock()
+				e.BidArr = append(e.BidArr, tmparr)
+				e.BidTotal++
+				e.RWMutex.Unlock()
+			}
+			if ju.SaveResult {
+				if resultID == nil {
+					resultID = qu.StringTOBsonId(_id)
+				}
+				// tmp is already referenced by the BidArr entry above, so the raw
+				// results go into a shallow copy and do not leak into that update.
+				withResult := make(map[string]interface{}, len(tmp)+2)
+				for k, v := range tmp {
+					withResult[k] = v
+				}
+				delete(withResult, "_id")
+				withResult["result"] = result
+				withResult["resultf"] = resultf
+				tmparr := []map[string]interface{}{
+					map[string]interface{}{
+						"_id": resultID,
+					},
+					map[string]interface{}{"$set": withResult},
+				}
+				e.RWMutex.Lock()
+				e.ResultArr = append(e.ResultArr, tmparr)
+				e.RWMutex.Unlock()
+			}
+		} else { // test run / result tracking
+			delete(tmp, "_id")
+			delete(tmp, "fieldall")
+			if len(j.BlockPackage) > 0 { // package details
+				if len(j.BlockPackage) > 10 {
+					tmp["epackage"] = "分包异常"
+				} else {
+					bs, _ := json.Marshal(j.BlockPackage)
+					tmp["epackage"] = string(bs)
+				}
+			}
+			tmp["result"] = result
+			b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
+			if !b {
+				log.Debug(e.TaskInfo.TestColl, _id)
+			}
+		}
+	}, func(err interface{}) {
+		log.Debug("AnalysisSaveResult err", err)
+	})
+}
+
+// GetResultMapForLua converts j.Result into plain maps for use by Lua scripts.
+// Every field of j.Result is present in the returned map (with an empty,
+// non-nil slice when it has no values); each value is described by the keys
+// "field", "code", "ruletext", "value", "type", "matchtype" and "extfrom".
+func GetResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
+	defer qu.Catch()
+	out := map[string][]map[string]interface{}{}
+	for name, fields := range j.Result {
+		items := make([]map[string]interface{}, 0, len(fields))
+		for _, ef := range fields {
+			items = append(items, map[string]interface{}{
+				"field":     ef.Field,
+				"code":      ef.Code,
+				"ruletext":  ef.RuleText,
+				"value":     ef.Value,
+				"type":      ef.Type,
+				"matchtype": ef.MatchType,
+				"extfrom":   ef.ExtFrom,
+			})
+		}
+		out[name] = items
+	}
+	return out
+}
+
+// AddExtLog appends one extraction-log record for rule v to ExtLogs[t].
+// Nothing is recorded unless t.IsEtxLog is set. ftype is stored as the record
+// type, sid as the source id; before and extinfo are stored unchanged. An
+// empty rule code is recorded as "kv".
+func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *RegLuaInfo, t *TaskInfo) {
+	defer qu.Catch()
+	if !t.IsEtxLog {
+		return
+	}
+	record := make(map[string]interface{}, 12)
+	record["code"] = qu.If(v.Code == "", "kv", v.Code)
+	record["name"] = v.Name
+	record["type"] = ftype
+	record["ruletext"] = v.RuleText
+	record["islua"] = v.IsLua
+	record["field"] = v.Field
+	record["version"] = t.Version
+	record["taskname"] = t.Name
+	record["before"] = before
+	record["extinfo"] = extinfo
+	record["sid"] = sid
+	record["comeintime"] = time.Now().Unix()
+
+	lock.Lock()
+	ExtLogs[t] = append(ExtLogs[t], record)
+	lock.Unlock()
+}
+// BeforeAddClearFnLog wraps the single extracted field ext into the
+// {field: [entry]} shape and forwards it to AddClearFnLog together with
+// ext.Code, name, ext.Field and e.TaskInfo. The entry's "type" is ftype and
+// its "matchtype" is matchtype.
+func BeforeAddClearFnLog(ftype, name, sid, before, matchtype string, ext *ju.ExtField, e *ExtractTask) {
+	entry := map[string]interface{}{
+		"field":     ext.Field,
+		"code":      ext.Code,
+		"type":      ftype,
+		"matchtype": matchtype,
+		"extfrom":   ext.ExtFrom,
+		"value":     ext.Value,
+	}
+	extinfo := map[string]interface{}{}
+	extinfo[ext.Field] = []map[string]interface{}{entry}
+	AddClearFnLog(ftype, sid, before, extinfo, ext.Code, name, ext.Field, e.TaskInfo)
+}
+// AddClearFnLog appends one log record for a clean-up function to ExtLogs[t].
+// Nothing is recorded unless t.IsEtxLog is set. The record always has an empty
+// "ruletext" and "islua" set to false; code, name and field identify the
+// function and the field it processed.
+func AddClearFnLog(ftype, sid string, before interface{}, extinfo interface{}, code, name, field string, t *TaskInfo) {
+	defer qu.Catch()
+	if !t.IsEtxLog {
+		return
+	}
+	record := make(map[string]interface{}, 12)
+	record["code"] = code
+	record["name"] = name
+	record["type"] = ftype
+	record["ruletext"] = ""
+	record["islua"] = false
+	record["field"] = field
+	record["version"] = t.Version
+	record["taskname"] = t.Name
+	record["before"] = before
+	record["extinfo"] = extinfo
+	record["sid"] = sid
+	record["comeintime"] = time.Now().Unix()
+
+	lock.Lock()
+	ExtLogs[t] = append(ExtLogs[t], record)
+	lock.Unlock()
+}
+
+// SaveExtLog flushes the buffered extraction logs and re-schedules itself to
+// run again after 10 seconds.
+//
+// The buffer ExtLogs is swapped for a fresh map under lock, then the logs of
+// each task are written to that task's TrackColl in batches of at most
+// saveLimit records.
+func SaveExtLog() {
+	defer qu.Catch()
+	// Deferred so the next run is scheduled even when saving panics; qu.Catch
+	// (registered above, runs last) is presumably what recovers the panic.
+	// Scheduling as the last statement would stop log saving for good after
+	// the first failure.
+	defer time.AfterFunc(10*time.Second, SaveExtLog)
+
+	lock.Lock()
+	tmpLogs := ExtLogs
+	ExtLogs = map[*TaskInfo][]map[string]interface{}{}
+	lock.Unlock()
+
+	for k, v := range tmpLogs {
+		// Full batches first, then the remainder (at most saveLimit records).
+		for len(v) > saveLimit {
+			db.Mgo.SaveBulk(k.TrackColl, v[:saveLimit]...)
+			v = v[saveLimit:]
+		}
+		db.Mgo.SaveBulk(k.TrackColl, v...)
+	}
+}
+
+// otherNeedSave stores the tags found on key/value pairs and on blocks of j.
+//
+// Saved records have the keys val, type, times, firstid and createtime:
+//   - one record of type "block" (times 0) per entry of a block's NotClassifyTitles;
+//   - one record of type "kv" per key that has at least one value flagged
+//     IsInvalid, with times counting the KvTags collections in which that happened.
+//
+// Records go to "extract_tag_result", or to "<TestColl>_tag" when the task has
+// a test collection, in batches of saveLimit.
+//
+// The function is started with `go`, so it recovers its own panics through
+// qu.Catch like the other entry points in this file; an unrecovered panic in
+// a goroutine would terminate the whole process.
+func otherNeedSave(j *ju.Job, result map[string][]*ju.ExtField, e *ExtractTask) {
+	defer qu.Catch()
+	now := time.Now().Unix()
+	coll := e.TaskInfo.TestColl
+	if coll == "" {
+		coll = "extract_tag_result"
+	} else {
+		coll += "_tag"
+	}
+	datas := []map[string]interface{}{}
+	// push buffers one record and flushes the buffer once it holds saveLimit records.
+	push := func(val interface{}, times int, typ string) {
+		datas = append(datas, map[string]interface{}{
+			"val":        val,
+			"times":      times,
+			"type":       typ,
+			"firstid":    j.SourceMid,
+			"createtime": now,
+		})
+		if len(datas) == saveLimit {
+			db.Mgo.SaveBulk(coll, datas...)
+			datas = []map[string]interface{}{}
+		}
+	}
+
+	kv := map[string]int{}
+	for _, v := range j.Block {
+		for _, vv := range []*ju.JobKv{v.ColonKV, v.TableKV, v.SpaceKV} {
+			if vv == nil || vv.KvTags == nil {
+				continue
+			}
+			for key, tags := range vv.KvTags {
+				for _, tag := range tags {
+					if tag.IsInvalid {
+						// Count the key once per collection, however many of its
+						// values are flagged.
+						kv[key] = kv[key] + 1
+						break
+					}
+				}
+			}
+		}
+		for _, title := range v.NotClassifyTitles {
+			push(title, 0, "block")
+		}
+	}
+	for k, v := range kv {
+		push(k, v, "kv")
+	}
+	if len(datas) > 0 {
+		db.Mgo.SaveBulk(coll, datas...)
+	}
+}
+
+// rangeBlockToJson fills tmpblock from block j, recursing into j.Block, and
+// returns a pointer to the filled copy. The ColonKV, SpaceKV and TableKV of j
+// are stored as JSON strings when they are non-nil. A nil j yields nil.
+//
+// NOTE(review): children are assigned by index into tmpblock.Block, which
+// presumably must already hold at least len(j.Block) elements; verify against
+// the ju.TmpBlock definition.
+func rangeBlockToJson(j *ju.Block, tmpblock ju.TmpBlock) (b *ju.TmpBlock) {
+	if j == nil {
+		return nil
+	}
+	for idx, child := range j.Block {
+		childTmp := new(ju.TmpBlock)
+		raw, _ := json.Marshal(child)
+		json.Unmarshal(raw, &childTmp)
+		tmpblock.Block[idx] = rangeBlockToJson(child, *childTmp)
+	}
+	if j.ColonKV != nil {
+		colon, _ := json.Marshal(j.ColonKV)
+		tmpblock.ColonKV = string(colon)
+	}
+	if j.SpaceKV != nil {
+		space, _ := json.Marshal(j.SpaceKV)
+		tmpblock.SpaceKV = string(space)
+	}
+	if j.TableKV != nil {
+		table, _ := json.Marshal(j.TableKV)
+		tmpblock.TableKV = string(table)
+	}
+	return &tmpblock
+}

+ 4 - 2
src/jy/extract/extractudp.go

@@ -71,18 +71,21 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 				Udpclient.WriteUdp([]byte(skey), mu.OP_NOOP, ra)
 			} else if stype == "update_rule" {
 				ju.IsUpdateRule = true
+			} else if stype == "monitor" {
+				log.Debug("收到监测......")
+				Udpclient.WriteUdp([]byte("monitor ok"), mu.OP_NOOP, ra)
 			} else {
 				sid, _ := rep["gtid"].(string)
 				eid, _ := rep["lteid"].(string)
 				if sid == "" || eid == "" {
 					log.Debug("err", "sid=", sid, ",eid=", eid)
 				} else {
+					//新版本控制抽取
 					udpinfo, _ := rep["stype"].(string)
 					if udpinfo == "" {
 						udpinfo = "udpok"
 					}
 					IsExtStop = false
-					//新版本控制抽取
 					ExtractByUdp(sid, eid, ra)
 					if !IsExtStop {
 						log.Debug("抽取完成udp通知抽取id段-控制台", udpinfo, sid, "~", eid)
@@ -94,7 +97,6 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					//发布数据~重采数据~测试流程
 					//key := sid + "-" + eid + "-" + qu.ObjToString(rep["stype"])
 					//go Udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
-					//
 					//log.Debug("udp通知抽取id段", sid, " ", eid)
 					//ExtractByUdp(sid, eid, ra)
 					//for _, m := range nextNodes {

+ 562 - 0
src/jy/extract/extraxtmethod.go

@@ -0,0 +1,562 @@
+package extract
+
+import (
+	"fmt"
+	"github.com/shopspring/decimal"
+	"jy/clear"
+	"jy/pretreated"
+	ju "jy/util"
+	qu "qfw/util"
+	"qfw/util/redis"
+	"regexp"
+	"strings"
+	"sync"
+	"unicode/utf8"
+)
+
+// scoreIndex pairs a score with an integer index.
+type scoreIndex struct {
+	Score float64
+	Index int
+}
+
+// Package-level state and configuration shared by the extraction code.
+var (
+	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
+	JYUrl                                        = "https://www.jianyu360.cn/article/content/%s.html"
+	cut                                          = ju.NewCut()                                 // fetches the body text and cleans it
+	ExtLogs                                      map[*TaskInfo][]map[string]interface{}        // extraction logs
+	TaskList                                     map[string]*ExtractTask                       // task list
+	ClearTaskList                                map[string]*ClearTask                         // cleaning task list
+	saveLimit                                                                           = 100  // batch size for bulk-saving extraction logs
+	PageSize                                                                            = 5000 // query page size
+	Fields                                                                              = `{"jyfb_data":1,"approvecode":1,"approvenumber":1,"projecttype":1,"approvestatus":1,"total_investment":1,"funds":1,"owner":1,"projectaddr":1,"projectperiod":1,"project_scale":1,"project_person":1,"project_phone":1,"project_startdate":1,"project_completedate":1,"construction_area":1,"floor_area":1,"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"review_experts":1,"purchasing":1}`
+	Fields2                                                                             = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
+	// Each NiJianField entry has the form "<type>#<field name>".
+	NiJianField = []string{
+		"string#approvecode",
+		"string#total_investment",
+		"string#funds",
+		"string#owner",
+		"string#projectaddr",
+		"string#projectperiod",
+		"string#project_scale",
+		"string#project_person",
+		"string#project_phone",
+		"string#approvenumber",
+		"string#projecttype",
+		"string#approvestatus",
+		"time#project_startdate",
+		"time#project_completedate",
+		"map#construction_area",
+		"map#floor_area",
+	}
+	// spidercode is a set of spider codes; how it is used is not visible in this chunk.
+	spidercode = map[string]bool{
+		"gd_zhsggzyjyzx_jsgc_fjczbgg":     true,
+		"js_szgyyqggzyjyzx_jsgc_zjfbgs":   true,
+		"zj_tzsyhggzyjyzx_jsgc_kbqk":      true,
+		"hb_tmsggzyjyxxw_jsgc_kbqk":       true,
+		"zj_nbsyyggzyjyw_jsgc_kbqk":       true,
+		"zj_zjsggzyjyzx_jyxx_kbjg":        true,
+		"zj_zjzdgcjyw_ztbjglxx_kbjg":      true,
+		"zj_lssggzyjyw_jsgc_kbsk":         true,
+		"zj_qzslyxggzyjyzx_gggs_xkbjl":    true,
+		"sc_mssggzydzjypt_jsgc_kbjl":      true,
+		"sc_pzhsggzyjyfwzx_jsgc_kbylb":    true,
+		"a_zgzbtbggfwpt_wasjgf_ss_kbjl":   true,
+		"a_hbszbtbggfwpt_kbjl":            true,
+		"a_szsjsgcjyfwzxbafzx_kbqkgs":     true,
+		"a_szldzbyxgs_kbxx":               true,
+		"zj_zssssxggzyjyw_gcjs_kbjggs":    true,
+		"gd_szszfhjsj_kbqkgs":             true,
+		"a_gjggzyjypt_gcjs_kbjl":          true,
+		"a_gjggzyjypt_gcjs_kbjl_new":      true,
+		"zj_tzsyhggzyjyzx_kbjggg":         true,
+		"a_zgzbtbggfwpy_wasjgf_kbjl_lsbl": true,
+		"ah_czsggzyjyw_jsgc_kbjl":         true,
+		"ah_czsggzyjyw_zfcg_kbxx":         true,
+		"ah_whsggzyjyfww_kbxx_cgxm":       true,
+		"ah_whsggzyjyfww_kbxx_gcxm":       true,
+	}
+	clearMoneyReg *regexp.Regexp = regexp.MustCompile("(PPP[\\s]?项目|新城镇建设|国土资源局|基金管理|高速公路|水系生态治理|水生态建设|棚改旧改|棚户区改造|棚改项目|危房改造项目|土地整理|高速公路项目)")
+	sortStrReg    *regexp.Regexp = regexp.MustCompile("(招标|采购|需求|投标|[竞询议]报价|公示|单一来源|询价|成交|中标)")
+	clearStrReg   *regexp.Regexp = regexp.MustCompile("((设计|施工|招标)图|业绩|图纸)")
+	clearbondReg  *regexp.Regexp = regexp.MustCompile("(无|不|否|金额)") // bid bond
+	textSelectReg *regexp.Regexp = regexp.MustCompile("(中标(单位|供应商|金额|价格))")
+)
+
+var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:|:")
+var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
+
+// Entity (organisation) names that contain Latin letters between Chinese characters.
+var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])$")
+
+// Signature-block extraction: an organisation name followed by a date.
+// In both patterns group 2 is the organisation and group 5 the date;
+// inscribe_entity_2 additionally captures a second organisation in group 6.
+var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{2,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会|体]))\n([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
+var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{2,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会|体]))([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))")
+
+// Publish-time recognition: a year-month-day date.
+var inscribe_publishtime_1 = regexp.MustCompile("(\\d{4}[年-]\\d{1,2}[月-]\\d{1,2}[日-]*)")
+
+// Signature entities matching any of these words are rejected by InscribeEntity.
+var exclude_entity = regexp.MustCompile("(咨询|工程造价|交易|代理|投资|(管理|工程)有限|(项目|工程)管理|采购|监理|服务|招标|招投标)")
+
+//清洗正文
+func CleanDetailText(detail string, summary string) string {
+	detail = regexp.MustCompile(`<!--[\w\W]*?-->`).ReplaceAllString(detail, "")
+	detail = pretreated.RepairCon(detail)
+	detail = ju.CutLableStr(summary + "\n" + detail)
+	detail = cut.ClearHtml(summary + "\n" + detail)
+	return detail
+}
+
+// SelectDetailSourceText decides between detail and contenthtml as the text
+// source. It returns true (use detail) only when detail is at least 1000 bytes
+// long and textSelectReg matches detail but not contenthtml.
+func SelectDetailSourceText(detail string, contenthtml string) bool {
+	return len(detail) >= 1000 &&
+		textSelectReg.MatchString(detail) &&
+		!textSelectReg.MatchString(contenthtml)
+}
+
+// file2text concatenates the text of all attachments listed under
+// doc["attach_text"] and stores the result in doc["detailfile"].
+//
+// Attachments whose file name matches clearStrReg are skipped. Attachments whose
+// name matches sortStrReg are appended first, the remaining ones afterwards.
+// Each attachment contributes at most ju.Config["filelength"] (default 150000)
+// characters, and no further attachment is fetched once the accumulated text
+// already exceeds that limit.
+//
+// Truncation is done on rune boundaries. The size check has always counted
+// runes, but the text used to be cut with a byte slice, which could split a
+// multi-byte character and kept far fewer characters than the limit.
+//
+// Attachments are visited in map iteration order, so the concatenation order
+// within each group is not deterministic.
+func file2text(doc *map[string]interface{}) {
+	preferred := map[string]bool{}
+	names := map[string]bool{}
+	urls := map[string]string{}
+	//if attach_text, ok := (*doc)["new_attach_text"].(map[string]interface{}); ok {
+	if attach_text, ok := (*doc)["attach_text"].(map[string]interface{}); ok {
+		for _, attachs := range attach_text {
+			fileinfos, ok := attachs.(map[string]interface{})
+			if !ok {
+				continue
+			}
+			for _, fileinfo := range fileinfos {
+				ff, ok := fileinfo.(map[string]interface{})
+				if !ok {
+					continue
+				}
+				attach_url := qu.ObjToString(ff["attach_url"])
+				ffname := qu.ObjToString(ff["file_name"])
+				if clearStrReg.MatchString(ffname) {
+					continue
+				}
+				names[ffname] = true
+				urls[ffname] = attach_url
+				if sortStrReg.MatchString(ffname) {
+					preferred[ffname] = true
+				}
+			}
+		}
+	}
+
+	limit := qu.IntAllDef(ju.Config["filelength"], 150000)
+	text := ""
+	// appendFile fetches one attachment and appends at most limit runes of it.
+	// It reports false, without fetching, once text is already over the limit.
+	appendFile := func(name string) bool {
+		if utf8.RuneCountInString(text) > limit {
+			return false
+		}
+		content := ju.OssGetObject(urls[name])
+		if utf8.RuneCountInString(content) > limit {
+			content = string([]rune(content)[:limit])
+		}
+		text += content + "\n"
+		return true
+	}
+
+	for name := range preferred {
+		if !appendFile(name) {
+			// NOTE(review): on this early exit "附件" is not stripped, unlike
+			// the normal exit below. Kept as in the original; confirm intent.
+			(*doc)["detailfile"] = text
+			return
+		}
+	}
+	for name := range names {
+		if preferred[name] {
+			continue
+		}
+		if !appendFile(name) {
+			(*doc)["detailfile"] = text
+			return
+		}
+	}
+	(*doc)["detailfile"] = strings.ReplaceAll(text, "附件", "")
+}
+
+// isUsedPackageJF reports whether the packages extracted from attachments are
+// usable. An empty or nil map is not usable, and neither is a map in which any
+// package has a budget or bid amount in the range (0, 1].
+func isUsedPackageJF(jf_package map[string]map[string]interface{}) bool {
+	if len(jf_package) == 0 {
+		return false
+	}
+	// An amount in (0, 1] is treated as invalid.
+	suspicious := func(amount float64) bool {
+		return amount > 0.0 && amount <= 1.0
+	}
+	for _, pkg := range jf_package {
+		budget := qu.Float64All(pkg["budget"])
+		bidamount := qu.Float64All(pkg["bidamount"])
+		//winner := qu.Float64All(pack["winner"])
+		//text := qu.ObjToString(pack["text"])
+		//13.投标报价\n13.1本次报价
+		//14.投标报价\n14.1投标报价
+		if suspicious(budget) || suspicious(bidamount) {
+			return false
+		}
+	}
+	return true
+}
+
+// inscribeRecognize post-processes the extraction result tmp using the
+// signature block of the announcement text.
+//
+// Buyer: records whose toptype and subtype are both "拟建" never keep a buyer.
+// For all other records an empty buyer is filled from InscribeEntity when
+// ju.Inscribe is enabled.
+//
+// Publish time: when j_data["publishtime"] is -1 and tmp has no
+// ext_publishtime yet, one is derived with InscribePublishtime; for any other
+// publishtime the ext_publishtime key is removed.
+func (e *ExtractTask) inscribeRecognize(tmp *map[string]interface{}, j_data map[string]interface{}) {
+	out := *tmp
+	isPlanned := qu.ObjToString(out["toptype"]) == "拟建" && qu.ObjToString(out["subtype"]) == "拟建"
+	if isPlanned {
+		delete(out, "buyer")
+	} else if qu.ObjToString(out["buyer"]) == "" && ju.Inscribe {
+		if buyer := InscribeEntity(qu.ObjToString(j_data["detail"])); buyer != "" {
+			out["buyer"] = buyer
+		}
+	}
+
+	if qu.IntAll(j_data["publishtime"]) != -1 {
+		delete(out, "ext_publishtime")
+		return
+	}
+	if qu.IntAll(out["ext_publishtime"]) != 0 {
+		return
+	}
+	if ts := InscribePublishtime(j_data); ts > int64(0) {
+		out["ext_publishtime"] = ts
+	}
+}
+
+// InscribeEntity extracts the signing organisation from the signature block at
+// the end of an announcement. It looks only at the last 200 bytes of the text
+// with tables removed and returns "" when nothing is found or when the
+// candidate matches exclude_entity.
+func InscribeEntity(detail string) string {
+	new_detail := pretreated.TextAfterRemoveTable(detail)
+	if len(new_detail) > 200 {
+		// Cut the tail of the table-free text itself. The previous code sliced
+		// the original detail with an offset computed from new_detail, which
+		// defeated the 200-byte limit and could panic when detail was shorter
+		// than that offset.
+		new_detail = new_detail[len(new_detail)-200:]
+	}
+	new_str := inscribe_entity_1.FindString(new_detail)
+	if new_str == "" {
+		new_str = inscribe_entity_2.FindString(new_detail)
+		if new_str != "" {
+			// Pattern 2 captures an organisation before the date (group 2) and
+			// another after it (group 6); the name is used only when both agree.
+			// NOTE(review): when they differ, new_str stays the whole match
+			// (including the date). Kept as in the original; confirm intent.
+			str1 := inscribe_entity_2.ReplaceAllString(new_str, "${2}")
+			str2 := inscribe_entity_2.ReplaceAllString(new_str, "${6}")
+			if str1 == str2 && str1 != "" {
+				new_str = str1
+			}
+		}
+	} else {
+		new_str = inscribe_entity_1.ReplaceAllString(new_str, "${2}")
+	}
+	if new_str != "" && exclude_entity.MatchString(new_str) {
+		new_str = ""
+	}
+	return new_str
+}
+
+// InscribePublishtime derives a publish timestamp for an announcement, or
+// returns 0 when none can be found. It first tries the date in the signature
+// block (last 200 bytes of the table-free detail text) and then the first
+// attachment file name under projectinfo.attachments that contains a date.
+func InscribePublishtime(j_data map[string]interface{}) int64 {
+	// parse turns a date string into a timestamp; 0 means "not usable".
+	parse := func(dateStr string) int64 {
+		if data := clear.ObjToTimestamp([]interface{}{dateStr}, ""); len(data) > 0 {
+			if ts := qu.Int64All(data[0]); ts > int64(0) {
+				return ts
+			}
+		}
+		return int64(0)
+	}
+
+	// Signature block.
+	detail := pretreated.TextAfterRemoveTable(qu.ObjToString(j_data["detail"]))
+	if len(detail) > 200 {
+		detail = detail[len(detail)-200:]
+	}
+	new_str := inscribe_entity_1.FindString(detail)
+	if new_str == "" {
+		new_str = inscribe_entity_2.FindString(detail)
+		if new_str != "" {
+			new_str = inscribe_entity_2.ReplaceAllString(new_str, "${5}")
+		}
+	} else {
+		new_str = inscribe_entity_1.ReplaceAllString(new_str, "${5}")
+	}
+	if ts := parse(new_str); ts > int64(0) {
+		return ts
+	}
+
+	// Attachment file names. qu.ObjToMap returns a pointer that is presumably
+	// nil when the value is missing or not a map (its implementation is not
+	// visible here), so it is checked before dereferencing.
+	projectinfo := qu.ObjToMap(j_data["projectinfo"])
+	if projectinfo == nil {
+		return int64(0)
+	}
+	attachments := qu.ObjToMap((*projectinfo)["attachments"])
+	if attachments == nil {
+		return int64(0)
+	}
+	for _, v := range *attachments {
+		info := qu.ObjToMap(v)
+		if info == nil {
+			continue
+		}
+		filename := qu.ObjToString((*info)["filename"])
+		if pt_str := inscribe_publishtime_1.FindString(filename); pt_str != "" {
+			if ts := parse(pt_str); ts > int64(0) {
+				return ts
+			}
+		}
+	}
+	return int64(0)
+}
+
+// dealWithDiscountBid converts the discount fields of tmp into a single
+// multiplier, using decimal arithmetic. Precedence:
+//   - biddiscount_down > 0: 1 - biddiscount_down
+//   - biddiscount_up > 0:   1 + biddiscount_up
+//   - biddiscount > 10:     biddiscount / 100
+//   - biddiscount in (1, 10]: biddiscount / 10
+//   - biddiscount in (0, 1]:  biddiscount unchanged
+// It returns 0 when no field is positive.
+func dealWithDiscountBid(tmp map[string]interface{}) float64 {
+	discount := qu.Float64All(tmp["biddiscount"])
+	up := qu.Float64All(tmp["biddiscount_up"])
+	down := qu.Float64All(tmp["biddiscount_down"])
+	one := decimal.NewFromFloat(float64(1))
+
+	switch {
+	case down > 0.0:
+		factor, _ := one.Sub(decimal.NewFromFloat(down)).Float64()
+		return factor
+	case up > 0.0:
+		factor, _ := one.Add(decimal.NewFromFloat(up)).Float64()
+		//log.Debug("上浮后折扣系数:",res)
+		return factor
+	case discount > 10.0:
+		factor, _ := decimal.NewFromFloat(discount).Div(decimal.NewFromFloat(100.0)).Float64()
+		//log.Debug("标准-⑩折扣系数:",res)
+		return factor
+	case discount > 1.0:
+		factor, _ := decimal.NewFromFloat(discount).Div(decimal.NewFromFloat(10.0)).Float64()
+		return factor
+	case discount > 0.0:
+		//log.Debug("标准折扣系数:",biddiscount)
+		return discount
+	}
+	return 0.0
+}
+
+// precisionAddFloat adds two float64 values through decimal arithmetic to avoid
+// binary floating-point rounding in the sum, and returns the result as float64.
+func precisionAddFloat(tmp1, tmp2 float64) float64 {
+	sum, _ := decimal.NewFromFloat(tmp2).Add(decimal.NewFromFloat(tmp1)).Float64()
+	return sum
+}
+
+// calculateAbnormalMoney looks for a pair of amount candidates that differ by
+// exactly a factor of 10000 where the larger one is implausibly big. If such a
+// pair exists it returns true and the index in val of the first candidate
+// carrying the smaller amount; otherwise it returns false, 0.
+// Only candidates with IsTrue set and Score > -1 are considered.
+func calculateAbnormalMoney(val []*ju.ExtField) (bool, int) {
+	// Handles two shapes: exactly two distinct amounts related by x10000, or
+	// more than two distinct amounts with one of them very large.
+	moneyIndex := []int{}   // index in val of each considered candidate
+	moneyArr := []float64{} // amount of each considered candidate, parallel to moneyIndex
+	first_money := float64(0)
+	difValue := map[string]interface{}{} // used as a set of distinct amounts, keyed by their string form
+	for k, v := range val { // collect the eligible candidates
+		if v.IsTrue && v.Score > -1 {
+			moneyArr = append(moneyArr, qu.Float64All(v.Value))
+			moneyIndex = append(moneyIndex, k)
+			key := ""
+			if m, ok := v.Value.(float64); ok {
+				key = fmt.Sprintf("%f", m)
+			} else {
+				key = qu.ObjToString(v.Value)
+			}
+			if difValue[key] == nil {
+				difValue[key] = 1
+			}
+			//if len(difValue) > 2 {
+			//	return false, 0
+			//}
+		}
+	}
+	// Evaluate the collected amounts.
+	if len(difValue) == 2 {
+		// money_1 is the first amount, money_2 the first amount that differs from it.
+		money_1, money_2 := float64(0), float64(0)
+		for k, v := range moneyArr {
+			if k == 0 {
+				money_1 = v
+			} else {
+				if v != money_1 {
+					money_2 = v
+					break
+				}
+			}
+		}
+		isRatio, new_money := false, float64(0) // whether the two amounts are related by x10000
+		if money_1 != float64(0) && money_2 != float64(0) {
+			// The larger amount must also be at least 100000000 for the pair to count.
+			if money_1 == money_2*float64(10000) && money_1 >= 100000000 {
+				isRatio = true
+				new_money = money_2
+			}
+			if money_2 == money_1*float64(10000) && money_2 >= 100000000 {
+				isRatio = true
+				new_money = money_1
+			}
+
+			if isRatio { // use the smaller amount
+				for k, v := range moneyArr {
+					if v == new_money {
+						return true, moneyIndex[k]
+					}
+				}
+			}
+		}
+	} else if len(difValue) > 2 { // more than two distinct amounts
+		is_exists := false
+		// first_money ends up as the LAST amount that is at least 1000000000.
+		for _, v := range moneyArr {
+			if v >= 1000000000 {
+				is_exists = true
+				first_money = v
+			}
+		}
+		if is_exists {
+			// Look for a candidate that is exactly 1/10000 of that large amount.
+			for k, v := range moneyArr {
+				if v*10000 == first_money {
+					return true, moneyIndex[k]
+				}
+			}
+		}
+	} else {
+
+	}
+
+	return false, 0
+}
+
+// filterRepeatWinArr de-duplicates the winner-candidate list for the spider
+// "sh_shszfhcxjsglwyh_jsgc_zhbhxrgs"; other spiders are left untouched.
+// The list is split into groups, a new group starting at every entry whose
+// sort is 1. j.Winnerorder is replaced by the first group whose first entry has
+// both a price and an entname, or by the first group when none qualifies.
+func filterRepeatWinArr(j *ju.Job) {
+	if j.SpiderCode != "sh_shszfhcxjsglwyh_jsgc_zhbhxrgs" {
+		return
+	}
+	groups := make([][]map[string]interface{}, 0)
+	current := make([]map[string]interface{}, 0)
+	for _, candidate := range j.Winnerorder {
+		if qu.IntAll(candidate["sort"]) == 1 { // starts a new group
+			if len(current) > 0 {
+				groups = append(groups, current)
+			}
+			current = make([]map[string]interface{}, 0)
+		}
+		current = append(current, candidate)
+	}
+	if len(current) > 0 {
+		groups = append(groups, current)
+	}
+	if len(groups) == 0 {
+		return
+	}
+
+	chosen := 0
+	for idx, group := range groups {
+		if len(group) == 0 {
+			continue
+		}
+		head := group[0]
+		if qu.ObjToString(head["price"]) != "" && qu.ObjToString(head["entname"]) != "" {
+			chosen = idx
+			break
+		}
+	}
+	j.Winnerorder = groups[chosen]
+}
+
+// resetWinnerorder feeds the first winner candidate of j.Winnerorder back into
+// j.Result as an additional "winner" candidate (score 0.5) and, when its price
+// cleans successfully, as a "bidamount" candidate (score 2.5).
+// It does nothing when j.Winnerorder is empty.
+func resetWinnerorder(j *ju.Job) {
+	if len(j.Winnerorder) == 0 {
+		return
+	}
+	maxlen := len(j.Winnerorder) - 1
+	// winner candidates
+	//i := 0
+	winners := []*ju.ExtField{}
+	bidamounts := []*ju.ExtField{}
+
+	// More than one candidate in the list.
+	if maxlen > 0 {
+		// Spider-specific filtering of repeated candidate groups.
+		filterRepeatWinArr(j)
+		// The first entry must be ranked 1, otherwise nothing is added at all.
+		if qu.Float64All(j.Winnerorder[0]["sort"]) != 1 {
+			return
+		}
+		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
+		if j.Winnerorder[0]["price"] != nil {
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
+			// NOTE(review): assumes ObjToMoney returns a non-empty slice whose last
+			// element is a bool validity flag; the unchecked assertion panics
+			// otherwise. Confirm against clear.ObjToMoney.
+			if tmpPrice[len(tmpPrice)-1].(bool) {
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
+			}
+		}
+	}
+	// Merge the new candidates into the existing results (set or append).
+	if j.Result["winner"] == nil && len(winners) > 0 {
+		j.Result["winner"] = winners
+	} else if len(winners) > 0 {
+		j.Result["winner"] = append(j.Result["winner"], winners...)
+	}
+	if j.Result["bidamount"] == nil && len(bidamounts) > 0 {
+		j.Result["bidamount"] = bidamounts
+	} else if len(bidamounts) > 0 {
+		j.Result["bidamount"] = append(j.Result["bidamount"], bidamounts...)
+	}
+	// Still no winner result (e.g. the list holds a single candidate): take the
+	// first entry if it is ranked 1.
+	if j.Result["winner"] == nil && len(j.Winnerorder) > 0 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
+		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
+		j.Result["winner"] = winners
+		if j.Winnerorder[0]["price"] != nil {
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
+			if tmpPrice[len(tmpPrice)-1].(bool) {
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
+			}
+			// NOTE(review): unlike the merge above, this REPLACES any existing
+			// j.Result["bidamount"] (with an empty list when the price did not
+			// clean successfully). Confirm that dropping earlier candidates is intended.
+			j.Result["bidamount"] = bidamounts
+		}
+	}
+}
+// RemoveReplicaSliceString returns the distinct strings of slc in order of first
+// appearance. The result is always a non-nil slice, even for empty input.
+func RemoveReplicaSliceString(slc []string) []string {
+	seen := make(map[string]bool, len(slc))
+	unique := make([]string, 0)
+	for _, item := range slc {
+		if seen[item] {
+			continue
+		}
+		seen[item] = true
+		unique = append(unique, item)
+	}
+	return unique
+}
+
+// auxInfo builds the auxiliary candidate listing of a job: for every field in
+// j.Result it returns the candidates with value, type, score, block tag, source
+// value and a "standardized" flag. The flag is true when the candidate value
+// exists as a key in the Redis database selected for the field (ju.BuyerDB,
+// ju.WinnerDB or ju.AgencyDB for buyer / winner / agency, database 0 otherwise).
+// budget and bidamount candidates that are not IsTrue are left out.
+// A nil job yields an empty map.
+func auxInfo(j *ju.Job) map[string][]map[string]interface{} {
+	fieldalls := map[string][]map[string]interface{}{}
+	if j == nil {
+		return fieldalls
+	}
+	qykredis := redis.RedisPool[ju.QYK_RedisName].Get()
+	defer qykredis.Close()
+	for field, val := range j.Result {
+		//ju.Sort(val)
+		// db is chosen per field. It used to be declared outside the loop, so a
+		// field without its own database inherited whatever an earlier iteration
+		// had selected, and map iteration order made that random.
+		db := 0
+		switch field {
+		case "buyer":
+			db = ju.BuyerDB
+		case "winner":
+			db = ju.WinnerDB
+		case "agency":
+			db = ju.AgencyDB
+		}
+		// The database depends only on the field, so select it once per field
+		// instead of once per candidate.
+		_, selectErr := qykredis.Do("SELECT", db)
+		if selectErr != nil {
+			fmt.Println("redis select err", selectErr)
+		}
+		isMoney := field == "budget" || field == "bidamount"
+		sfields := []map[string]interface{}{}
+		for _, v := range val {
+			// Skip invalid amounts before spending a Redis round trip on them.
+			if isMoney && !v.IsTrue {
+				continue
+			}
+			standardized := false
+			if selectErr == nil {
+				rep, err := qykredis.Do("GET", v.Value)
+				standardized = rep != nil && err == nil
+			}
+			sfields = append(sfields, map[string]interface{}{
+				"val":          v.Value,
+				"type":         v.Type,
+				"score":        v.Score,
+				"blocktag":     v.BlockTag,
+				"sourceval":    v.SourceValue,
+				"standardized": standardized,
+			})
+		}
+		fieldalls[field] = sfields
+	}
+	return fieldalls
+}

+ 104 - 25
src/jy/extract/score.go

@@ -2,7 +2,9 @@
 package extract
 
 import (
+	"encoding/json"
 	"fmt"
+	"jy/clear"
 	ju "jy/util"
 	"log"
 	"os"
@@ -20,10 +22,14 @@ var (
 	TagConfig     map[string]map[string]float64
 	TagConfigDesc map[string]string
 
-	RepeatScore, BlockScore float64
-	CommonScore             map[string]float64
-	FieldsScore             map[string]map[string]float64
-	lengthValidReg          = regexp.MustCompile(`^(.{2}([大|小|中|学][学|院]|公司|某部|学社|大队|党校)|某(部|中心))$`)
+	RepeatScore     float64
+	BlockScore      float64
+	CommonScore     map[string]float64
+	FieldsScore     map[string]map[string]float64
+	lengthValidReg0 = regexp.MustCompile(`(金额|单价)`)
+	lengthValidReg1 = regexp.MustCompile(`^(.{2}([大|小|中|学][学|院]|公司|某部|学社|大队|党校)|某(部|中心))$`)
+	lengthValidReg2 = regexp.MustCompile(`([,,、])`)
+	lengthValidReg3 = regexp.MustCompile(`(.{4,20}公司)`)
 )
 
 func init() {
@@ -108,6 +114,61 @@ func init() {
 
 var CNreg = regexp.MustCompile("[\u4e00-\u9fa5]")
 
+// funcAnalysis scores and sorts the extraction candidates of a job and merges in
+// values from the job's jsondata.
+//
+// It returns the job's document, the scored result map produced by ScoreFields,
+// and the document id as a string.
+// NOTE(review): the returned map is the one ScoreFields returned; whether it is
+// the same map as j.Result, and whether it still is after j.Result is
+// reassigned from JsonDataMergeProcessing below, is not visible here. Confirm.
+func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[string][]*ju.ExtField, string) {
+	defer qu.Catch()
+	doc := j.Data
+	result := j.Result
+	_id := qu.BsonIdToSId((*doc)["_id"])
+	result = ScoreFields(j, e.Tag) // score with positive/negative words
+
+	// sort each field's candidates
+	for _, val := range result {
+		ju.Sort(val)
+	}
+	// Only when there are results and the job carries non-empty jsondata.
+	if !(len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0) {
+		clearJd(j.Jsondata, e, j.SpiderCode, j.IsClearnMoney)
+		// Snapshot the (cleaned) jsondata via a JSON round trip; keys are deleted
+		// from j.Jsondata below and the snapshot is put back at the end.
+		marshalbt, _ := json.Marshal(j.Jsondata)
+		tmpjddata := make(map[string]interface{})
+		json.Unmarshal(marshalbt, &tmpjddata)
+		for _, jdkey := range ju.JsonData {
+			// Needs a non-empty jsondata value and at least 5 candidates for the field.
+			if (*j.Jsondata)[jdkey] != nil && (*j.Jsondata)[jdkey] != "" && len(j.Result[jdkey]) >= 5 {
+				// Compare the jsondata value with the first 5 candidates.
+				for tmpk, tmpv := range j.Result[jdkey][:5] {
+					if jdkey == "budget" || jdkey == "bidamount" {
+						// Amounts are compared after running the field's clear functions.
+						lockclear.Lock()
+						cfn := e.ClearFn[jdkey]
+						lockclear.Unlock()
+						if len(cfn) == 0 {
+							continue
+						}
+						newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[jdkey], ""}, j.SpiderCode, j.IsClearnMoney)
+						if tmpv.Value == newNum[0] {
+							// jsondata confirms this candidate: add it with score 100,
+							// re-sort, and mark the key handled by deleting it.
+							extField := &ju.ExtField{Code: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), Field: jdkey, ExtFrom: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), SourceValue: (*j.Jsondata)[jdkey], Value: newNum[0], Score: 100, IsTrue: newNum[len(newNum)-1].(bool)}
+							j.Result[jdkey] = append(j.Result[jdkey], extField)
+							ju.Sort(j.Result[jdkey])
+							delete((*j.Jsondata), jdkey)
+							break
+						}
+					} else {
+						if (*j.Jsondata)[jdkey] == tmpv.Value {
+							extField := &ju.ExtField{Code: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), Field: jdkey, ExtFrom: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), SourceValue: (*j.Jsondata)[jdkey], Value: tmpv.Value, Score: 100}
+							j.Result[jdkey] = append(j.Result[jdkey], extField)
+							ju.Sort(j.Result[jdkey])
+							delete((*j.Jsondata), jdkey)
+							break
+						}
+					}
+				}
+			}
+		}
+		// Keys that were not confirmed above go through the general merge.
+		if len(*j.Jsondata) > 0 {
+			j.Result = JsonDataMergeProcessing(j, e)
+		}
+		// Restore the snapshot taken before keys were deleted.
+		j.Jsondata = &tmpjddata
+	}
+	return doc, result, _id
+}
+
 //结果打分
 func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 	qu.Catch()
@@ -118,9 +179,11 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 		}
 		if field == "budget" || field == "bidamount" {
 			for tmpsindex, tmpsvalue := range tmps {
-				if ((strings.Contains(tmpsvalue.RuleText, "总") && !strings.Contains(tmpsvalue.RuleText, "总投资")) || strings.Contains(tmpsvalue.Code, "总价")) && tmpsvalue.RuleText != "总价(元)" && (tmpsvalue.Type == "colon" || tmpsvalue.Type == "table") {
-					tmps[tmpsindex].Score += 1
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含总字+1`, Code: field, Value: tmpsvalue.Value, Score: 1})
+				if strings.Contains(tmpsvalue.RuleText, "总") && !strings.Contains(tmpsvalue.RuleText, "总投资") && (tmpsvalue.Type == "colon" || tmpsvalue.Type == "table") {
+					if tmpsvalue.RuleText != "总价(元)" && tmpsvalue.RuleText == "总价(元)" {
+						tmps[tmpsindex].Score += 1
+						tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含总字+1`, Code: field, Value: tmpsvalue.Value, Score: 1})
+					}
 				} else if strings.Contains(qu.ObjToString(tmpsvalue.SourceValue), "㎡") {
 					tmps[tmpsindex].Score -= 10
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含㎡-10`, Code: field, Value: tmpsvalue.Value, Score: -10})
@@ -264,18 +327,20 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 							if ranges, ok := length["range"].([]interface{}); ok {
 								gt := qu.IntAll(ranges[0])
 								lte := qu.IntAll(ranges[1])
-								//临时调整人名
-								//if field == "winner" {
-								//	if utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value)) == 2 || utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value)) == 3 {
-								//		continue
-								//	}
-								//}
-								//针对指定 buyer -长度0-4 不打分
-								if field == "buyer" || field == "winner" {
+								//支持人名
+								if field == "winner" || field == "buyer" {
 									tmpValue := fmt.Sprint(tmpsvalue.Value)
-									if lengthValidReg.MatchString(tmpValue) && gt == 0 && lte == 4 {
+									if IsPersonName(tmpValue) && !lengthValidReg0.MatchString(tmpValue) {
+										continue
+									}
+									if lengthValidReg1.MatchString(tmpValue) && gt == 0 && lte == 4 {
 										continue
 									}
+									if lengthValidReg2.MatchString(tmpValue) {
+										if isMultiSupplier(tmpValue) {
+											continue
+										}
+									}
 								}
 								if lte < 0 { //∞
 									lte = 999999
@@ -296,19 +361,22 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 						if p, ok := position.(map[string]interface{}); ok {
 							qu.Try(func() {
 								if p["regexp"] != nil {
-									//临时调整人名
-									//if field == "winner" {
-									//	if utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value)) == 2 || utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value)) == 3 {
-									//		return
-									//	}
-									//}
-									if field == "buyer" || field == "winner" {
-										//针对指定 buyer -个别  不打分
+									//支持人名
+									if field == "winner" || field == "buyer" {
 										tmpValue := fmt.Sprint(tmpsvalue.Value)
-										if lengthValidReg.MatchString(tmpValue) && qu.ObjToString(p["describe"]) == "黑名单" {
+										if IsPersonName(tmpValue) && !lengthValidReg0.MatchString(tmpValue) {
+											return
+										}
+										if lengthValidReg1.MatchString(tmpValue) && qu.ObjToString(p["describe"]) == "黑名单" {
 											return
 										}
+										if lengthValidReg2.MatchString(tmpValue) {
+											if isMultiSupplier(tmpValue) {
+												return
+											}
+										}
 									}
+
 									reg := p["regexp"].(*regexp.Regexp)
 									if reg.MatchString(qu.ObjToString(tmpsvalue.Value)) {
 										tmps[tmpsindex].Score += qu.Float64All(p["score"])
@@ -435,3 +503,14 @@ func projectWeightClear(tmps []*ju.ExtField) []*ju.ExtField {
 	}
 	return newList
 }
+
+// isMultiSupplier reports whether str is a list of suppliers: after splitting on
+// lengthValidReg2 (comma-like separators) every part must match lengthValidReg3.
+// Such values are exempted from score deductions by the callers.
+func isMultiSupplier(str string) bool {
+	parts := lengthValidReg2.Split(str, -1)
+	for i := 0; i < len(parts); i++ {
+		if lengthValidReg3.MatchString(parts[i]) {
+			continue
+		}
+		return false
+	}
+	return true
+}

+ 1 - 1
src/jy/extract/score_person.go

@@ -15,7 +15,7 @@ func initFamilyNames() {
 	}
 }
 
-//是否为人名文本
+//是否为人名文本-支持2 3名字
 func IsPersonName(text string) bool {
 	if len(familyNames) == 0 {
 		initFamilyNames()

+ 26 - 14
src/jy/pretreated/analystep.go

@@ -36,6 +36,8 @@ var formattext4 = regexp.MustCompile("(中标金额[::])设计费用[::][0-9
 //特殊影响分包候选人抽取
 var formattext5 = regexp.MustCompile("投标报价[::]包件1[::][0-9.万元]+[,,]包件2[::][0-9.万元]+[,,]投标总价([::]+)([0-9.万元]+)")
 var formattext6 = regexp.MustCompile("(投标报价[::][0-9.]+)\n([万元]+)")
+var formattext7 = regexp.MustCompile("(报)\n(价)")
+
 var formattext10 = regexp.MustCompile(".*包号\n.*\n.*供应商名称\n.*\n.*(成交|中标)金额\n(.*单位\n)?" +
 	"<td.*>(.*)\n(<td>\n)?.*\n<td.*>[\n]?(.*公司)\n.*\n<td.*>([0-9.,,万元]+)\n")
 var formattext11 = regexp.MustCompile("(项目预算)\n(第[一1](包|标段)[::])([0-9.万元人民币]+)\n" +
@@ -165,9 +167,11 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con = formattext2.ReplaceAllString(con, "${1}")
 	con = formattext3.ReplaceAllString(con, "")
 	con = formattext4.ReplaceAllString(con, "\n${1}:${2}\n")
-	//特殊格式-影响分包候选人抽取-替换
+	//特殊格式-影响分包候选人抽取-候选人等识别-替换
 	con = formattext5.ReplaceAllString(con, "中标金额:${2}\n")
 	con = formattext6.ReplaceAllString(con, "$1$2")
+	con = formattext7.ReplaceAllString(con, "$1$2")
+
 	//改变特殊结构
 	con = formattext10.ReplaceAllString(con, "\n分包$3\n中标单位:$5 中标金额:$6\n")
 	con = formattext11.ReplaceAllString(con, "${1}\n${2}\n预算金额:${4}\n${5}\n预算金额:${7}\n${8}\n")
@@ -194,7 +198,6 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			}
 		}
 	}
-
 	con = formatText(con, "all")
 	job.Content = con
 	//计算表格占比,返回表格数组、占比
@@ -318,24 +321,33 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 
 		//如果表格查询分包-有分包-但是没有有效值的话 ,正文重新查找
 		if len(tabs) > 0 && job.BlockPackage != nil {
-			isUseful := false
-			for _, v := range job.BlockPackage {
-				p_winner := v.Winner
-				p_budget := v.Budget
-				p_bidamout := v.Bidamount
-				if p_winner != "" || p_budget > float64(0) || p_bidamout > float64(0) {
-					isUseful = true
-					break
+			if !isUsefulPackage(job.BlockPackage) { //表格未识别出有效分包-且文本里面无有效字样
+				text_pkg := FindPackageFromText(job.Title, bl.Text, isSite, codeSite)
+				if isUsefulPackage(text_pkg) {
+					job.BlockPackage = FindPackageFromText(job.Title, bl.Text, isSite, codeSite)
 				}
 			}
-			if !isUseful { //表格未识别出有效分包
-				job.BlockPackage = FindPackageFromText(job.Title, bl.Text, isSite, codeSite)
-			}
 		}
 		job.Block = append(job.Block, bl)
 	}
 }
 
+// isUsefulPackage reports whether pkg holds at least one package with a winner,
+// a positive budget or a positive bid amount. A nil or empty map is not useful.
+func isUsefulPackage(pkg map[string]*util.BlockPackage) bool {
+	// Ranging over a nil or empty map runs zero iterations and falls through to false.
+	for _, p := range pkg {
+		hasWinner := p.Winner != ""
+		hasBudget := p.Budget > float64(0)
+		hasBidamount := p.Bidamount > float64(0)
+		if hasWinner || hasBudget || hasBidamount {
+			return true
+		}
+	}
+	return false
+}
+
 func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite string) bool {
 	//块中再查找表格(块,处理完把值赋到块)
 	//bl.Text = formatText(bl.Text, "biangeng")
@@ -567,7 +579,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 			if bv == nil {
 				//特别处理...针对性处理
 				res := tablePackageWinnerReg.FindAllStringSubmatch(tv.Text, -1)
-				if len(res) > 0 {
+				if len(res) > 0 { //特别处理...针对性处理
 					tv.Winner = res[0][2]
 				}
 				pkgMap[tk] = tv

+ 165 - 33
src/jy/pretreated/analytable.go

@@ -35,12 +35,14 @@ var (
 	moneyReg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
 	//特殊文本-为表头
 	specHeadReg = regexp.MustCompile("(成交供应商|中选人)")
-
 	//key不需要清理-例如折扣 费率
-	no_clear_key_reg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
+	noClearKeyReg = regexp.MustCompile(`[((](费率|年|月|日|天|日历天|历天)[))]`)
 	//根据表格的内容判断是不是表头,如果含有金额则不是表头
 	MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
-	GSReg    = regexp.MustCompile(".*公司.*")
+	//特殊情况值,不能为表头
+	noStartHeadReg = regexp.MustCompile("^(\\d标段)$")
+
+	GSReg = regexp.MustCompile(".*公司.*")
 	//判断分包时
 	moneyNum = regexp.MustCompile("[元整¥万]")
 	//对隐藏表格的判断
@@ -54,9 +56,9 @@ var (
 	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
 	//根据表格的key进行分包打分
 	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数]|包[组件])")
-	FindKey_3 = regexp.MustCompile("(标段编号|标包)")
+	FindKey_3 = regexp.MustCompile("(标段编号|标包|包件|包号)")
 	//对值进行分包判断
-	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
+	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|标的[一二三四五六七八九十1-9A-Za-z]+|((子|合同|分|施工|监理)?(包|包件|标)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
 	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
 
 	//判断分包前排除  包件号?
@@ -122,11 +124,12 @@ var (
 	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式|号码)([//及]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
-		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
+		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|招标|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心(地址)?|业主|收料人|采购部"),
 		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
 		"中标单位": regexp.MustCompile("^((拟(定)?|预|最终|唯一)?(中标|成交|中选|供(货|应))((成交))?)[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
 		"监督部门": regexp.MustCompile("投诉受理部门"),
 	}
+	ContactHeadReg              = regexp.MustCompile("^(招标人|采购人)$")
 	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
 	MultipleValueSplitReg       = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
 	BuyerContacts               = []string{"采购单位联系人", "采购单位联系电话", "采购单位联系地址"}
@@ -142,8 +145,8 @@ var (
 
 	//新增-分包-表格-sortKV
 	budgetSortKVReg    = regexp.MustCompile("(预算)")
-	bidamountSortKVReg = regexp.MustCompile("(成交结果[((]万元[))])")
-	winnerSortKVReg    = regexp.MustCompile("(投标人[((]供应商[))]名称)")
+	bidamountSortKVReg = regexp.MustCompile("(成交结果[((]万元[))]|成交金额|履约金额|中[标选]金额)")
+	winnerSortKVReg    = regexp.MustCompile("(投标人[((]供应商[))]名称)|供应商名称|中标候选人|中[标选]人|中[标选]单位")
 )
 
 //在解析时,判断表格元素是否隐藏
@@ -178,7 +181,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
 	//u.Debug(1, k, v1)
 	//指定-key不清理  拦标价(费率或单价等)
 	k1 := ""
-	if !no_clear_key_reg.MatchString(k) {
+	if !noClearKeyReg.MatchString(k) {
 		k1 = ClearKey(k, 2)
 	}
 	//u.Debug(2, k)
@@ -300,21 +303,15 @@ func isResetUnitPriceSortKV(table *Table) {
 func isResetAmountAggregateSortKV(table *Table) {
 	keyGroup := [][]string{}
 	keyGroup = append(keyGroup, []string{"序号", "标项名称", "总价(元)"})
+	keyGroup = append(keyGroup, []string{"序号", "名称", "总价(元)"})
 	keyGroup = append(keyGroup, []string{"序号", "服务内容", "验收金额(元)"})
+	keyGroup = append(keyGroup, []string{"序号", "标项名称", "单价(元)", "数量"})
 	for _, v := range keyGroup {
-		temp := map[string]bool{}
-		for _, v1 := range v {
-			for _, v2 := range table.SortKV.Keys {
-				if v1 == v2 {
-					temp[v1] = true
-				}
-			}
-		}
-		if len(temp) == 3 {
+		if len(v) == 3 {
 			arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
 			arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
 			arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
-			if len(arr1) == len(arr2) && len(arr1) == len(arr3) && len(arr3) > 1 {
+			if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) {
 				amount := float64(0)
 				for _, nv := range arr3 {
 					amount = precisionFloat(amount, qutil.Float64All(nv))
@@ -325,6 +322,27 @@ func isResetAmountAggregateSortKV(table *Table) {
 				break
 			}
 		}
+		if len(v) == 4 {
+			arr1 := u.ConvertInterface(table.SortKV.Map[v[0]])
+			arr2 := u.ConvertInterface(table.SortKV.Map[v[1]])
+			arr3 := u.ConvertInterface(table.SortKV.Map[v[2]])
+			arr4 := u.ConvertInterface(table.SortKV.Map[v[3]])
+			if len(arr1) > 1 && len(arr1) == len(arr2) && len(arr1) == len(arr3) && len(arr1) == len(arr4) {
+				amount := float64(0)
+				for kv, nv := range arr3 {
+					amount = precisionFloat(amount, qutil.Float64All(nv)*qutil.Float64All(arr4[kv]))
+				}
+				if amount > float64(0) {
+					if table.SortKV.Map["总价(元)"] == nil {
+						table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
+						table.SortKV.Keys = append(table.SortKV.Keys, "总价(元)")
+					} else {
+						table.SortKV.Map["总价(元)"] = fmt.Sprintf("%f", amount)
+					}
+				}
+				break
+			}
+		}
 	}
 }
 func isReseterialNumberSortKV(table *Table) {
@@ -333,6 +351,15 @@ func isReseterialNumberSortKV(table *Table) {
 		table.SortKV.Map["序号"] = arr[:3]
 	}
 }
+// isResetWinnerRankingSortKV synthesizes a "排名" (ranking) column for a table
+// whose SortKV holds exactly two keys, "中标人" and "中标价格", so that the
+// winners can later be treated as an ordered candidate list.
+//
+// The column is only added when the "中标人" value converts (via
+// u.ConvertInterface) to a list of two or three entries. One rank is emitted
+// per entry so the new column has the same length as the winner column;
+// previously a fixed two-element ranking was written even for three winners.
+//
+// NOTE(review): assumes downstream consumers pair the SortKV columns by
+// index and therefore expect equal lengths - confirm against sortKVArr.
+func isResetWinnerRankingSortKV(table *Table) {
+	kv := table.SortKV.Map
+	if len(kv) != 2 || kv["中标人"] == nil || kv["中标价格"] == nil {
+		return
+	}
+	winners := u.ConvertInterface(kv["中标人"])
+	if len(winners) < 2 || len(winners) > 3 {
+		return
+	}
+	// len(winners) is 2 or 3 here, so slicing the fixed rank list is safe.
+	// The full slice expression caps capacity to avoid sharing spare storage.
+	ranks := []string{"1", "2", "3"}
+	kv["排名"] = ranks[:len(winners):len(winners)]
+	table.SortKV.Keys = append(table.SortKV.Keys, "排名")
+}
 
 //精度丢失-相加
 func precisionFloat(tmp1, tmp2 float64) float64 {
@@ -391,6 +418,8 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 	isResetAmountAggregateSortKV(table)
 	//特殊类~单价核算到金额上
 	isResetUnitPriceSortKV(table)
+	//特殊组-构建一个候选人编号
+	isResetWinnerRankingSortKV(table)
 	//针对序号组过长
 	isReseterialNumberSortKV(table)
 
@@ -447,7 +476,7 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 		for _, k := range table.SortKV.Keys {
 			if hxrRex.MatchString(k) {
 				v := table.SortKV.Map[k]
-				if new_v, ok := v.(string); ok && findCandidate2.MatchString(new_v) {
+				if new_v, ok := v.(string); ok && (findCandidate2.MatchString(new_v) || new_v == "/") {
 					winsArr = append(winsArr, new_v)
 					sortsArr = append(sortsArr, k)
 				}
@@ -520,7 +549,7 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 	}
 }
 
-var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标总价|含税总价[((]元[))]|总金额)")
+var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标报价|评审价|投标总价|含税总价[((]元[))]|总金额)")
 var winNoMoneyReg *regexp.Regexp = regexp.MustCompile("(得分|时间|序号)")
 
 //处理table.SortKV.value为数组的情况
@@ -578,7 +607,15 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 									if kt[0].Value == "单品报价" && winnertag {
 										kv = "中标金额"
 									} else {
-										kv = kt[0].Value
+										for _, v_kt := range kt {
+											if v_kt.Value == "中标金额" {
+												kv = "中标金额"
+												break
+											}
+										}
+										if kv == "" {
+											kv = kt[0].Value
+										}
 									}
 								}
 							}
@@ -597,6 +634,8 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 											}
 										} else if winMoneyReg.MatchString(k) && !winNoMoneyReg.MatchString(k) && len(tmpPrice[vsk]) == 0 {
 											kv = "中标金额"
+										} else if k == "金额" && len(tmpPrice[vsk]) == 0 {
+											kv = "中标金额"
 										} else { //验证val时如果数组中的第一条数据既不满足sort或者entname 判定此数组数据错误
 											break
 										}
@@ -1615,6 +1654,10 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
 										td.KeyDirect = 2
 										td.KVDirect = 1
 										td.BH = true
+										//特殊情况-不能为表头
+										if noStartHeadReg.MatchString(td.Val) {
+											td.BH = false
+										}
 									}
 								}
 							}
@@ -1713,10 +1756,12 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 		nextdirect, nextvdirect := 0, 0
 		//开始抽取
 		than_once_1, than_once_2 := false, false
+		//特殊候选人结构
+		winnerOrderHeader := false
+		if judgmentWinnerOrderHeaderInfo(table.TRs) {
+			winnerOrderHeader = true
+		}
 		for tr_index, tr := range table.TRs {
-			if tr_index == 7 {
-				//fmt.Println("调试")
-			}
 			bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
 			if bcon {
 				continue
@@ -1742,8 +1787,19 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 					direct, vdirect = 2, 1
 				}
 			}
-
+			//计算一下表格结构是否为指定候选人
 			for _, td := range tr.TDs {
+				if winnerOrderHeader { //特殊类-候选人结构表格
+					if tr_index == 0 {
+						td.KVDirect, td.KeyDirect = 2, 1
+					} else {
+						td.KVDirect, td.KeyDirect = 0, 0
+						td.BH = false
+						if td.Val == "" {
+							td.Val = "/"
+						}
+					}
+				}
 				if !td.BH && td.KVDirect < 3 {
 					if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
 						table.FindTdVal(td, vdirect, direct)
@@ -1847,6 +1903,47 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 	//qutil.Debug("Table-FindKV", table.SortKV.Map)
 }
 
+// judgmentWinnerOrderHeaderInfo reports whether the rows look like the
+// special "winner candidate" table layout.
+//
+// The table qualifies when all of the following hold:
+//   - it has at least three rows;
+//   - at least two rows after the first have the same number of cells as
+//     the first row;
+//   - the first row contains every header text of one of the known header
+//     sets (matched by exact cell value).
+func judgmentWinnerOrderHeaderInfo(TRs []*TR) bool {
+	if len(TRs) < 3 {
+		return false
+	}
+	header := TRs[0].TDs
+
+	// Count data rows whose width matches the header row; two are enough.
+	sameWidth := 0
+	for _, row := range TRs[1:] {
+		if len(row.TDs) == len(header) {
+			sameWidth++
+		}
+		if sameWidth >= 2 {
+			break
+		}
+	}
+	if sameWidth < 2 {
+		return false
+	}
+
+	// hasCell reports whether any header cell equals text exactly.
+	hasCell := func(text string) bool {
+		for _, cell := range header {
+			if cell.Val == text {
+				return true
+			}
+		}
+		return false
+	}
+
+	// Known header combinations that identify the candidate layout.
+	headerSets := [][]string{
+		{"投标人", "中标候选人排序", "投标报价(万元)"},
+		{"投标人", "中标候选人排序", "投标总报价(万元)"},
+	}
+	for _, set := range headerSets {
+		found := 0
+		for _, text := range set {
+			if hasCell(text) {
+				found++
+			}
+		}
+		if found == 3 {
+			return true
+		}
+	}
+	return false
+}
+
 //初始化组装纵向数据
 func initLongitudinalData(table *Table) [][]string {
 	res := make([][]string, len(table.TRs[0].TDs)) //创建table第一行的列数长度
@@ -2076,6 +2173,8 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 				td.Val = tmp_tdVal
 			}
 			key = new_key
+		} else if zbhxrReg.MatchString(key) && findCandidate3.MatchString(tmp_tdVal) {
+			key = "中标单位名称"
 		} else if key == "投标人名称" || key == "单位名称" { //左临上临-拼接
 			tmpnewnear := table.FindNear(near, 1)
 			if tmpnewnear == nil {
@@ -2095,14 +2194,33 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 					if tmpnewnear.MustBH || tmpnewnear.BH {
 						if tmpnewnear.Val == "中标候选人情况" && zbhxrSortReg_3.MatchString(td.Val) {
 							key = "中标候选人第" + zbhxrSortReg_3.FindString(td.Val) + "名"
+						} else if tmpnewnear.Val == "名次" || tmpnewnear.Val == "排名" || tmpnewnear.Val == "序号" {
+
 						} else {
 							key = tmpnewnear.Val + near.Val
 						}
 					}
 				}
 			}
+		} else if key == "采购人(乙方)" || key == "采购人(乙方)" {
+			if findCandidate2.MatchString(td.Val) {
+				key = "中标单位名称"
+			}
+		} else if key == "金额" {
+			//父级表格
+			pre_near := table.FindNear(near, direct)
+			if pre_near != nil {
+				if pre_near.Val == "第一中标候选人" {
+					key = pre_near.Val + key
+				}
+			}
+		} else if key == "第一候选人" && qutil.Float64All(tmp_tdVal) > 0.0 {
+			if tmp_near := table.FindNear(td, 2); tmp_near != nil {
+				if winMoneyReg.MatchString(tmp_near.Val) {
+					key = tmp_near.Val
+				}
+			}
 		}
-
 		if near.Val == "" {
 			key = fmtkey("k", near.TR.RowPos, near.ColPos)
 		}
@@ -2504,6 +2622,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 						}
 					} else { //新增 - 特殊情况 - 查找sortKV - 预算 - 中标金额 - 中标单位
 						for k, v := range tn.SortKV.Map {
+							//kt := u.GetTags(k, isSite, codeSite)
 							if budgetSortKVReg.MatchString(k) {
 								if vs, ok := v.([]string); ok {
 									if len(index) == len(vs) {
@@ -2534,14 +2653,14 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 										moneys := clear.ObjToMoney([]interface{}{vs[nk], ""})
 										if len(moneys) > 0 {
 											if vf, ok := moneys[0].(float64); ok {
-												if !strings.Contains(vs[nk], "万") {
+												if !strings.Contains(vs[nk], "万") && strings.Contains(k, "万") {
 													vf = 10000.0 * vf
 												}
 												bp.Bidamount = vf
 												bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
 											} else if vi, ok := moneys[0].(int); ok {
-												if !strings.Contains(vs[nk], "万") {
-													vi = 10000 * vi
+												if !strings.Contains(vs[nk], "万") && strings.Contains(k, "万") {
+													vi = 10000.0 * vi
 												}
 												bp.Bidamount = float64(vi)
 												bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
@@ -2550,7 +2669,6 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 									}
 								}
 							}
-
 							if winnerSortKVReg.MatchString(k) {
 								if vs, ok := v.([]string); ok {
 									if len(index) == len(vs) {
@@ -2578,6 +2696,9 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 			if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
 				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2, isSite, codeSite)
 			}
+			//if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) && (tn.WinnerOrder == nil || len(tn.WinnerOrder) == 0) {
+			//	vv.WinnerOrder = tn.WinnerOrder
+			//}
 		}
 	}
 	return
@@ -2800,7 +2921,7 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 			haspkgs := []string{}
 			for in2, v1 := range vs {
 				v1 = replPkgConfusion(v1) //替换分包中混淆的词
-				if len([]rune(v1)) < 8 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
+				if len([]rune(v1)) < 30 && !moneyNum.MatchString(v1) && FindVal_1.MatchString(v1) {
 					if key_index == -1 {
 						key_index = in
 					} else if key_index != in {
@@ -3059,6 +3180,8 @@ func replPkgConfusion(v1 string) string {
 	v1 = PreReg1.ReplaceAllString(v1, "")
 	v1 = PreCon.ReplaceAllString(v1, "")
 	v1 = PreCon2.ReplaceAllString(v1, "")
+	v1 = PreCon3.ReplaceAllString(v1, "${1}:")
+	v1 = PreCon5.ReplaceAllString(v1, "${2}")
 	return v1
 }
 
@@ -3121,7 +3244,16 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
 					continue
 				}
 				//采购单位,代理机构,中标单位
-				//qutil.Debug("td_k---", td_k, HasOrderContactType(td_k))
+				//fmt.Println("td_k---", td_k, HasOrderContactType(td_k))
+				//针对名称特殊类-采用临边查询方式-左临
+				if td.Val == "名称" {
+					near := tn.FindNear(td, 1)
+					if near != nil {
+						if ContactHeadReg.MatchString(near.Val) {
+							td_k = near.Val + td.Val
+						}
+					}
+				}
 				for _, k := range HasOrderContactType(td_k) {
 					if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构,中标单位跳过
 						continue

+ 38 - 23
src/jy/pretreated/division.go

@@ -96,18 +96,11 @@ var (
 	}
 
 	//非分包中标单位值
-	unPackageWinnerReg  = regexp.MustCompile("(重新招标|方案包)")
+	unPackageWinnerReg  = regexp.MustCompile("(重新招标|方案包|[((]?包[))]?编号)")
 	conformWinnerKVReg  = regexp.MustCompile("^(中标人|中标银行|第一名)[::](.{4,20}(分行|公司))")
 	conformWinnerKVReg1 = regexp.MustCompile("^[-].{4,15}公司$")
 	conformWinnerKVReg2 = regexp.MustCompile("(.*)?确定(.*公司)为中标人(.*)?")
-
-	conformWinnerTextReg3 = regexp.MustCompile("拟定供应商信息[::\\s]+名称[::](.*)[\\s]+地址")
-
-	/*
-		拟定供应商信息:
-		名称:郑州人民广播电台
-		地址:郑州市金水区内环路17号A座。
-	*/
+	conformWinnerKVReg3 = regexp.MustCompile("拟定供应商信息[::\\s]+名称[::](.*)[\\s]+地址")
 
 	//针对处理-替换敏感词-中标
 	packageReg1 = regexp.MustCompile("(包件[一二三四五1-9][::].*)\n1[、.\\s]+名称[::](.*)\n2[、.\\s]+")
@@ -116,21 +109,28 @@ var (
 	packageReg4 = regexp.MustCompile("([1-9](标段)[::])拟定供应商名称[::](.*公司)\n")
 	packageReg5 = regexp.MustCompile("(第[1-9一二三四五](标段))(中标人)[::](.*)\n")
 	packageReg6 = regexp.MustCompile("供应商名称[::](.{4,20}公司)[((]([0]?1包)[))][、,,](.{4,20}公司)[((]([0]?2包)[))]")
+	packageReg7 = regexp.MustCompile("(包[一二三123][::])\n(.{1,10}[::])(.{4,15}公司)")
+	packageReg8 = regexp.MustCompile("(第一包)供应商名称[::](.{4,15}公司)[\\s ,,]+(第二包)(.{4,15}公司)")
+	packageReg9 = regexp.MustCompile("((合同包|标包|包)[一二三0-9]+([((].{1,10}[))])?[::])(.{4,15}公司([、,,].{4,15}公司)?)\n")
 
 	//预算
 	packageReg20 = regexp.MustCompile("(最高投标限价为|投资预算约[为]?)([0-9.万元人民币]+)")
 	packageReg21 = regexp.MustCompile("(预算金额|项目预算)[::](包[\\s]?1|1[\\s]?包)[::]?([0-9.万元人民币]+)[,,](包[\\s]?2|2[\\s]?包)[::]?([0-9.万元人民币]+)")
+	packageReg22 = regexp.MustCompile("(投标报价[::][0-9.]+)\n(万元)")
 
-	untitleReg = regexp.MustCompile("(技术评分明细表)")
-	unpriceReg = regexp.MustCompile("(^([Xx]\\+[1-9\\.]+元/每)|分析)")
+	//无分包标识构建
+	packageReg31 = regexp.MustCompile("^(中标[((]成交[))]信息|中标信息)\n(1[::])(供应商名称.*)\n(2[::])(供应商名称.*)$")
+	packageReg32 = regexp.MustCompile("^(中标[((]成交[))]信息|中标信息)\n(标段号)(.*)\n(供应商名称.*\n.*\n.*)\n(标段号)(.*)\n(供应商名称.*\n.*\n.*)$")
+
+	//表格单元格-部分情况需要转换
+	pkgTableReg1 = regexp.MustCompile("(第[一二三1-3]包)(.{0,4}通信用户)[::](.{4,25}公司)")
 
+	//特殊
+	untitleReg = regexp.MustCompile("(技术评分明细表)")
+	unpriceReg = regexp.MustCompile("(^([Xx]\\+[1-9\\.]+元/每)|分析|第)")
 	//敏感词-影响分包-替换-分割
 	replaceSenstiveReg1 = regexp.MustCompile("([一二三四五六七八九十1-9][、]项目名称[::].*采购项目)([一二三四五六七八九十1-9][、]采购结果)")
-
-	//价格~单位换行  替换
-	packageReg50 = regexp.MustCompile("(投标报价[::][0-9.]+)\n(万元)")
-
-	blockValidReg = regexp.MustCompile("(采购人|招标人|代理机构)")
+	blockValidReg       = regexp.MustCompile("(采购人|招标人|代理机构)")
 )
 
 //分块
@@ -535,7 +535,7 @@ func DivideSegmentHtml(txt string) []*util.Segment {
 	segs := make([]*util.Segment, 0)
 	_index := 0
 	for _, seg := range _segs {
-		if seg != " " && len(seg) > 1 {
+		if seg != " " && len(seg) >= 1 {
 			_seg := util.Segment{}
 			_index = _index + 1
 			_seg.Index = _index
@@ -725,7 +725,7 @@ func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*
 				}
 
 				//全文找
-				res = conformWinnerTextReg3.FindAllStringSubmatch(v.Text, -1)
+				res = conformWinnerKVReg3.FindAllStringSubmatch(v.Text, -1)
 				if len(res) > 0 {
 					text := res[0][1]
 					if text != "" {
@@ -793,19 +793,28 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	content = regEndWrap.ReplaceAllString(content, "")
 	content = regBZJWarap.ReplaceAllString(content, "")
 	//替换敏感词
-	content = packageReg1.ReplaceAllString(content, "${1}\n中标单位:${2}\n")
+	content = packageReg1.ReplaceAllString(content, "\n${1}\n中标单位:${2}\n")
 	content = packageReg2.ReplaceAllString(content, "\n标段${1}:")
 	content = packageReg3.ReplaceAllString(content, "\n${1}:")
 	content = packageReg4.ReplaceAllString(content, "\n${1}\n中标单位:${3}\n")
 	content = packageReg5.ReplaceAllString(content, "\n${1}\n中标单位:${4}\n")
-	content = packageReg6.ReplaceAllString(content, "\n$2\n中标单位:$1\n$4\n中标单位:$3")
+	content = packageReg6.ReplaceAllString(content, "\n${2}\n中标单位:${1}\n${4}\n中标单位:${3}\n")
+	content = packageReg7.ReplaceAllString(content, "\n${1}\n中标单位:${3}\n")
+	content = packageReg8.ReplaceAllString(content, "\n标一\n中标单位:${2}\n标二\n中标单位:${4}\n")
+	content = packageReg9.ReplaceAllString(content, "\n${1}\n中标单位:${4}\n")
 
 	//替换换行金额
-	content = packageReg50.ReplaceAllString(content, "$1$2")
-
+	content = packageReg22.ReplaceAllString(content, "$1$2")
 	content = packageReg20.ReplaceAllString(content, "\n预算金额:${2}\n")
 	content = packageReg21.ReplaceAllString(content, "\n${2}\n预算金额:${3}\n${4}\n预算金额:${5}")
 
+	//特殊结构重构
+	content = packageReg31.ReplaceAllString(content, "${1}\n包${2}${3}\n包${4}${5}")
+	content = packageReg32.ReplaceAllString(content, "${1}\n${3}${2}\n${4}\n${6}${5}\n${7}")
+
+	//针对单元格内容需要转换
+	content = pkgTableReg1.ReplaceAllString(content, "${1}:\n中标单位:${3}\n")
+
 	if untitleReg.MatchString(title) {
 		return false, ""
 	}
@@ -822,7 +831,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 		if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
 			return false, ""
 		}
-		is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
+		//查找分包-标准化冒号
+		reg_str := ""
+		if strings.Contains(v[0], ":") || strings.Contains(v[0], ":") {
+			reg_str = strings.ReplaceAll(v[0], ":", "")
+			reg_str = strings.ReplaceAll(v[0], ":", "")
+		}
+		is := regexp.MustCompile(reg_str+"[::]*").FindAllStringIndex(con, -1)
 		for _, sv := range is {
 			appendWarpIndex = append(appendWarpIndex, sv[0])
 		}

+ 3 - 2
src/jy/pretreated/multipackage.go

@@ -19,9 +19,10 @@ var (
 	PreCon  = regexp.MustCompile("([\r\n]|^)[\u3000\u2003\u00a0\\s]*(^标项)(\\d\\.)+\\d|[一二三四五1-9、.]+[  \u3000\u2003\u00a0]*((标段|分包)(划分|情况)|(标书))|([上下]一[条页篇][::]?[^,,。\\n]{0,120}|备注[::][^\\n]{0,120}|业绩[::][^\\n,。,]{0,80}|三包(手册|服务|政策|凭证|期|标准|规定|责任|要求|售后)|(要求|提供|质量|国家|享受|负责|实行|执行|承诺|门前|法定|规定).{0,6}三包|“三包”|\\d+万?([个套只支分名][^标包])|[?]|[((]请?注意[::][^((]+[))])")
 	PreCon2 = regexp.MustCompile("[评中开定]\\s?标\\s?[0-9一二三四五六七八九十]+|标[准尺高书注]|[^中]标价|[开鼠投招军指企目]标|包[括含装为内]|[承树]包|CA证书|地点[::].*标|.{30,}合同段")
 	PreCon3 = regexp.MustCompile("(标段[一二三四五六七八九十0-9A-Za-z])[((].*[))][::]")
+	PreCon5 = regexp.MustCompile("^(标一)(包[一二三四五六七八九十0-9A-Za-z]+)$")
 
 	//提取分包标识
-	MultiReg  = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|合同|施工|监理|标包|标|包)(标|包段|项|段|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((项目|包件)([一二三四五六七八九十1-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+))|((包组|包件)[::\\s]+([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((施工|监理)(标段))[::\n]")
+	MultiReg  = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|合同|施工|监理|标包|标|包)(标|包段|项|段|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((项目|包件)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+))|((包组|包件)[::\\s]+([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))|((施工|监理)(标段))[::\n]")
 	PreCon4   = regexp.MustCompile("([一二三四五六七八九十]标段[::¥0-9.]*(万元)?)[、]?")
 	Precon4dw = regexp.MustCompile("(万元|元)")
 	//匹配到的包格式分类统计
@@ -61,7 +62,7 @@ func CheckMultiPackage(con string) (content string, m map[string][]string, b boo
 	con = PreCon3.ReplaceAllString(con, "${1}:")
 
 	//替换敏感词-分割
-	con = replaceSenstiveReg1.ReplaceAllString(con, "$1\n$2")
+	con = replaceSenstiveReg1.ReplaceAllString(con, "${1}\n${2}")
 
 	//修改 \nX标段
 	res := MultiReg.FindAllStringSubmatch(con, -1)

+ 4 - 4
src/jy/pretreated/tablev2.go

@@ -165,8 +165,8 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table, isSite bool, codeSi
 	td.tdIsHb(tr, table, bsontable, isSite, codeSite)
 	bhead := false
 	if td.TR.RowPos == 0 { //第一行
-		no_head_b, _, _, _, _ := CheckCommon(td.Val, "normalhead")
-		if !no_head_b && utf8.RuneCountInString(td.Val) < 15 && td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
+		normalhead, _, _, _, _ := CheckCommon(td.Val, "normalhead")
+		if !normalhead && utf8.RuneCountInString(td.Val) < 15 && td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
 			bhead = true
 		}
 	}
@@ -313,7 +313,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
 	}
 	ub := []*u.Block{}
 	//经过处理的td内容长度大于50,划块,分包
-	if lenval > 50 { //看是否划块
+	if lenval >= 1000 { //看是否划块
 		//u.Debug(txt)
 		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock, isSite, codeSite) //对td的原始值
 		//看是否划块
@@ -331,7 +331,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
 		}
 	}
 	//经过处理的td内容长度小于50,冒号kv,td表头
-	if lenval < 50 {
+	if lenval < 1000 {
 		//		td.SortKV = FindKv(text, "")
 		kvTitle := ""
 		if len(td.TR.TDs) > 0 {

+ 105 - 124
src/jy/pretreated/winnerorder.go

@@ -29,83 +29,75 @@ var (
 	numberReg2        = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
 	thisNumberReg     = regexp.MustCompile("第" + numberReg.String())
 
-	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选(人|供应商)|中标人[1-9]|[第|弟][一二三四五0-9]中标人)")
-	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
-	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|[第|弟][一二三四五六七八九十1-9]+(中标|中选)?[候|侯]选人|中标候选人排名[::]\\d)")
-
-	winnerReg3     = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+[\\s]?名|(中标候选人)[1-9])")
-	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
-	winnerReg5     = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
-	winnerReg6     = regexp.MustCompile("(^(排名)?(第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*|^[中标成交备选候人单位供应商]*[第|弟][一二三四五六七八九十1-9]名))")
-	winnerReg7     = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
+	winnerReg0 = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选(人|供应商)|中标人[1-9]|[第|弟][一二三四五0-9]中标人)")
+	winnerReg1 = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
+	winnerReg2 = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|[第|弟][一二三四五六七八九十1-9]+(中标|中选)?([((]成交[))])?[候|侯]选人|中标候选人排名[::]\\d)")
+	winnerReg3 = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+[\\s]?名|(中标候选人)[1-9])")
+	winnerReg4 = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
+	winnerReg5 = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
+	winnerReg6 = regexp.MustCompile("(^(排名)?(第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*|^[中标成交备选候人单位供应商]*[第|弟][一二三四五六七八九十1-9]名))")
+	winnerReg7 = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
 
 	//带金额
-	winnerReg8  = regexp.MustCompile("(第[一二三四五六七八九十]中选候选人)[::\\s]+?[((]1[))][\\s]+?(单位名称)[::]?(.*)[\\s]+?[((]2[))][\\s]+(参选报价|投标报价(含税))[::]?(.*)")
-    winnerReg9  = regexp.MustCompile("(第[一二三四五六七八九十]中[选|标]?候选人|中标人[1-9])[::\\s]+投标人名称([\\s]+)?([\u4E00-\u9FA5]{4,25})[\\s]+(投标报价)([\\s]+)?([0-9\\.\\s万元]+)")
-    winnerReg10 = regexp.MustCompile("(第[一二三四五六七八九十]中标人)[::\\s]+?报价[¥]?([0-9\\.\\s万元]+)[;;]([\u4E00-\u9FA5]{4,20})")
-	winnerReg11 = regexp.MustCompile("([弟|第][一二三四五六七八九十]中[标|选]候选人)[::\\s]+?(单位名称|投标人名称)[::]?(.*)[\\s]+?(参选报价|投标报价[((]含税[))]|投标报价[((]元[))])[::]?(.*)")
-    winnerReg12 = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名|[弟|第][一二三四五六七八九十0-9](中标)?候选人)[为]?([::\\s ]+)?(.*)[ \\s,,]+?(投标报价|投标价格|投标总报价|金额|投标价为人民币)[::]?([0-9\\.\\s万元]+)")
-	winnerReg13 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])\n(成交候选人|成交供应商)\n(.*)\n([0-9\\.\\s万元]+)")
-	winnerReg14 = regexp.MustCompile("(中标候选人|成交候选人)\n.*\n.*\n第[1-9][\\s]+?名")
- 	winnerReg14_1  = regexp.MustCompile("(第[1-9])[\\s]+?名[::](.{4,20}公司)[\\s]+中标价[::]([0-9\\.\\s万元]+)")
-    winnerReg15 = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9](中标|中选)?候选人)[::](.*)[ \\s\\n,,]+(最终报价[::\\s]+不含税单价.*)?不含税总价[::]?([0-9\\.()\\s万元]+)")
-    winnerReg16 = regexp.MustCompile("(中[标|选]候选人)排序[::]([1-9一二三四五六七])[\\s]+.{1,4}名称[::](.*公司)[\\s]+.{1,4}报价[::]([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
-    winnerReg17 = regexp.MustCompile("(报价金额|投标金额|应答含税总价|预期中标价格)[::]?[\n]?([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
-    winnerReg18 = regexp.MustCompile("([中|投]标候选人[弟|第])([1-9一二三四五])[\\s]?名[::]([\u4E00-\u9FA5]{4,20})([((].*公司[))])?[,,\\s]+投标报价[::]([0-9\\.\\s 万元]+)")
-    winnerReg19 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五]中标候选人)[::]?([\u4E00-\u9FA5]{4,20}公司)[((]?[,,]?(报价|投标报价|投标含税总价)[::]?([0-9\\.\\s万元]+)")
-
-    //名称-金额
-    winnerReg_1 = regexp.MustCompile("(第[一二三1-3]候选人)[::]([\u4E00-\u9FA5()()]{4,25}公司)[((]([0-9.,,万元]+)[))]")
-    winnerReg_2 = regexp.MustCompile("(中标候选人第[一二三1-3][\\s]?名)[::]([\u4E00-\u9FA5()()]{4,25}公司)[,,]其他类型投标报价[::][不]?含税报价[\n]?[((]元[))][::][\n]?([0-9.,,万元]+)[,,]质量")
-    winnerReg_3 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五](中标|成交)候选人)[::\\s]+([\u4E00-\u9FA5]{4,20}公司)[,,;]?(报价|投标报价|投标含税总价|投标报含税总价)[为]?[::]?([0-9\\.\\s万元]+)")
-	winnerReg_4 = regexp.MustCompile("(第[一二三]中标候选人)(.{4,15}(公司|院)).*\n投标报价[((]元[))]([\\s]+)?([0-9.,,万元]+)")
-
-
-
-
+	winnerReg8    = regexp.MustCompile("(第[一二三四五六七八九十]中选候选人)[::\\s]+?[((]1[))][\\s]+?(单位名称)[::]?(.*)[\\s]+?[((]2[))][\\s]+(参选报价|投标报价(含税))[::]?(.*)")
+	winnerReg9    = regexp.MustCompile("(第[一二三四五六七八九十]中[选|标]?候选人|中标人[1-9])[::\\s]+投标人名称([\\s]+)?([\u4E00-\u9FA5]{4,25})[\\s]+(投标报价)([\\s]+)?([0-9\\.\\s万元]+)")
+	winnerReg10   = regexp.MustCompile("(第[一二三四五六七八九十]中标人)[::\\s]+?报价[¥]?([0-9\\.\\s万元]+)[;;]([\u4E00-\u9FA5]{4,20})")
+	winnerReg11   = regexp.MustCompile("([弟|第][一二三四五六七八九十]中[标|选]候选人)[::\\s]+?(单位名称|投标人名称)[::]?(.*)[\\s]+?(参选报价|投标报价[((]含税[))]|投标报价[((]元[))])[::]?(.*)")
+	winnerReg12   = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名|[弟|第][一二三四五六七八九十0-9](中标)?候选人)[为]?([::\\s ]+)?(.*)[ \\s,,]+?(投标报价|投标价格|投标总报价|金额|投标价为人民币)[::]?([0-9\\.\\s万元]+)")
+	winnerReg13   = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])\n(成交候选人|成交供应商)\n(.*)\n([0-9\\.\\s万元]+)")
+	winnerReg14   = regexp.MustCompile("(中标候选人|成交候选人)\n.*\n.*\n第[1-9][\\s]+?名")
+	winnerReg14_1 = regexp.MustCompile("(第[1-9])[\\s]+?名[::](.{4,20}公司)[\\s]+中标价[::]([0-9\\.\\s万元]+)")
+	winnerReg15   = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9](中标|中选)?候选人)[::](.*)[ \\s\\n,,]+(最终报价[::\\s]+不含税单价.*)?不含税总价[::]?([0-9\\.()\\s万元]+)")
+	winnerReg16   = regexp.MustCompile("(中[标|选]候选人)排序[::]([1-9一二三四五六七])[\\s]+.{1,4}名称[::](.*公司)[\\s]+.{1,4}报价[::]([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
+	winnerReg17   = regexp.MustCompile("(报价金额|投标金额|应答含税总价|预期中标价格)[::]?[\n]?([¥〇0-9\\.人民币零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整]+)")
+	winnerReg18   = regexp.MustCompile("([中|投]标候选人[弟|第])([1-9一二三四五])[\\s]?名[::]([\u4E00-\u9FA5]{4,20})([((].*公司[))])?[,,\\s]+投标报价[::]([0-9\\.\\s 万元]+)")
+	winnerReg19   = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五]中标候选人)[::]?([\u4E00-\u9FA5]{4,20}公司)[((]?[,,]?(报价|投标报价|投标含税总价)[::]?([0-9\\.\\s万元]+)")
+
+	//名称-金额
+	winnerReg_1 = regexp.MustCompile("(第[一二三1-3]候选人)[::]([\u4E00-\u9FA5()()]{4,25}公司)[((]([0-9.,,万元]+)[))]")
+	winnerReg_2 = regexp.MustCompile("(中标候选人第[一二三1-3][\\s]?名)[::]([\u4E00-\u9FA5()()]{4,25}公司)[,,]其他类型投标报价[::][不]?含税报价[\n]?[((]元[))][::][\n]?([0-9.,,万元]+)[,,]质量")
+	winnerReg_3 = regexp.MustCompile("([弟|第][1-9一二三四五]名(中标候选人)?|[弟|第][1-9一二三四五](中标|成交)候选人)[::\\s]+([\u4E00-\u9FA5]{4,20}公司)[,,;]?(报价|投标报价|投标含税总价|投标报含税总价)[为]?[::]?([0-9\\.\\s万元]+)")
+	winnerReg_4 = regexp.MustCompile("(第[一二三]中标候选人)(.{4,15}(公司|院|研究所)).*\n投标报价[((]元[))]([\\s]+)?([0-9.,,万元]+)")
+
+	//特殊结构-转换-成对出现
+	winnerReg_11   = regexp.MustCompile("(成交候选人排序)\n[1][.、].*\n[2][.、].*\n[3][.、].*")
+	winnerReg_11_1 = regexp.MustCompile("\n([1-3])[.、](.{4,15}公司)[\\s  ]+([0-9.,,万元人民币]+)")
 
 	//特殊格式转化
 	winnerReg50 = regexp.MustCompile("(第[一二三]名)\n单位名称\n(.{4,20}公司)[\n\\s]+投标报价\n大写\n([\u4E00-\u9FA5]+)\n")
-
 	winnerReg51 = regexp.MustCompile("(中标候选人第[1-9一二三四五])[\\s](名)")
 	winnerReg52 = regexp.MustCompile("(中标金额[::][0-9.]+)\n([万元]+)")
-
-    winnerReg53 = regexp.MustCompile("中标候选人[\\s ](第一候选人)[\\s ](第二候选人)[\\s ]?(第三候选人)?\n单位名称[\\s ](.{4,20}公司)[\\s ](.{4,20}公司)[\\s ]?(.{4,20}公司)?\n投标价[((]万元[))]([\\s  ]+)([0-9.]+)([\\s  ]+)([0-9.]+)([\\s  ]+)?([0-9.]+)?")
-    winnerReg54 = regexp.MustCompile("(第[一二三1-3]中标候选人)[ \\s]([\u4E00-\u9FA5,]{4,20}公司)[ \\s](人民币|¥)([0-9,,.万元]+)")
-    winnerReg55 = regexp.MustCompile("排序\\s(中标候选人)名称\\s投标报价.*\n([1一])[\\s]+(.{4,20}公司)[\\s]+([0-9.]+).*\n([2二])[\\s]+(.{4,20}公司)[\\s]+([0-9.]+).*\n")
-
+	winnerReg53 = regexp.MustCompile("中标候选人[\\s ](第一候选人)[\\s ](第二候选人)[\\s ]?(第三候选人)?\n单位名称[\\s ](.{4,20}公司)[\\s ](.{4,20}公司)[\\s ]?(.{4,20}公司)?\n投标价[((]万元[))]([\\s  ]+)([0-9.]+)([\\s  ]+)([0-9.]+)([\\s  ]+)?([0-9.]+)?")
+	winnerReg54 = regexp.MustCompile("(第[一二三1-3]中标候选人)[ \\s]([\u4E00-\u9FA5,]{4,20}公司)[ \\s](人民币|¥)([0-9,,.万元]+)")
+	winnerReg55 = regexp.MustCompile("排序\\s(中标候选人)名称\\s投标报价.*\n([1一])[\\s]+(.{4,20}公司)[\\s]+([0-9.]+).*\n([2二])[\\s]+(.{4,20}公司)[\\s]+([0-9.]+).*\n")
 
 	//特殊爬虫-特殊情况
-    winnerReg80 = regexp.MustCompile("投标报价\n投标人.*\n.*[((]万元[))]\n"+
+	winnerReg80 = regexp.MustCompile("投标报价\n投标人.*\n.*[((]万元[))]\n" +
 		"(.*)[\n]?(第[1一]中标候选人).*\\s([0-9]+[.][0-9]+|[0-9]+)\n(.*司)\n" +
 		"(.*)[\n]?(第[2二]中标候选人).*\\s([0-9]+[.][0-9]+|[0-9]+)[\n]?(.*司)?")
-    winnerReg81 = regexp.MustCompile("投标报价\n[((]万元[))]\n"+
+	winnerReg81 = regexp.MustCompile("投标报价\n[((]万元[))]\n" +
 		"(.*)[\n]?(.*司)[\\s]+(第[1一]中标候选人).*\\s([0-9]+[.][0-9]+|[0-9]+)\n" +
 		"(.*)[\n]?(.*司)[\\s]+(第[2二]中标候选人).*\\s([0-9]+[.][0-9]+|[0-9]+)\n")
-    winnerReg82 = regexp.MustCompile("投标人.*\n.*[((]万元[))]\n元[))]\n"+
-	"(.*)\\s(第[1一]中标候)\n.*\\s([0-9]+[.][0-9]+|[0-9]+)\n(.*公司)\\s(选人)\n" +
-	"(.*)\\s(第[2二]中标候)\n.*\\s([0-9]+[.][0-9]+|[0-9]+)\n(.*公司)\\s(选人)\n")
+	winnerReg82 = regexp.MustCompile("投标人.*\n.*[((]万元[))]\n元[))]\n" +
+		"(.*)\\s(第[1一]中标候)\n.*\\s([0-9]+[.][0-9]+|[0-9]+)\n(.*公司)\\s(选人)\n" +
+		"(.*)\\s(第[2二]中标候)\n.*\\s([0-9]+[.][0-9]+|[0-9]+)\n(.*公司)\\s(选人)\n")
 
-//格式化中标金额换行
-    winnerReg100  = regexp.MustCompile("中标金额:[\\s]+([0-9\\.万元]+)")
+	//格式化中标金额换行
+	winnerReg100 = regexp.MustCompile("中标金额:[\\s]+([0-9\\.万元]+)")
 
 	//清洗影响候选人-抽取的文本
 	cleanWinnerReg1 = regexp.MustCompile("第[一二三123]中标候选人项目业绩[::]")
 	cleanWinnerReg2 = regexp.MustCompile("(第[一二三123])中标单位名称[::]")
 
-
-
-
-	 //不带金额
-	winnerReg20     = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)")
-    winnerReg21     = regexp.MustCompile("(石城(.*公司|.*厂|.*有\n限公司))[0-9.]+([弟|第][一二三四五六七八九十0-9])成交[\n]?候选人")
-	winnerReg22     = regexp.MustCompile("投标人[::](.{4,20}公司)[\\s-]+标段[::][1-3][\\s-]+排名[::]([1-9])")
-	winnerReg23     = regexp.MustCompile("([\u4E00-\u9FA5]{4,20})\n(有限公司|公司)[\\s]+(第[一二三四五1-9]中[选|标]候选人)")
-    winnerReg24 = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5,]{4,30}([((]集团[))])?(公司|有限公司)|)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
+	//不带金额
+	winnerReg20 = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)")
+	winnerReg21 = regexp.MustCompile("(石城(.*公司|.*厂|.*有\n限公司))[0-9.]+([弟|第][一二三四五六七八九十0-9])成交[\n]?候选人")
+	winnerReg22 = regexp.MustCompile("投标人[::](.{4,20}公司)[\\s-]+标段[::][1-3][\\s-]+排名[::]([1-9])")
+	winnerReg23 = regexp.MustCompile("([\u4E00-\u9FA5]{4,20})\n(有限公司|公司)[\\s]+(第[一二三四五1-9]中[选|标]候选人)")
+	winnerReg24 = regexp.MustCompile("[\\s\\n]+([\u4E00-\u9FA5,]{4,30}([((]集团[))])?(公司|有限公司)|)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
 	//winnerReg24     = regexp.MustCompile("[\\s\\n]+(.{4,20}公司)[\\s\\n]+(第[一二三四五六七八九十]中[选|标]?候选人)")
 
-
-    winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
+	winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
 	colonEndReg    = regexp.MustCompile("[::]$")
 	toWarpReg      = regexp.MustCompile("[,。,;;]+")
 	findamountReg  = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
@@ -115,16 +107,15 @@ var (
 	colonSpaceReg  = regexp.MustCompile("[::]\\s+")
 	findCandidate  = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])|工作室)")
 	findCandidate2 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会体]((成员|牵头人)[))]?)?|工作室|有限司)$)")
+	findCandidate3 = regexp.MustCompile("(^.{5,}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|局|站|城|处|行|部|队|联合[会体]((成员|牵头人)[))]?)?|工作室|有限司)([((]第一中标候选人[))])$)")
 	clearSpace1    = regexp.MustCompile("([((][\\d一二三四五六七八九十][))][\\s\u3000\u2003\u00a0\\t]*|<[^>].+?>)")
 	clearSpace2    = regexp.MustCompile("</?[^>]+>")
-	offerReg       = regexp.MustCompile("(中标|磋商|投标|报|单|成交)总?(价|金额)")
-	nofferReg       = regexp.MustCompile("(费率|折扣率)")
-	nobidValReg       = regexp.MustCompile("^(\\d{2}%|[0-9]+\\.[0-9]+%)$")
-
+	offerReg       = regexp.MustCompile("(中标|磋商|投标|报\\n|报|单|成交)总?(价|金额)")
+	nofferReg      = regexp.MustCompile("(费率|折扣率)")
+	nobidValReg    = regexp.MustCompile("^(\\d{2}%|[0-9]+\\.[0-9]+%)$")
 
 	//特殊 - 不分割
- 	winnerNoSplitReg   = regexp.MustCompile("^(第[一二三四五六七八九十]中[选|标]?候选人)[::]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))[,,]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))$")
-
+	winnerNoSplitReg = regexp.MustCompile("^(第[一二三四五六七八九十]中[选|标]?候选人)[::]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))[,,]([\u4E00-\u9FA5]{4,20}([((]集团[))])?(有限公司|公司))$")
 )
 
 /*
@@ -133,84 +124,74 @@ var (
  *from 来源
  */
 func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool, codeSite string) []map[string]interface{} {
-	if clearSpace2.MatchString(text){
+	if clearSpace2.MatchString(text) {
 		text = TextAfterRemoveTable(text)
-	}//评得分估|标的|班子成员|人员
-	text = winnerRegclear.ReplaceAllString(text,"")
-	if nswinnertabletag.MatchString(text) && !winnerReg0.MatchString(text){
+	} //评得分估|标的|班子成员|人员
+	text = winnerRegclear.ReplaceAllString(text, "")
+	if nswinnertabletag.MatchString(text) && !winnerReg0.MatchString(text) {
 		return []map[string]interface{}{}
 	}
 
-	if codeSite=="sh_shszfhcxjsglwyh_jsgc_zhbhxrgs" {
-		text = winnerReg80.ReplaceAllString(text,"\n${2}:${1}${4} 中标金额:${3} 万元\n${6}:${5}${8} 中标金额:${7} 万元\n")
-		text = winnerReg81.ReplaceAllString(text,"\n${3}:${1}${2} 中标金额:${4} 万元\n${7}:${5}${6} 中标金额:${8} 万元\n")
-		text = winnerReg82.ReplaceAllString(text,"\n${2}${5}:${1}${4} 中标金额:${3} 万元\n${7}${10}:${6}${9} 中标金额:${8} 万元\n")
+	if codeSite == "sh_shszfhcxjsglwyh_jsgc_zhbhxrgs" {
+		text = winnerReg80.ReplaceAllString(text, "\n${2}:${1}${4} 中标金额:${3} 万元\n${6}:${5}${8} 中标金额:${7} 万元\n")
+		text = winnerReg81.ReplaceAllString(text, "\n${3}:${1}${2} 中标金额:${4} 万元\n${7}:${5}${6} 中标金额:${8} 万元\n")
+		text = winnerReg82.ReplaceAllString(text, "\n${2}${5}:${1}${4} 中标金额:${3} 万元\n${7}${10}:${6}${9} 中标金额:${8} 万元\n")
 	}
 
-
 	//指定清理-替换-影响抽取候选人
-	text = cleanWinnerReg2.ReplaceAllString(text,"${1}中标候选人:")
-
+	text = cleanWinnerReg2.ReplaceAllString(text, "${1}中标候选人:")
 
 	//单位类型
 	text = winnerReg5.ReplaceAllString(text, "\n${3}:${1}\n")
-	text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n")
-	text = winnerReg21.ReplaceAllString(text,"\n${3}成交候选人:${1}\n")
-	text = strings.ReplaceAll(text,"有\n限公司","有限公司")
-	text = winnerReg22.ReplaceAllString(text,"\n中标候选人第${2}名:${1}\n")
-	text = winnerReg23.ReplaceAllString(text,"\n${3}:${1}${2}\n")
-	text = winnerReg24.ReplaceAllString(text,"\n${4}:${1}\n")
+	text = winnerReg20.ReplaceAllString(text, "\n${4}:${2}\n")
+	text = winnerReg21.ReplaceAllString(text, "\n${3}成交候选人:${1}\n")
+	text = strings.ReplaceAll(text, "有\n限公司", "有限公司")
+	text = winnerReg22.ReplaceAllString(text, "\n中标候选人第${2}名:${1}\n")
+	text = winnerReg23.ReplaceAllString(text, "\n${3}:${1}${2}\n")
+	text = winnerReg24.ReplaceAllString(text, "\n${4}:${1}\n")
 
 	//带金额
-	text = winnerReg8.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-	text = winnerReg9.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-	text = winnerReg10.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${2}\n")
-	text = winnerReg11.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-	text = winnerReg12.ReplaceAllString(text,"\n${1}:${4}\n中标金额:${6}\n")
-	text = winnerReg13.ReplaceAllString(text,"\n${1}${2}:${3}\n中标金额:${4}\n")
-	text = winnerReg15.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-	text = winnerReg16.ReplaceAllString(text,"\n第${2}${1}:${3}\n中标金额:${4}\n")
-	text = winnerReg17.ReplaceAllString(text,"\n中标金额:${2}\n")
-	text = winnerReg18.ReplaceAllString(text,"\n${1}${2}名:${3}\n中标金额:${5}\n")
-	text = winnerReg19.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
-
-
+	text = winnerReg8.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${5}\n")
+	text = winnerReg9.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${5}\n")
+	text = winnerReg10.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${2}\n")
+	text = winnerReg11.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${5}\n")
+	text = winnerReg12.ReplaceAllString(text, "\n${1}:${4}\n中标金额:${6}\n")
+	text = winnerReg13.ReplaceAllString(text, "\n${1}${2}:${3}\n中标金额:${4}\n")
+	text = winnerReg15.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${5}\n")
+	text = winnerReg16.ReplaceAllString(text, "\n第${2}${1}:${3}\n中标金额:${4}\n")
+	text = winnerReg17.ReplaceAllString(text, "\n中标金额:${2}\n")
+	text = winnerReg18.ReplaceAllString(text, "\n${1}${2}名:${3}\n中标金额:${5}\n")
+	text = winnerReg19.ReplaceAllString(text, "\n${1}:${3}\n中标金额:${5}\n")
 
 	//带金额
-	text = winnerReg_1.ReplaceAllString(text,"\n${1}:${2} 中标金额:${3}\n")
-	text = winnerReg_2.ReplaceAllString(text,"\n${1}:${2} 中标金额:${3}\n")
-	text = winnerReg_3.ReplaceAllString(text,"\n${1}:${4} 中标金额:${6}\n")
-	text = winnerReg_4.ReplaceAllString(text,"\n${1}:${2} 中标金额:${5}\n")
-
-
-
-
-
+	text = winnerReg_1.ReplaceAllString(text, "\n${1}:${2} 中标金额:${3}\n")
+	text = winnerReg_2.ReplaceAllString(text, "\n${1}:${2} 中标金额:${3}\n")
+	text = winnerReg_3.ReplaceAllString(text, "\n${1}:${4} 中标金额:${6}\n")
+	text = winnerReg_4.ReplaceAllString(text, "\n${1}:${2} 中标金额:${5}\n")
+
+	//结构转换-与识别
+	if winnerReg_11.MatchString(text) {
+		text = winnerReg_11_1.ReplaceAllString(text, "\n第${1}中标候选人:${2} 中标金额:${3}\n")
+	}
 
 	//中标金额格式化
-	text = winnerReg100.ReplaceAllString(text,"中标金额:${1}")
-
+	text = winnerReg100.ReplaceAllString(text, "中标金额:${1}")
 	//特殊格式
-	text = winnerReg14_1.ReplaceAllString(text,"\n中标候选人${1}名:${2}\n中标金额:${3}\n")
-
-
+	text = winnerReg14_1.ReplaceAllString(text, "\n中标候选人${1}名:${2}\n中标金额:${3}\n")
 
-	if winnerReg50.MatchString(text) && strings.Contains(text,"中标候选人公示") {
-		text = winnerReg50.ReplaceAllString(text,"\n中标候选人${1}\n中标单位:${2}\n中标金额:${3}\n")
+	if winnerReg50.MatchString(text) && strings.Contains(text, "中标候选人公示") {
+		text = winnerReg50.ReplaceAllString(text, "\n中标候选人${1}\n中标单位:${2}\n中标金额:${3}\n")
 	}
 
-	text = winnerReg51.ReplaceAllString(text,"${1}${2}")
-	text = winnerReg52.ReplaceAllString(text,"${1}${2}")
-
-	text = winnerReg53.ReplaceAllString(text,"\n${1}:${4} 中标金额:${8} 万元\n${2}:${5} 中标金额:${10} 万元\n")
-	text = winnerReg54.ReplaceAllString(text,"\n${1}:${2} 中标金额:${4}\n")
-	text = winnerReg55.ReplaceAllString(text,"\n第${2}${1}\n中标单位:${3} 中标金额:${4}\n第${5}${1}\n中标单位:${6} 中标金额:${7}\n\n")
+	text = winnerReg51.ReplaceAllString(text, "${1}${2}")
+	text = winnerReg52.ReplaceAllString(text, "${1}${2}")
 
+	text = winnerReg53.ReplaceAllString(text, "\n${1}:${4} 中标金额:${8} 万元\n${2}:${5} 中标金额:${10} 万元\n")
+	text = winnerReg54.ReplaceAllString(text, "\n${1}:${2} 中标金额:${4}\n")
+	text = winnerReg55.ReplaceAllString(text, "\n第${2}${1}\n中标单位:${3} 中标金额:${4}\n第${5}${1}\n中标单位:${6} 中标金额:${7}\n\n")
 
 	//清洗一下影响候选人-抽取的文字
-	text = cleanWinnerReg1.ReplaceAllString(text,"")
-
-
+	text = cleanWinnerReg1.ReplaceAllString(text, "")
 
 	text = clearSpace1.ReplaceAllString(text, "") //清理(1)	单位名称:成都维诺信科技有限公司-->单位名称:成都维诺信科技有限公司
 	if strings.TrimSpace(text) == "" {
@@ -275,7 +256,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 		for _, v := range array {
 			var wrfg *WinnerFlag
 			if isWinnerReg1 {
-				if v[4]<0 || v[5]<0 {
+				if v[4] < 0 || v[5] < 0 {
 					continue
 				}
 				wrfg = &WinnerFlag{
@@ -284,7 +265,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 					textEnd:   v[5],
 				}
 			} else {
-				if v[2]<0 || v[3]<0 {
+				if v[2] < 0 || v[3] < 0 {
 					continue
 				}
 				wrfg = &WinnerFlag{
@@ -357,7 +338,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 			if !winnerNoSplitReg.MatchString(v) {
 				v = toWarpReg.ReplaceAllString(v, "\n")
 				text += v
-			}else {
+			} else {
 				text += v
 			}
 
@@ -402,9 +383,9 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 					object["sortstr"] = thisNumberReg.FindString(k)
 					object["type"] = i
 				}
-			}else { //中标金额  - 折扣率系数-待定
+			} else { //中标金额  - 折扣率系数-待定
 				findOfferFlag := false
-				if offerReg.MatchString(k) && !nofferReg.MatchString(k){
+				if offerReg.MatchString(k) && !nofferReg.MatchString(k) {
 					findOfferFlag = true
 				} else {
 					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"}, isSite, codeSite)
@@ -413,12 +394,12 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 					}
 				}
 				//找到了中标金额
-				if findOfferFlag && object["entname"] != nil  {
+				if findOfferFlag && object["entname"] != nil {
 					val := wo.clear("中标金额", v+GetMoneyUnit(k, v))
 					if val != nil && !nobidValReg.MatchString(qutil.ObjToString(val)) {
 						moneys := clear.ObjToMoney([]interface{}{val, ""})
 						if len(moneys) > 0 {
-							if vf, ok := moneys[0].(float64); ok &&  moneys[len(moneys)-1].(bool){
+							if vf, ok := moneys[0].(float64); ok && moneys[len(moneys)-1].(bool) {
 								object["price"] = float64(vf)
 							}
 						}

+ 8 - 4
src/jy/util/util.go

@@ -261,11 +261,15 @@ func ConvertInterface(t interface{}) []string {
 
 func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 	p_list := []map[string]interface{}{}
-	if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
-		p_list = qu.ObjArrToMapArr(yl_list_1)
+	if yl_list_0, ok_0 := t.([]map[string]interface{}); ok_0 {
+		return yl_list_0
 	} else {
-		if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
-			p_list = qu.ObjArrToMapArr(yl_list_2)
+		if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
+			p_list = qu.ObjArrToMapArr(yl_list_1)
+		} else {
+			if yl_list_2, ok_2 := t.([]interface{}); ok_2 {
+				p_list = qu.ObjArrToMapArr(yl_list_2)
+			}
 		}
 	}
 	return p_list

+ 7 - 7
src/main.go

@@ -6,7 +6,6 @@ import (
 	_ "jy/admin/audit"
 	_ "jy/admin/distribution"
 	_ "jy/admin/task"
-	"jy/clear"
 	"jy/extract"
 	_ "jy/front"
 	. "jy/router"
@@ -14,6 +13,7 @@ import (
 	"net/http"
 	_ "net/http/pprof"
 	qu "qfw/util"
+	"regexp"
 )
 
 func init() {
@@ -29,6 +29,7 @@ func init() {
 }
 
 func main() {
+	testMain()
 	extract.ExtractUdpUpdateMachine() //节点上传~构建
 	extract.ExtractUdp()              //udp通知抽取
 	go Router.Run(":" + qu.ObjToString(u.Config["port"]))
@@ -42,10 +43,9 @@ func main() {
 
 //验证规则
 func testMain() {
-	text := "2023年5月10日下午1点30分"
-	if data := clear.ObjToTimestamp([]interface{}{text}, ""); len(data) > 0 {
-		if ext_publishtime := qu.Int64All(data[0]); ext_publishtime > int64(0) {
-			log.Debug(ext_publishtime)
-		}
-	}
+	con := `。
+	撒大声地
+`
+	var pkgTableReg1 = regexp.MustCompile("(第[一二三1-3]包)(.{0,4}通信用户)[::](.{4,25}公司)")
+	con = pkgTableReg1.ReplaceAllString(con, "${1}\n中标单位:${2}")
 }

+ 3 - 8
src/res/tablev1.json

@@ -5,10 +5,9 @@
 		"(名单|证号|名称|要求|时间|日期|地点|单位|条款|机构|范围|情况|概况|品名|规格|参数|标准|指标|型号|限价|数量|方式|等级|依据|明细|概况|内容|次数|产品|性质|地区|地址|币种|主题|详情|说明|代理(公司|机构)|节支率|名单|结果|结果公示)$|^(职称|姓名|级别|职称专业)$__",
 		"^(包号|联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
 		"(专家|评委|[打得]分|附件材料)[a-zA-Z0-9]*$__M",
-		"(基本需求.{0,15}|.*联系方式|总计|包组|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
+		"(基本需求.{0,15}|.*联系方式|总计|包组^[一二三123]|总监|经理|负责人|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
 		"(品牌|份额|姓名|起讫桩号|服务期|建设期限|限价|邮编|面积|组织形式|发布单位|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
-		"[\\d]+标段$__M",
-		"(\\W{2,10}(名称|参数[及]?要求))$__M"
+		"(\\W{2,10}(名称|参数[及]?要求)|[\\d]+标段)$__M"
 	],
 	"jghead":[
 		"(报价|(元(/人|/间/天))|中转|航延|代理机构名称|地址和联系方式|联系电话|项目负责人及注册号|不含税预算金额\n(万元))__M",
@@ -36,11 +35,7 @@
 		"(业绩|资质|原因|相关资料)$__",
 		"([废流落]标|评审)(原因|情况)__",
 		"(中标|成交)(候选)*(人|供应商|单位)((类似)*业绩|资质)__",
-		"否决投标情况",
-		"落标供应商及落标原因",
-		"被废标供应商名称",
-		"主要人员",
-		"年估算额年(万元)"
+		"(否决投标情况|落标供应商及落标原因|被废标供应商名称|主要人员|年估算额年[((]万元[))])__"
 	],
 	"bidorder":[
 		"(.{0,8}排[序名]$|名次|^序号$)__sort",

+ 1 - 1
udpcontrol/src/method.go

@@ -123,7 +123,7 @@ func lastUdpMonitoring() {
 	}
 }
 
-//监控~
+//监控~下节点
 func nextUdpMonitoring() {
 	for {
 		udptaskmap.Range(func(k, v interface{}) bool {

+ 3 - 2
udpcontrol/src/updprocess.go

@@ -167,12 +167,13 @@ func sendStopExtractNode(splitArr []map[string]interface{}) {
 func sendNextNode(sid string, eid string) {
 	//更新记录状态
 	updateProcessUdpIdsInfo(sid, eid)
-
 	for _, node := range nextNode {
+		key := sid + "-" + eid + "-" + qu.ObjToString(node["stype"])
 		by, _ := json.Marshal(map[string]interface{}{
 			"gtid":  sid,
 			"lteid": eid,
 			"stype": qu.ObjToString(node["stype"]),
+			"key":   key,
 		})
 		addr := &net.UDPAddr{
 			IP:   net.ParseIP(node["addr"].(string)),
@@ -183,7 +184,7 @@ func sendNextNode(sid string, eid string) {
 		//只监控清洗流程
 		if qu.IntAll(node["port"]) == 1799 {
 			new_node := &udpNode{by, addr, time.Now().Unix()}
-			udptaskmap.Store(string(by), new_node)
+			udptaskmap.Store(key, new_node)
 		}
 	}
 	log.Debug("udp通知抽取完成...通知下阶段udp-敏感词,补城市", sid, "~", eid)

+ 0 - 53
udpcontrol/src/zk_taskmail.go

@@ -1,53 +0,0 @@
-package main
-//
-//import (
-//	"fmt"
-//	"io/ioutil"
-//	"log"
-//	"net/http"
-//	"sync"
-//	"time"
-//)
-//
-//var udptaskmap = &sync.Map{}
-//var tomail string
-//var api string
-//
-//type udpNode struct {
-//	timestamp int64
-//}
-//
-//func checkMailJob() {
-//
-//	//阿里云内网无法发送邮件
-//	jkmail, _ := sysconfig["jkmail"].(map[string]interface{})
-//	if jkmail != nil {
-//		tomail, _ = jkmail["to"].(string)
-//		api, _ = jkmail["api"].(string)
-//	}
-//	for {
-//		udptaskmap.Range(func(k, v interface{}) bool {
-//			now := time.Now().Unix()
-//			node, _ := v.(*udpNode)
-//			if now-node.timestamp >= 600 {
-//				udptaskmap.Delete(k)
-//				res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", api, tomail, "警告:抽取十分钟段落未抽完",k.(string)))
-//				if err == nil {
-//					defer res.Body.Close()
-//					read, err := ioutil.ReadAll(res.Body)
-//					log.Println("控制中心-邮件发送成功:", string(read), err)
-//				}else {
-//					log.Println("控制中心-邮件发送异常:", err)
-//				}
-//			}
-//			return true
-//		})
-//		time.Sleep(60 * time.Second)
-//	}
-//}
-//
-///*
-//key:=fmt.Sprintf("%s~%s",sid,eid)
-//node := &udpNode{time.Now().Unix()}
-//udptaskmap.Store(key, node)
-//*/

Some files were not shown because too many files changed in this diff