Bladeren bron

抽取优化备份 11月华设

zhengkun 2 jaren geleden
bovenliggende
commit
3b4bba8709

+ 3 - 0
extcity/src/mark

@@ -22,3 +22,6 @@
     "company_address": 1
   }
 }
+
+
+

+ 1 - 0
src/config.json

@@ -16,6 +16,7 @@
     "iscltlog": false,
     "brandgoods": false,
     "pricenumber":true,
+    "inscribe": false,
     "udpport": "6601",
     "udptaskid": "60b493c2e138234cb4adb640",
     "nextNode": [],

+ 14 - 8
src/jy/clear/cutspace.go

@@ -17,6 +17,7 @@ var (
 )
 
 var spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n", "\u0001"}
+
 func init() {
 	cutSpace, _ = regexp.Compile(`^[\s]*|[\s]*$`)
 	cutAllSpace, _ = regexp.Compile(`\s*`)
@@ -77,7 +78,7 @@ func CutLableStr(con string) string {
 }
 
 //清理开始、结尾的空白字符
-func CutSpace(data []interface{},spidercode ...string) []interface{} {
+func CutSpace(data []interface{}, spidercode ...string) []interface{} {
 	tmp := cutSpace.ReplaceAllString(strings.Replace(fmt.Sprint(data[0]), " ", " ", -1), "")
 	tmp = replaceSymbol(tmp, spaces)
 	//fmt.Println("cutspace", tmp)
@@ -86,7 +87,7 @@ func CutSpace(data []interface{},spidercode ...string) []interface{} {
 }
 
 //清理所有空白符
-func CutAllSpace(data []interface{},spidercode ...string) []interface{} {
+func CutAllSpace(data []interface{}, spidercode ...string) []interface{} {
 	tmp := cutAllSpace.ReplaceAllString(fmt.Sprint(data[0]), "")
 	tmp = replaceSymbol(tmp, spaces)
 	data[0] = tmp
@@ -94,7 +95,7 @@ func CutAllSpace(data []interface{},spidercode ...string) []interface{} {
 }
 
 //清理尾部符号
-func ClearEndSymblo(data []interface{},spidercode ...string) []interface{} {
+func ClearEndSymblo(data []interface{}, spidercode ...string) []interface{} {
 	text := fmt.Sprint(data[0])
 	for i := 0; i <= 2; i++ {
 		text = endSymblo.ReplaceAllString(text, "")
@@ -104,7 +105,7 @@ func ClearEndSymblo(data []interface{},spidercode ...string) []interface{} {
 }
 
 //清理符号
-func CutSymbol(data []interface{},spidercode ...string) []interface{} {
+func CutSymbol(data []interface{}, spidercode ...string) []interface{} {
 	value := fmt.Sprint(CutSpace(data)[0])
 	symbol := ",,;;::'\"“”。.\\??、/+=\\_—\\-*&……\\^%$¥@!!`~·"
 	startSymbol := "^[" + ")\\)>》】\\]}}〕" + symbol + "]+"
@@ -113,12 +114,16 @@ func CutSymbol(data []interface{},spidercode ...string) []interface{} {
 	endReg := regexp.MustCompile(endSymbol)
 	value = startReg.ReplaceAllString(value, "")
 	value = endReg.ReplaceAllString(value, "")
+	//替换指定符号
+	value = strings.ReplaceAll(value, "〉", ")")
+	value = strings.ReplaceAll(value, "〈", "(")
+
 	value = fmt.Sprint(CutSpace([]interface{}{value, data[1]})[0])
 	return []interface{}{value, data[1]}
 }
 
 // CutNotPrs cleans values that contain unpaired symbols by discarding the
 // text after the symbol (translated from the original note). It forwards
 // data unchanged to childCutNotPrs with an initial count of 1 and returns
 // that result; spidercode is accepted but not used in this function.
-func CutNotPrs(data []interface{},spidercode ...string) []interface{} {
+func CutNotPrs(data []interface{}, spidercode ...string) []interface{} {
 	return childCutNotPrs(data, 1)
 }
 
@@ -162,14 +167,15 @@ func childCutNotPrs(data []interface{}, count int) []interface{} {
 }
 
 // ClearAllWord blanks data[0] when its string form consists solely of CJK
 // ideographs (U+4E00-U+9FA5), the punctuation/symbols listed in the pattern
 // and whitespace. In every other case data[0] is replaced by its unchanged
 // fmt.Sprint form. The same slice is returned; spidercode is not used.
-func ClearAllWord(data []interface{},spidercode ...string) []interface{} {
+func ClearAllWord(data []interface{}, spidercode ...string) []interface{} {
 	value := fmt.Sprint(data[0])
 	// The pattern is anchored at both ends, so only a whole-string match is removed.
 	// NOTE(review): the regexp is compiled on every call; it could be hoisted to package scope.
 	reg := regexp.MustCompile("^[\u4e00-\u9fa5、,,.。??'\"“”‘’·~!@#¥$%…&*()()\\-—+=【】\\[\\]{}{}<>《》|\\/\\s]+$")
 	data[0] = reg.ReplaceAllString(value, "")
 	return data
 }
+
 //中文符号转英文
-func ChiToEng(data []interface{},spidercode ...string) []interface{} {
+func ChiToEng(data []interface{}, spidercode ...string) []interface{} {
 	value := fmt.Sprint(data[0])
 	startChars := []string{"(", "【", "{", "“", ")", "】", "}", "”"}
 	endChars := []string{"(", "[", "{", "\"", ")", "]", "}", "\""}
@@ -184,7 +190,7 @@ func ChiToEng(data []interface{},spidercode ...string) []interface{} {
 	return data
 }
 
-func ClearBuyerPerson(data []interface{},spidercode ...string) []interface{} {
+func ClearBuyerPerson(data []interface{}, spidercode ...string) []interface{} {
 	value := fmt.Sprint(data[0])
 	//tmp := []string{}
 	if len([]rune(value)) > 4 { //名字默认最长4

+ 14 - 4
src/jy/clear/totimestamp.go

@@ -9,7 +9,7 @@ import (
 	"time"
 )
 
-var reg, regA, regB, regC, regD, regAfter ,regAfterBool*regexp.Regexp
+var reg, regA, regB, regC, regD, regAfter, regAfterBool *regexp.Regexp
 
 const (
 	T = 365 * 86400
@@ -44,7 +44,7 @@ func init() {
 2006%01%02%15%04->时间戳
 2006%01%02%15%04%05->时间戳
 */
-func ObjToTimestamp(data []interface{},spidercode ...string) []interface{} {
+func ObjToTimestamp(data []interface{}, spidercode ...string) []interface{} {
 	tmp := fmt.Sprint(data[0])
 	//处理类似:二〇一五年十一月四日十五时
 	cht := regD.FindStringSubmatch(tmp)
@@ -106,11 +106,21 @@ func ObjToTimestamp(data []interface{},spidercode ...string) []interface{} {
 			timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + tmps[0][6:8] + " " + tmps[0][8:10] + ":" + tmps[0][10:12] + ":" + tmps[0][12:14]
 			t, _ := time.ParseInLocation("2006-01-02 15:04:00", timestr, time.Local)
 			timestamp = t.Unix()
+		} else if len(tmps[0]) == 6 { //202209 ~ 年月
+			timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + "01"
+			t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
+			timestamp = t.Unix()
 		}
-	} else if len(tmps) == 2 {
+
+	} else if len(tmps) == 2 { //补年月日~
 		timestr = fmt.Sprint(time.Now().Year()) + "-" + MDhmsRepair(tmps[0]) + "-" + MDhmsRepair(tmps[1])
 		t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
 		timestamp = t.Unix()
+		if timestamp <= 0 {
+			timestr = fmt.Sprint(MDhmsRepair(tmps[0]) + "-" + MDhmsRepair(tmps[1]) + "-01")
+			t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
+			timestamp = t.Unix()
+		}
 	} else if len(tmps) == 3 {
 		timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2])
 		t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
@@ -124,7 +134,7 @@ func ObjToTimestamp(data []interface{},spidercode ...string) []interface{} {
 		t, _ := time.ParseInLocation("2006-01-02 15:04", timestr, time.Local)
 		timestamp = t.Unix()
 	}
-	if timestamp <= 0 || timestamp > (time.Now().Unix()+T) {
+	if timestamp <= 0 || timestamp > (time.Now().Unix()+T*8) {
 		data[0] = ""
 	} else {
 		if addreptime > 0 {

+ 171 - 25
src/jy/extract/extract.go

@@ -33,9 +33,26 @@ var (
 	ClearTaskList                                map[string]*ClearTask                  //清理任务列表
 	saveLimit                                    = 100                                  //抽取日志批量保存
 	PageSize                                     = 5000                                 //查询分页
-	Fields                                       = `{"jyfb_data":1,"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"review_experts":1,"purchasing":1}`
-	//Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"new_attach_text":1,"createtime":1,"currency":1,"id":1,"company_email":1,"buyerclass":1,"tagname":1,"company_phone":1,"appid":1,"industry":1,"projectscope":1,"item":1,"s_subscopeclass":1,"matchkey":1,"jybxhref":1,"legal_person":1,"matchtype":1,"review_experts":1,"purchasing":1}`
-	Fields2    = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
+	Fields                                       = `{"jyfb_data":1,"approvecode":1,"approvenumber":1,"projecttype":1,"approvestatus":1,"total_investment":1,"funds":1,"owner":1,"projectaddr":1,"projectperiod":1,"project_scale":1,"project_person":1,"project_phone":1,"project_startdate":1,"project_completedate":1,"construction_area":1,"floor_area":1,"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1,"review_experts":1,"purchasing":1}`
+	Fields2                                      = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
+	NiJianField                                  = []string{
+		"string#approvecode",
+		"string#total_investment",
+		"string#funds",
+		"string#owner",
+		"string#projectaddr",
+		"string#projectperiod",
+		"string#project_scale",
+		"string#project_person",
+		"string#project_phone",
+		"string#approvenumber",
+		"string#projecttype",
+		"string#approvestatus",
+		"time#project_startdate",
+		"time#project_completedate",
+		"map#construction_area",
+		"map#floor_area",
+	}
 	spidercode = map[string]bool{
 		"gd_zhsggzyjyzx_jsgc_fjczbgg":     true,
 		"js_szgyyqggzyjyzx_jsgc_zjfbgs":   true,
@@ -65,8 +82,6 @@ var (
 	}
 )
 
-//var packageUnUsedReg = regexp.MustCompile("1[0-9].投标报价\n1[0-9].1")
-
 //启动测试抽取-、、、、结果追踪
 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
 	defer qu.Catch()
@@ -131,7 +146,7 @@ func BsonTOStringId(id interface{}) string {
 	return id.(primitive.ObjectID).Hex()
 }
 
-//开始测试任务抽取
+//开始测试任务抽取~结果追踪
 func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 	n, _ := strconv.Atoi(num)
 	id := IdTrans(startId)
@@ -161,6 +176,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 	} else {
 		return false
 	}
+
 }
 
 //启动抽取
@@ -312,12 +328,17 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		detail = d2
 	}
 	detail = regexp.MustCompile(`<!--[\w\W]*?-->`).ReplaceAllString(detail, "")
+
 	d3, _ := doc["summary"].(string)
 	//全文的需要修复表格
 	detail = pretreated.RepairCon(detail)
 	detail = ju.CutLableStr(d3 + "\n" + detail)
 	detail = cut.ClearHtml(d3 + "\n" + detail)
 
+	if len(detail) < 30 && len(d1) > len(detail) {
+		detail = d1
+	}
+
 	doc["detail"] = detail
 	isClearnMoney := !clearMoneyReg.MatchString(detail)
 	if isClearnMoney {
@@ -686,7 +707,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 					continue
 				}
 
-				if vc.Field == "addressing" {
+				if vc.Field == "winner" {
 					//log.Debug("调试抽取字段")
 				}
 				////抽取-前置规则
@@ -1907,13 +1928,20 @@ var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:
 var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
 
 //包含字母的实体单位
-var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])$")
+var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])$")
+
+// Signature-block ("落款") organisation patterns. inscribe_entity_1: newline, entity, newline, date. inscribe_entity_2: newline or "。", entity, optional whitespace, date, newline, second entity. Group 2 (and group 6 in inscribe_entity_2) is the entity text. NOTE(review): `联合[会|体]` is a character class, so it also accepts a literal '|'.
+var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))\n([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
+var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|客|处|行|部|队|联合[会|体]))")
+
+var exclude_entity = regexp.MustCompile("(咨询|工程造价|交易|代理|投资|(管理|工程)有限|(项目|工程)管理|采购|监理|服务|招标|招投标)") // keywords (consulting, agency, supervision, tendering, ...) that make InscribeEntity discard a candidate
 
 //特殊金额-处理判断-倍率关系
 func calculateAbnormalMoney(val []*ju.ExtField) (bool, int) {
 	//金额结果只有两种 - 倍率关系10000 - 过10E
 	moneyIndex := []int{}
 	moneyArr := []float64{}
+	first_money := float64(0)
 	difValue := map[string]interface{}{}
 	for k, v := range val { //取第一个非负数,项目名称除外
 		if v.IsTrue && v.Score > -1 {
@@ -1928,9 +1956,9 @@ func calculateAbnormalMoney(val []*ju.ExtField) (bool, int) {
 			if difValue[key] == nil {
 				difValue[key] = 1
 			}
-			if len(difValue) > 2 {
-				return false, 0
-			}
+			//if len(difValue) > 2 {
+			//	return false, 0
+			//}
 		}
 	}
 	//计算金额数组
@@ -1965,7 +1993,25 @@ func calculateAbnormalMoney(val []*ju.ExtField) (bool, int) {
 				}
 			}
 		}
+	} else if len(difValue) > 2 { //多组金额
+		is_exists := false
+		for _, v := range moneyArr {
+			if v >= 1000000000 {
+				is_exists = true
+				first_money = v
+			}
+		}
+		if is_exists {
+			for k, v := range moneyArr {
+				if v*10000 == first_money {
+					return true, moneyIndex[k]
+				}
+			}
+		}
+	} else {
+
 	}
+
 	return false, 0
 }
 
@@ -2230,7 +2276,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					}
 				}
 			}
-		} else if tmp["winner"] != nil && tmp["winner"] != "" {
+		} else if tmp["winner"] != nil {
 			//没有分包取winner
 			tmp["s_winner"] = tmp["winner"]
 			fieldSource["s_winner"] = fieldSource["winner"]
@@ -2434,13 +2480,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		}
 
-		//只要项目名称
-		//p_name := qu.ObjToString(tmp["projectname"])
-		//tmp = map[string]interface{}{}
-		//if p_name!="" {
-		//	tmp["projectname"] = p_name
-		//}
-
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
 				delete(tmp, "_id")
@@ -2497,6 +2536,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 
 //检查字段-
 func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[string]interface{} {
+
 	delete(tmp, "contenthtml")
 	delete(tmp, "detail")
 
@@ -2534,12 +2574,19 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 	//金额比例异常-
 	if _, ok := tmp["bidamount"].(string); ok {
 		delete(tmp, "bidamount")
-	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/10 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
-		if fb > 1000.0 && fb < 100000000.0 {
-		} else {
-			delete(tmp, "bidamount")
-		}
 	}
+
+	/*
+		else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/10 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
+			//比例限制打开
+			if fb > 1000.0 && fb < 100000000.0 {
+
+			} else {
+				delete(tmp, "bidamount")
+			}
+		}
+	*/
+
 	if _, ok := tmp["budget"].(string); ok {
 		delete(tmp, "budget")
 	}
@@ -2566,7 +2613,7 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 				delete(tmp, k)
 			}
 		}
-		if v == "" || len(strings.TrimSpace(fmt.Sprint(v))) == 0 || strings.Contains(fmt.Sprint(v), "**") {
+		if v == "" || len(strings.TrimSpace(fmt.Sprint(v))) == 0 {
 			delete(tmp, k)
 		}
 	}
@@ -2663,9 +2710,108 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 			tmp[k] = v
 		}
 	}
+
+	//return tmp
+
+	//针对拟建单位~需要验证~各种字段优先级
+	if qu.ObjToString(tmp["toptype"]) == "拟建" &&
+		qu.ObjToString(tmp["subtype"]) == "拟建" {
+		nj_record := map[string]interface{}{}
+		for _, v := range NiJianField {
+			arr := strings.Split(v, "#")
+			k_type, k_field := "", ""
+			if len(arr) == 2 {
+				k_type, k_field = arr[0], arr[1]
+			} else {
+				continue
+			}
+			tmpValue := tmp[k_field]
+			is_use := false
+			if k_type == "string" {
+				if qu.ObjToString(j_data[k_field]) != "" {
+					is_use = true
+					tmp[k_field] = qu.ObjToString(j_data[k_field])
+				}
+			} else if k_type == "time" {
+				//开竣工日期~采集为字符串
+				if qu.ObjToString(j_data[k_field]) != "" {
+					//特殊~需要转换
+					new_data := clear.ObjToTimestamp([]interface{}{j_data[k_field]}, "")
+					if len(new_data) > 0 {
+						if qu.Int64All(new_data[0]) > 0 {
+							is_use = true
+							tmp[k_field] = qu.Int64All(new_data[0])
+							//记录历史日期值
+							new_k := "s_" + k_field
+							nj_record[new_k] = map[string]interface{}{
+								k_field: j_data[k_field],
+							}
+						}
+					}
+				}
+			} else if k_type == "map" {
+				p_info := *qu.ObjToMap(j_data["project_scale_info"])
+				if qu.ObjToString(p_info[k_field]) != "" {
+					is_use = true
+					tmp[k_field] = qu.ObjToString(p_info[k_field])
+				}
+			}
+			if tmpValue != nil {
+				nj_record[k_field] = map[string]interface{}{
+					k_field:  tmpValue,
+					"is_use": is_use,
+				}
+			}
+		}
+		if len(nj_record) > 0 {
+			tmp["nj_record"] = nj_record
+		}
+	}
+
+	//落款实体
+	if qu.ObjToString(tmp["buyer"]) == "" && ju.Inscribe &&
+		!(qu.ObjToString(tmp["toptype"]) == "拟建" && qu.ObjToString(tmp["subtype"]) == "拟建") {
+		new_buyer := InscribeEntity(qu.ObjToString(j_data["detail"]))
+		if new_buyer != "" {
+			tmp["buyer"] = new_buyer
+		}
+	}
+
+	//拟建不能存buyer
+	if qu.ObjToString(tmp["toptype"]) == "拟建" &&
+		qu.ObjToString(tmp["subtype"]) == "拟建" {
+		delete(tmp, "buyer")
+	}
+
 	return tmp
 }
 
+func InscribeEntity(detail string) string {
+	//去除标签
+	new_str := ""
+	new_detail := pretreated.TextAfterRemoveTable(detail)
+	if len(new_detail) > 200 {
+		new_detail = detail[len(new_detail)-200:]
+	}
+	new_str = inscribe_entity_1.FindString(new_detail)
+	if new_str == "" {
+		new_str = inscribe_entity_2.FindString(new_detail)
+		if new_str != "" {
+			str1 := inscribe_entity_2.ReplaceAllString(new_str, "${2}")
+			str2 := inscribe_entity_2.ReplaceAllString(new_str, "${6}")
+			if str1 == str2 && str1 != "" {
+				new_str = str1
+			}
+		}
+	} else {
+		new_str = inscribe_entity_1.ReplaceAllString(new_str, "${2}")
+	}
+	if new_str != "" && exclude_entity.MatchString(new_str) {
+		new_str = ""
+	}
+	return new_str
+}
+
 //处理折扣系数-
 func dealWithDiscountBid(tmp map[string]interface{}) float64 {
 	biddiscount := qu.Float64All(tmp["biddiscount"])

+ 20 - 25
src/jy/extract/extractInit.go

@@ -90,10 +90,10 @@ type ExtractTask struct {
 	ResultChanel chan bool //抽取结果详情
 	sync.RWMutex
 	ResultArr [][]map[string]interface {
-	}                   //抽取结果详情
+	} //抽取结果详情
 	BidChanel chan bool //抽取结果
 	BidArr    [][]map[string]interface {
-	}            //抽取结果
+	} //抽取结果
 	BidTotal int //结果数量
 
 	RecogFieldMap map[string]map[string]interface {
@@ -101,7 +101,7 @@ type ExtractTask struct {
 	FidClassMap map[string][]map[string]interface {
 	} //分类
 	CidRuleMap map[string][]map[string]interface {
-	}                    //规则
+	} //规则
 	AuditFields []string //需要审核的字段名称
 
 	SiteCityMap          map[string]*SiteCity //站点对应的省市区
@@ -128,10 +128,10 @@ type ExtractTask struct {
 	PostCodeMap map[string]*PostCode //邮编
 	AreaCodeMap map[string]*AreaCode //区号
 
-	XjbtCityArr          []map[string]interface{}  		//新疆兵团相关数据
-	SensitiveFullCity  		 *sensitive.Filter
-	SensitiveSimCity		 *sensitive.Filter
-	InfoType []map[string]interface {
+	XjbtCityArr       []map[string]interface{} //新疆兵团相关数据
+	SensitiveFullCity *sensitive.Filter
+	SensitiveSimCity  *sensitive.Filter
+	InfoType          []map[string]interface {
 	}
 
 	Trie_Full_Province  *ju.Trie       //省全称 省、直辖市、自治区
@@ -157,7 +157,6 @@ type SiteCity struct {
 	D string //区全称
 }
 
-
 type ClearTaskInfo struct {
 	Name, Version, VersionId     string    //名称、版本、版本id
 	FromDbAddr, FromDB, FromColl string    //清理数据库地址、库名、表名
@@ -222,7 +221,7 @@ func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
 //加载任务信息
 func (e *ExtractTask) InitTaskInfo() {
 	task, _ := db.Mgo.FindById("task", e.Id, nil)
-	log.Debug("task", task)
+	log.Debug("task", task, "~", e.Id)
 	if len(*task) > 1 {
 		v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`)
 		strs := strings.Split((*task)["s_mgosavecoll"].(string), "/")
@@ -1035,9 +1034,9 @@ func (e *ExtractTask) InitXjbtCityInfo() {
 	query := map[string]interface{}{}
 	list, _ := db.Mgo.Find("area_xjbt", query, nil, nil, false, -1, -1)
 	arr := []map[string]interface{}{}
-	for _,v := range *list {
-		delete(v,"_id")
-		arr = append(arr,v)
+	for _, v := range *list {
+		delete(v, "_id")
+		arr = append(arr, v)
 	}
 	e.XjbtCityArr = arr
 }
@@ -1047,10 +1046,10 @@ func (e *ExtractTask) InitUpdateSite() {
 	defer qu.Catch()
 	e.SiteCityMap = make(map[string]*SiteCity)
 	for _, v := range InitSite() {
-		site:= qu.ObjToString(v["site"])
-		area:= qu.ObjToString(v["area"])
-		city:= qu.ObjToString(v["city"])
-		district:= qu.ObjToString(v["district"])
+		site := qu.ObjToString(v["site"])
+		area := qu.ObjToString(v["area"])
+		city := qu.ObjToString(v["city"])
+		district := qu.ObjToString(v["district"])
 		if area != "" && area != "全国" && site != "" {
 			s := &SiteCity{
 				P: area,
@@ -1060,13 +1059,10 @@ func (e *ExtractTask) InitUpdateSite() {
 			e.SiteCityMap[site] = s
 		}
 	}
-	log.Debug("有效站点数量:",len(e.SiteCityMap))
-
+	log.Debug("有效站点数量:", len(e.SiteCityMap))
 
 }
 
-
-
 func (e *ExtractTask) InitCityInfo() {
 	defer qu.Catch()
 	e.InitVar() //初始化变量
@@ -1159,9 +1155,9 @@ func (e *ExtractTask) InitCityInfo() {
 				c.Brief = jc_city                 //市简称:杭州
 				e.Trie_Sim_City.AddWords(c.Brief) //加入市简称Trie(k:杭州)
 				e.SensitiveSimCity.AddWord(c.Brief)
-				e.CityMap[qc_city] = c.Brief      //杭州市:杭州
-				e.CityBriefMap[c.Brief] = c       //杭州:市信息{}
-				e.CityFullMap[qc_city] = c        //杭州市:市信息{}
+				e.CityMap[qc_city] = c.Brief //杭州市:杭州
+				e.CityBriefMap[c.Brief] = c  //杭州:市信息{}
+				e.CityFullMap[qc_city] = c   //杭州市:市信息{}
 			}
 			c.P = p
 			if city_alias, ok := vcity["city_alias"].([]interface{}); ok {
@@ -1285,10 +1281,9 @@ func (e *ExtractTask) InitVar() {
 	e.Seg_PCD.LoadDict("./res/pcd.txt")
 	e.Seg_SV.LoadDict("./res/sv.txt")
 
-
 	//新疆兵团-数组
 	if e.XjbtCityArr == nil {
-		e.XjbtCityArr = make([]map[string]interface{},0)
+		e.XjbtCityArr = make([]map[string]interface{}, 0)
 	}
 
 	//敏感词-筛选

+ 0 - 20
src/jy/extract/extractudp.go

@@ -220,20 +220,10 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 			log.Debug("timestr", (*tsk)["timestr"], "count", count1+count2)
 			list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1)
 			for _, v := range *list {
-				//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
-				//	log.Debug(index, qu.BsonIdToSId(v["_id"]), "//去除含敏感词数据")
-				//	continue
-				//}
 				if spidercode[qu.ObjToString(v["spidercode"])] { //临时开标记录
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
-				//if qu.ObjToString(v["subtype"])!="中标" &&
-				//	qu.ObjToString(v["subtype"])!="成交" &&
-				//	qu.ObjToString(v["subtype"])!="合同" {
-				//	continue
-				//}
-
 				var j, jf *ju.Job
 				var isSite bool
 				if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {
@@ -248,20 +238,10 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 			}
 			list2, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl+"_back", query, nil, Fields, false, -1, -1)
 			for _, v := range *list2 {
-				//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
-				//	continue
-				//}
-
 				if spidercode[qu.ObjToString(v["spidercode"])] {
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
-				//if qu.ObjToString(v["subtype"])!="中标" &&
-				//	qu.ObjToString(v["subtype"])!="成交" &&
-				//	qu.ObjToString(v["subtype"])!="合同" {
-				//	continue
-				//}
-
 				var j, jf *ju.Job
 				var isSite bool
 				if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {

+ 1 - 0
src/jy/extract/newextractcity.go

@@ -884,6 +884,7 @@ func MergeFullSimScore(j *ju.Job) {
 	for d_text, d_score := range j.SimDistrictScore {
 		j.FullDistrictScore[d_text] = j.FullDistrictScore[d_text] + d_score
 	}
+
 	//	if len(j.FullCityScore) == 0 {
 	//		j.FullCityScore = j.SimCityScore
 	//	} else {

+ 23 - 20
src/jy/extract/score.go

@@ -23,6 +23,7 @@ var (
 	RepeatScore, BlockScore float64
 	CommonScore             map[string]float64
 	FieldsScore             map[string]map[string]float64
+	lengthValidReg          = regexp.MustCompile(`^(.{2}([大|小|中|学][学|院]|公司|某部|学社|大队|党校)|某(部|中心))$`)
 )
 
 func init() {
@@ -117,10 +118,10 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 		}
 		if field == "budget" || field == "bidamount" {
 			for tmpsindex, tmpsvalue := range tmps {
-				if ((strings.Contains(tmpsvalue.RuleText, "总") && !strings.Contains(tmpsvalue.RuleText, "项目总投资"))||strings.Contains(tmpsvalue.Code, "总价")) && tmpsvalue.RuleText!="总价(元)" &&(tmpsvalue.Type == "colon"||tmpsvalue.Type == "table" ) {
+				if ((strings.Contains(tmpsvalue.RuleText, "总") && !strings.Contains(tmpsvalue.RuleText, "总投资")) || strings.Contains(tmpsvalue.Code, "总价")) && tmpsvalue.RuleText != "总价(元)" && (tmpsvalue.Type == "colon" || tmpsvalue.Type == "table") {
 					tmps[tmpsindex].Score += 1
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含总字+1`, Code: field, Value: tmpsvalue.Value, Score: 1})
-				}else if strings.Contains(qu.ObjToString(tmpsvalue.SourceValue), "㎡"){
+				} else if strings.Contains(qu.ObjToString(tmpsvalue.SourceValue), "㎡") {
 					tmps[tmpsindex].Score -= 10
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: field + `value结果含㎡-10`, Code: field, Value: tmpsvalue.Value, Score: -10})
 				}
@@ -145,7 +146,7 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 
 			//没有抽取到值,不打分
 			if string_value := fmt.Sprint(tmpsvalue.Value); string_value == "" || string_value == "0" || string_value == "<nil>" {
-				if field == "budget" || field == "bidamount"{
+				if field == "budget" || field == "bidamount" {
 					if tmpsvalue.IsTrue {
 						//continue
 					} else {
@@ -192,9 +193,9 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 					titlescore = fieldscore["title"]
 				}
 				//抽取类型打分-针对处理-
-				if field == "bidamount" && j.SpiderCode=="a_hcsccz_cjgg" && tmpsvalue.Type=="table" {
+				if field == "bidamount" && j.SpiderCode == "a_hcsccz_cjgg" && tmpsvalue.Type == "table" {
 					//不打分+3分
-				}else {
+				} else {
 					typescore = fieldscore[tmpsvalue.Type]
 				}
 			} else { //通用抽取属性打分配置
@@ -261,10 +262,9 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 								gt := qu.IntAll(ranges[0])
 								lte := qu.IntAll(ranges[1])
 								//针对指定 buyer -长度0-4 不打分
-								if field=="buyer" || field=="winner" {
-									buyerValue := fmt.Sprint(tmpsvalue.Value)
-									reg := regexp.MustCompile(`^.{2}([大|小|中|学][学|院]|公司|某部)$`)
-									if reg.MatchString(buyerValue) && gt==0 && lte==4 {
+								if field == "buyer" || field == "winner" {
+									tmpValue := fmt.Sprint(tmpsvalue.Value)
+									if lengthValidReg.MatchString(tmpValue) && gt == 0 && lte == 4 {
 										continue
 									}
 								}
@@ -287,11 +287,10 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 						if p, ok := position.(map[string]interface{}); ok {
 							qu.Try(func() {
 								if p["regexp"] != nil {
-									if field=="buyer"|| field=="winner" {
+									if field == "buyer" || field == "winner" {
 										//针对指定 buyer -个别  不打分
-										buyerValue := fmt.Sprint(tmpsvalue.Value)
-										reg := regexp.MustCompile(`^.{2}([大|小|中|学][学|院]|公司|某部)$`)
-										if reg.MatchString(buyerValue) && qu.ObjToString(p["describe"])=="黑名单"{
+										tmpValue := fmt.Sprint(tmpsvalue.Value)
+										if lengthValidReg.MatchString(tmpValue) && qu.ObjToString(p["describe"]) == "黑名单" {
 											return
 										}
 									}
@@ -329,21 +328,25 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 			//4.数据范围打分
 			if scoreRule["type"] == "float" {
 				min := qu.IntAll(scoreRule["min"])
-				max := qu.IntAll(scoreRule["max"])
+				max1 := qu.IntAll(scoreRule["max1"])
+				max2 := qu.IntAll(scoreRule["max2"])
 				val := qu.IntAll(tmpsvalue.Value)
 				scores, _ := scoreRule["score"].([]interface{})
-				if len(scores) < 3 || val < 0 {
+				if len(scores) < 4 || val < 0 {
 					continue
 				}
 				if val < min && 0 < val {
 					tmps[tmpsindex].Score += qu.Float64All(scores[0])
 					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprint(val, "<", min, "&&", 0, "<", val), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[0])})
-				} else if val > max {
-					tmps[tmpsindex].Score += qu.Float64All(scores[2])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprint(val, ">", max), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
-				} else if val <= max && val >= min {
+				} else if val >= max2 {
+					tmps[tmpsindex].Score += qu.Float64All(scores[3])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprint(val, ">=", max2), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[3])})
+				} else if val < max1 && val >= min {
 					tmps[tmpsindex].Score += qu.Float64All(scores[1])
-					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprintln(val, "<=", max, "&&", val, ">=", min), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprintln(val, "<=", max1, "&&", val, ">=", min), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[1])})
+				} else if val < max2 && val >= max1 {
+					tmps[tmpsindex].Score += qu.Float64All(scores[2])
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: "数据范围打分", Code: field + ".float", RuleText: fmt.Sprintln(val, "<=", max2, "&&", val, ">=", max1), ScoreFrom: "fieldscore.json." + field, Value: tmpsvalue.Value, Score: qu.Float64All(scores[2])})
 				}
 			}
 			//其他打分配置

+ 2 - 2
src/jy/pretreated/analykv.go

@@ -366,7 +366,7 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 					break
 				}
 				//if !filter_zbdw_ky.MatchString(k) && filter_zbdw_ky.MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["中标单位"]) {
-				if from == 1 && !ContactType["中标单位"].MatchString(k) && ContactType["中标单位"].MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["中标单位"]) {
+				if from == 1 && !ContactType["中标单位"].MatchString(k) && ContactType["中标单位"].MatchString(m.Keys[i]) && !tablekeyclear2.MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["中标单位"]) {
 					matchMap["中标单位"][k] = true
 					k = "中标单位" + k
 					bf = true
@@ -432,7 +432,7 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 			if vvv, ok := m.Map[k].([]string); ok {
 				vals = append(vals, vvv...)
 			} else {
-				vals = append(vals,  util.ObjToString(m.Map[k]))
+				vals = append(vals, util.ObjToString(m.Map[k]))
 			}
 			vals = append(vals, v)
 			m.AddKey(k, vals)

+ 13 - 1
src/jy/pretreated/analystep.go

@@ -19,6 +19,9 @@ var yjReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个
 var blTextReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|业绩奖项|主要人员相关资料|唱标记录|否决投标的?情况说明")
 var unblTextReg *regexp.Regexp = regexp.MustCompile("(项目业绩案例|类似项目业绩)")
 
+// "Move to front" rule: matches a "<招标代理机构|招标单位|招标人>" label, a colon, 4-25 characters and a trailing newline; AnalyStart collects these matches before truncating a block at "业绩" and prepends them to the kept text.
+var beforeTextReg *regexp.Regexp = regexp.MustCompile("(招标代理机构|招标单位|招标人)[::].{4,25}\n")
+
 var preConReg1 = regexp.MustCompile("(第[一二三1-3]中标候选人)\n(业绩奖项)")
 var hisReg = regexp.MustCompile("(开标记录|开标记录及投标报价|类似业绩|历史业绩|填报项目业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</td>)")
 var hisReg2 = regexp.MustCompile("(开标记录|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(.*原因及其依据.*[::]?)?[\n]?.*?[\n]?(</tr>|</table>|</td>)")
@@ -42,6 +45,7 @@ var formattext12 = regexp.MustCompile("((成交|中标)价格|本期预算)\n[(
 
 //业绩相关~分割
 var formattext20 = regexp.MustCompile("(工程业绩|投标文件中填报的单位项目业绩名称)[::][\n]?1[、.].*\n2[、.].*\n(3[、.].*\n)?")
+var formattext21 = regexp.MustCompile("(完成过)([ \\s]+)?[一二三1-9]([ \\s]+)?项总投资([0-9〇零点壹贰叁肆伍陆柒捌玖拾佰仟万亿元圆角分整,,\\.]{3,}[万亿元圆角分整]+)")
 
 //特殊文本提取-计算
 var formattext50 = regexp.MustCompile("主要标的数量[::]([0-9.]+)\n主要标的单价[::]([0-9.]+)\n合同金额[::].*\n履约期限")
@@ -142,6 +146,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 
 	//工程业绩描述影响抽取
 	con = formattext20.ReplaceAllString(con, "\n")
+	con = formattext21.ReplaceAllString(con, "")
 
 	//指定爬虫-特殊结构-计算抽取
 	if codeSite == "a_zgzfcgw_zfcghtgg_new" {
@@ -206,7 +211,6 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 				tmp_text := HtmlToText(bl.Text)
 				job.BlockPackage = FindPackageFromText(job.Title, tmp_text, isSite, codeSite)
 			}
-
 			job.Block = append(job.Block, bl)
 		}
 	} else { //未分块,创建分块
@@ -244,7 +248,15 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 		FindProjectCode(bl.Text, job) //匹配项目编号 ~~ 清洗无效信息文本
 		if blTextReg.MatchString(bl.Text) && !unblTextReg.MatchString(bl.Text) {
 			if strings.Index(bl.Text, "业绩") > 1 {
+				//如果有采购单位信息~置前
+				before_arr := []string{}
+				if beforeTextReg.MatchString(bl.Text) {
+					before_arr = beforeTextReg.FindAllString(bl.Text, -1)
+				}
 				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
+				if len(before_arr) > 0 {
+					bl.Text = strings.Join(before_arr, "\n") + bl.Text
+				}
 			}
 		}
 		//特殊-指定处理-结构转化formattext100

+ 10 - 2
src/jy/pretreated/analytable.go

@@ -26,6 +26,8 @@ var (
 	tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
 	//清理表格中是key中包含的空格或数字等
 	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
+	//清理上阶段kv的匹配的短词
+	tablekeyclear2 = regexp.MustCompile("(供应商信用融资|供应商公章|主要标的名称|中标人推荐理由|成交供应商推荐理由)")
 	//清理表格td中的符号
 	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
 	//判断key是金额,对万元的处理
@@ -516,7 +518,7 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 											//hadSort = true
 											smap[vsk]["sortstr"] = vsv
 											smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
-										} else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" { //数据验证val是否是候选人
+										} else if findCandidate2.MatchString(vsv) && tmpEntname[vsk] == "" && k != "采购项目信息" { //数据验证val是否是候选人
 											entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
 											if entname != "" {
 												tmpEntname[vsk] = entname
@@ -1934,7 +1936,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 		near.KeyDirect = vdirect
 		td.KVDirect = direct
 		key := repSpace.ReplaceAllString(near.Val, "")
-
 		//临时去掉换行-进行判断
 		tmp_tdVal := strings.ReplaceAll(td.Val, "\n", "")
 
@@ -2009,6 +2010,12 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 				if (table.Tag == "成交候选人" || table.Tag == "中标候选人") &&
 					zbhxrSortReg_1.MatchString(tmpnewnear.Val) {
 					key = "中选候选人" + tmpnewnear.Val
+				} else if table.Tag == "中标情况" {
+					if tmpnewnear.MustBH || tmpnewnear.BH {
+						if tmpnewnear.Val == "标段名称" && findCandidate2.MatchString(td.Val) {
+							key = "中标单位名称"
+						}
+					}
 				} else {
 					if tmpnewnear.MustBH || tmpnewnear.BH {
 						if tmpnewnear.Val == "中标候选人情况" && zbhxrSortReg_3.MatchString(td.Val) {
@@ -2024,6 +2031,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 		if near.Val == "" {
 			key = fmtkey("k", near.TR.RowPos, near.ColPos)
 		}
+
 		val := table.SortKV.Map[key]
 		//qutil.Debug("====================", "key:", key, "val:", val)
 		bthiskey := false

+ 2 - 1
src/jy/pretreated/colonkv.go

@@ -20,7 +20,7 @@ var (
 	regReplKV2     = regexp.MustCompile("(.+?[\u4e00-\u9fa5))][\\s\u3000\u2003\u00a0]*[::].*[((]?[^\r\n\\s\u3000\u2003\u00a0标段包]+?[))]?)([一二三四五六七八九十]+[、..][^一二三四五六七八九十]+?)")
 	regKV          = regexp.MustCompile("([\\p{Han}][^,,。、.;;\r\n]{1,30}?)[::](.*)")
 	filterK        = regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
-	filterValue    = regexp.MustCompile("(^(无)$|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>|(完全响应)$)")
+	filterValue    = regexp.MustCompile("(^(无)$|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>|(完全响应|普通供应商)$)")
 	filterWinner   = regexp.MustCompile(".{2,40}(集团|公司|学校|中心|家具城|门诊|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|会|场|行)")
 	regReplKey     = regexp.MustCompile("^(包(.+[A-Za-z\\d])?|本项目|推荐|的|本次)|([约为元万亿]+|[大小]写|人民币|[全]称|姓名)$")
 	buyerAndAgency = regexp.MustCompile("(代理(机构|人)|采购(人|单位))")
@@ -951,6 +951,7 @@ func FilterContactKey(key string) string {
 	}
 	key = filterK.ReplaceAllString(key, "")
 	key = tablekeyclear.ReplaceAllString(key, "")
+	key = tablekeyclear2.ReplaceAllString(key, "")
 	return key1 + key
 }
 

+ 44 - 48
src/jy/pretreated/division.go

@@ -96,23 +96,22 @@ var (
 	}
 
 	//非分包中标单位值
- 	unPackageWinnerReg = regexp.MustCompile("(重新招标|方案包)")
-	conformWinnerKVReg = regexp.MustCompile("^(中标人|中标银行|第一名)[::](.{4,20}(分行|公司))")
+	unPackageWinnerReg  = regexp.MustCompile("(重新招标|方案包)")
+	conformWinnerKVReg  = regexp.MustCompile("^(中标人|中标银行|第一名)[::](.{4,20}(分行|公司))")
 	conformWinnerKVReg1 = regexp.MustCompile("^[-].{4,15}公司$")
 	conformWinnerKVReg2 = regexp.MustCompile("(.*)?确定(.*公司)为中标人(.*)?")
 
-    conformWinnerTextReg3 = regexp.MustCompile("拟定供应商信息[::\\s]+名称[::](.*)[\\s]+地址")
-
+	conformWinnerTextReg3 = regexp.MustCompile("拟定供应商信息[::\\s]+名称[::](.*)[\\s]+地址")
 
 	/*
-	拟定供应商信息:
-	名称:郑州人民广播电台
-	地址:郑州市金水区内环路17号A座。
+		拟定供应商信息:
+		名称:郑州人民广播电台
+		地址:郑州市金水区内环路17号A座。
 	*/
 
 	//针对处理-替换敏感词-中标
 	packageReg1 = regexp.MustCompile("(包件[一二三四五1-9][::].*)\n1[、.\\s]+名称[::](.*)\n2[、.\\s]+")
-    packageReg2 = regexp.MustCompile("标段[((]包[))][\\[][O0]+([1-9一二三四五六七八九])[\\]]")
+	packageReg2 = regexp.MustCompile("标段[((]包[))][\\[][O0]+([1-9一二三四五六七八九])[\\]]")
 	packageReg3 = regexp.MustCompile("(中标价格)[::]")
 	packageReg4 = regexp.MustCompile("([1-9](标段)[::])拟定供应商名称[::](.*公司)\n")
 	packageReg5 = regexp.MustCompile("(第[1-9一二三四五](标段))(中标人)[::](.*)\n")
@@ -120,18 +119,18 @@ var (
 
 	//预算
 	packageReg20 = regexp.MustCompile("(最高投标限价为|投资预算约[为]?)([0-9.万元人民币]+)")
-    packageReg21 = regexp.MustCompile("(预算金额|项目预算)[::](包[\\s]?1|1[\\s]?包)[::]?([0-9.万元人民币]+)[,,](包[\\s]?2|2[\\s]?包)[::]?([0-9.万元人民币]+)")
-
+	packageReg21 = regexp.MustCompile("(预算金额|项目预算)[::](包[\\s]?1|1[\\s]?包)[::]?([0-9.万元人民币]+)[,,](包[\\s]?2|2[\\s]?包)[::]?([0-9.万元人民币]+)")
 
-	untitleReg  = regexp.MustCompile("(技术评分明细表)")
-    unpriceReg =  regexp.MustCompile("(^([Xx]\\+[1-9\\.]+元/每)|分析)")
+	untitleReg = regexp.MustCompile("(技术评分明细表)")
+	unpriceReg = regexp.MustCompile("(^([Xx]\\+[1-9\\.]+元/每)|分析)")
 
 	//敏感词-影响分包-替换-分割
 	replaceSenstiveReg1 = regexp.MustCompile("([一二三四五六七八九十1-9][、]项目名称[::].*采购项目)([一二三四五六七八九十1-9][、]采购结果)")
 
 	//价格~单位换行  替换
-	packageReg50  =  regexp.MustCompile("(投标报价[::][0-9.]+)\n(万元)")
+	packageReg50 = regexp.MustCompile("(投标报价[::][0-9.]+)\n(万元)")
 
+	blockValidReg = regexp.MustCompile("(采购人|招标人|代理机构)")
 )
 
 //分块
@@ -139,7 +138,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
 	defer qutil.Catch()
 	returnValue := 0
 	var blocks []*util.Block
-	if strings.TrimSpace(content) == "" || codeSite == "a_zgyc_ztbxx" || codeSite=="a_gyzbgfyxgs_zbjg" {
+	if strings.TrimSpace(content) == "" || codeSite == "a_zgyc_ztbxx" || codeSite == "a_gyzbgfyxgs_zbjg" || codeSite == "yn_ynstzxmzxspjgpt_bljggs" {
 		return blocks, -1
 	}
 	//table里面的内容不考虑,先把table清理掉
@@ -283,7 +282,8 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
 		}
 		//过滤
 		if regexp.MustCompile("投标文件格式|业绩").MatchString(title) &&
-			!regexp.MustCompile("拟定的唯一供应商名称").MatchString(title){
+			!regexp.MustCompile("拟定的唯一供应商名称").MatchString(title) &&
+			!blockValidReg.MatchString(blockText) {
 			continue
 		}
 		blockText = hasMergeKV(title, blockText)
@@ -294,7 +294,6 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
 		splitTitles := ProcTitle(title)
 		blockText = mergetext(splitTitles, blockText)
 
-
 		block := &util.Block{
 			Index:  index,     //序号
 			Text:   blockText, //内容
@@ -376,15 +375,15 @@ func mergetext(titles []string, text string) string {
 		if len(lentexts) == 2 {
 			if strings.Contains(titles[i], lentexts[0]) {
 				tt += titles[i] + ":" + lentexts[1] + "\n"
-			}else if strings.Contains(strings.ReplaceAll(titles[i],"的",""), strings.ReplaceAll(lentexts[0],"的","")){
+			} else if strings.Contains(strings.ReplaceAll(titles[i], "的", ""), strings.ReplaceAll(lentexts[0], "的", "")) {
 				tt += titles[i] + ":" + lentexts[1] + "\n"
-			}else if strings.Contains(strings.ReplaceAll(titles[i],"联系地址","地址"), strings.ReplaceAll(lentexts[0],"联系地址","地址")){
+			} else if strings.Contains(strings.ReplaceAll(titles[i], "联系地址", "地址"), strings.ReplaceAll(lentexts[0], "联系地址", "地址")) {
 				tt += titles[i] + ":" + lentexts[1] + "\n"
 			}
-		}else {
+		} else {
 			//特殊处理
-			if strings.Contains(v,"中标人 ") {
-				tt +=v+"\n"
+			if strings.Contains(v, "中标人 ") {
+				tt += v + "\n"
 			}
 		}
 	}
@@ -501,7 +500,7 @@ func getSerialType(content string, blockRegs []*regexp.Regexp) (*regexp.Regexp,
 	for k, v := range blockRegs {
 		indexs := v.FindStringIndex(content)
 		//只用最外层的序号,里面的过滤掉
-		if len(indexs) == 2  && !strings.Contains(content,"中标候选人排序") && !regSpliteSegment.MatchString(strings.TrimSpace(content[indexs[0]:indexs[1]])) && (contentStartIndex == -1 || indexs[0] < contentStartIndex) {
+		if len(indexs) == 2 && !strings.Contains(content, "中标候选人排序") && !regSpliteSegment.MatchString(strings.TrimSpace(content[indexs[0]:indexs[1]])) && (contentStartIndex == -1 || indexs[0] < contentStartIndex) {
 			regSerialTitleIndex = k
 			contentStartIndex = indexs[0]
 			regContenSerialTitle = v
@@ -675,7 +674,7 @@ func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*
 						blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
 					}
 				}
-			} else if (kc == "中标金额"||kc=="各包中标/成交候选供应商及报价") && v.Bidamount <= 0 {
+			} else if (kc == "中标金额" || kc == "各包中标/成交候选供应商及报价") && v.Bidamount <= 0 {
 				//特殊金额类可避免
 				if unpriceReg.MatchString(cv[0].Value) {
 					continue
@@ -690,12 +689,12 @@ func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*
 						blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
 					}
 				}
-			} else if (kc == "中标单位"||kc=="第1    名"||kc=="各包中标/成交候选供应商及报价") && v.Winner == "" {
+			} else if (kc == "中标单位" || kc == "第1    名" || kc == "各包中标/成交候选供应商及报价") && v.Winner == "" {
 				if !unPackageWinnerReg.MatchString(cv[0].Value) {
-					isW:=false
-					if len(cv)>1 {
-						for _,v_cv :=range cv{
-							if v_cv.Key=="中标单位" && v_cv.Value!="" {
+					isW := false
+					if len(cv) > 1 {
+						for _, v_cv := range cv {
+							if v_cv.Key == "中标单位" && v_cv.Value != "" {
 								isW = true
 								blockPackage[k].Winner = v_cv.Value
 								break
@@ -706,21 +705,21 @@ func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*
 						blockPackage[k].Winner = cv[0].Value
 					}
 				}
-			}else { //特殊情况-特殊处理
+			} else { //特殊情况-特殊处理
 				res := conformWinnerKVReg.FindAllStringSubmatch(cv[0].Value, -1)
 				if len(res) > 0 {
 					text := res[0][2]
-					if text!="" {
+					if text != "" {
 						blockPackage[k].Winner = text
 						continue
 					}
 				}
-				if kc=="中标信息" && conformWinnerKVReg1.MatchString(cv[0].Value){
+				if kc == "中标信息" && conformWinnerKVReg1.MatchString(cv[0].Value) {
 					blockPackage[k].Winner = cv[0].Value
 					continue
 				}
 				if conformWinnerKVReg2.MatchString(cv[0].Value) {
-					blockPackage[k].Winner = conformWinnerKVReg2.ReplaceAllString(cv[0].Value,"${2}")
+					blockPackage[k].Winner = conformWinnerKVReg2.ReplaceAllString(cv[0].Value, "${2}")
 					continue
 				}
 
@@ -728,7 +727,7 @@ func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*
 				res = conformWinnerTextReg3.FindAllStringSubmatch(v.Text, -1)
 				if len(res) > 0 {
 					text := res[0][1]
-					if text!="" {
+					if text != "" {
 						blockPackage[k].Winner = text
 						continue
 					}
@@ -785,7 +784,7 @@ func FindPackageFromText(title string, content string, isSite bool, codeSite str
 func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool, isSite bool, codeSite string) (bool, string) {
 
 	// Check whether the content is split into packages (分包).
-	content  = replaceSenstiveReg1.ReplaceAllString(content,"$1\n$2")
+	content = replaceSenstiveReg1.ReplaceAllString(content, "$1\n$2")
 	content = regFJWarap.ReplaceAllString(content, "\n")
 	content = regAZWarap.ReplaceAllString(content, "\n")
 	content = regStrWrap.ReplaceAllString(content, "\n")
@@ -793,24 +792,21 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	content = regEndWrap.ReplaceAllString(content, "")
 	content = regBZJWarap.ReplaceAllString(content, "")
 	//替换敏感词
-	content = packageReg1.ReplaceAllString(content,"${1}\n中标单位:${2}\n")
-	content = packageReg2.ReplaceAllString(content,"\n标段${1}:")
-	content = packageReg3.ReplaceAllString(content,"\n${1}:")
-	content = packageReg4.ReplaceAllString(content,"\n${1}\n中标单位:${3}\n")
-	content = packageReg5.ReplaceAllString(content,"\n${1}\n中标单位:${4}\n")
-	content  = packageReg6.ReplaceAllString(content,"\n$2\n中标单位:$1\n$4\n中标单位:$3")
+	content = packageReg1.ReplaceAllString(content, "${1}\n中标单位:${2}\n")
+	content = packageReg2.ReplaceAllString(content, "\n标段${1}:")
+	content = packageReg3.ReplaceAllString(content, "\n${1}:")
+	content = packageReg4.ReplaceAllString(content, "\n${1}\n中标单位:${3}\n")
+	content = packageReg5.ReplaceAllString(content, "\n${1}\n中标单位:${4}\n")
+	content = packageReg6.ReplaceAllString(content, "\n$2\n中标单位:$1\n$4\n中标单位:$3")
 
 	//替换换行金额
-	content  = packageReg50.ReplaceAllString(content,"$1$2")
+	content = packageReg50.ReplaceAllString(content, "$1$2")
 
-
-
-	content = packageReg20.ReplaceAllString(content,"\n预算金额:${2}\n")
-	content = packageReg21.ReplaceAllString(content,"\n${2}\n预算金额:${3}\n${4}\n预算金额:${5}")
+	content = packageReg20.ReplaceAllString(content, "\n预算金额:${2}\n")
+	content = packageReg21.ReplaceAllString(content, "\n${2}\n预算金额:${3}\n${4}\n预算金额:${5}")
 	//6、项目预算:1包3689028.00元,2包700000.00元。
 
-
-	if untitleReg.MatchString(title){
+	if untitleReg.MatchString(title) {
 		return false, ""
 	}
 	con, pkg, flag := CheckMultiPackage(content) //找pkg分包包名
@@ -854,7 +850,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	startEndMap := map[int]int{}
 	pkgIndexMap := map[string][]int{}
 	indexPkgMap := map[int]string{}
-	
+
 	//小标题
 	titleindexs := indexTile.FindAllStringIndex(con, -1)
 	if len(titleindexs) == 0 {

+ 2 - 1
src/jy/pretreated/multipackage.go

@@ -45,7 +45,8 @@ var (
 	clearPkgFlag = regexp.MustCompile("^[\\-]+|[\\-]+$")
 
 	//无效冗余包名
-	cleanPkgName = regexp.MustCompile("^(1[-][23456789]包|一[-][二三四五六七八九]包)$")
+	cleanPkgName = regexp.MustCompile("^(1[-][23456789]包|一[-][二三四五六七八九]包|(施工|监理)BIM)$")
+	cleanPkgCon  = regexp.MustCompile("(不足三家.*包采购.*终止)")
 )
 
 //判断分包

+ 6 - 5
src/jy/util/util.go

@@ -39,12 +39,12 @@ var GoodsGet *DFA     //商品
 var BrandGet *DFA     //品牌
 var IsBrandGoods bool //是否开启品牌抽取
 
-var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit, Ffield bool
+var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit, Ffield, Inscribe bool
 var AddrsSess *mgo.Collection
-var QyxySess  *mgo.Collection
+var QyxySess *mgo.Collection
 
-var IsUpdateSite	bool
-var IsUpdateRuleTag	bool
+var IsUpdateSite bool
+var IsUpdateRuleTag bool
 
 func init() {
 	syncint = make(chan bool, 1)
@@ -56,7 +56,7 @@ func UtilInit() {
 	dbname := qu.ObjToString(Config["dbname"])
 	Mgo = MgoFactory(initCap, initCap*3, 120, addr, dbname)
 	AddrsSess = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C(qu.ObjToString(Config["dbname_addrs_c"]))
-	QyxySess  = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C("qyxy_std")
+	QyxySess = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C("qyxy_std")
 
 	SaveResult, _ = Config["saveresult"].(bool)
 	FieldsFind, _ = Config["fieldsfind"].(bool)
@@ -64,6 +64,7 @@ func UtilInit() {
 	SaveBlock, _ = Config["saveblock"].(bool)
 	QualityAudit, _ = Config["qualityaudit"].(bool)
 	Ffield, _ = Config["ffield"].(bool)
+	Inscribe, _ = Config["inscribe"].(bool)
 	PriceNumberReg = make(map[string]*regexp.Regexp)
 	for k, v := range PriceNumberConfig {
 		PriceNumberReg[k] = regexp.MustCompile(v)

+ 0 - 5
src/main.go

@@ -38,7 +38,6 @@ func main() {
 	}()
 	lock := make(chan bool)
 	<-lock
-
 }
 
 //验证规则
@@ -53,7 +52,3 @@ func testMain() {
 		log.Debug("不匹配")
 	}
 }
-
-
-
-

+ 18 - 4
src/res/fieldscore.json

@@ -860,11 +860,13 @@
     "bidamount": {
         "type": "float",
         "describe": "min>val:1,min<=val<=max:3,max<val:1",
-        "min": 100,
-        "max": 100000000000,
+        "min": 1000,
+        "max1": 1000000000,
+        "max2": 10000000000,
         "score": [
-            -3,
+            -2,
             2,
+            1,
             -3
         ]
     },
@@ -872,10 +874,12 @@
         "type": "float",
         "describe": "min>val:1,min<=val<=max:3,max<val:1",
         "min": 1000,
-        "max": 10000000000,
+        "max1": 1000000000,
+        "max2": 10000000000,
         "score": [
             -3,
             2,
+            1,
             -3
         ]
     },
@@ -1041,6 +1045,16 @@
             }
         ]
     },
+    "owner": {
+        "type": "string",
+        "negativewords": [
+            {
+                "describe": "包含*符号",
+                "regstr": "(\\*)",
+                "score": -2
+            }
+        ]
+    },
     "qualifies": {
         "type": "string",
         "positivewords": [

+ 2 - 2
src/res/tablev1.json

@@ -6,7 +6,7 @@
 		"^(包号|联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
 		"(专家|评委|[打得]分|附件材料)[a-zA-Z0-9]*$__M",
 		"(基本需求.{0,15}|.*联系方式|总计|包组|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
-		"(品牌|姓名|起讫桩号|服务期|限价|邮编|面积|组织形式|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
+		"(品牌|姓名|起讫桩号|服务期|建设期限|限价|邮编|面积|组织形式|发布单位|招标方式|修建宽度|类别|备注|合计|电话|评审|原因|行业|价格|注册资金|印刷服务|业绩奖项)__",
 		"[\\d]+标段$__M",
 		"(\\W{2,10}(名称|参数[及]?要求))$__M"
 	],
@@ -28,7 +28,7 @@
 		".{2,20}元整|[\\d]+万?元__",
 		".{4,}采购(项目)?__",
 		"(首选|第[一二三四五1-5])(顺序|推荐)?(承包|中标|候选|成交)?(候选)?(人|单位|供应商)__M",
-		"(招单价|无供应商报价|全部内容|计量单位|符合国家[及和]行业|二级建造师|公示信息|[甲乙丙]级)__",
+		"(招单价|无供应商报价|全部内容|择优\\+?竞价|计量单位|符合国家[及和]行业|二级建造师|公示信息|[甲乙丙]级)__",
 		"^采购包[0-9]+$__"
 	],
 	"abandontable":[

+ 74 - 43
udpcontrol/src/updprocess.go

@@ -11,15 +11,17 @@ import (
 	"sync"
 	"time"
 )
+
 var (
-	nextNode     		[]map[string]interface{}
-	udpclient    		mu.UdpClient
-	udplock 			sync.Mutex
-	extractAction    	map[string]map[string]interface{}
-	heartAction			map[string]interface{}
-	isAction			bool
-	using_ext_node,standby_ext_node,invalid_ext_node	[]map[string]interface{}
+	nextNode                                           []map[string]interface{} // downstream nodes that sendNextNode notifies
+	udpclient                                          mu.UdpClient // UDP client used for sending (WriteUdp)
+	udplock                                            sync.Mutex // held by processUdpMsg while it runs the handlers
+	extractAction                                      map[string]map[string]interface{} // per-node state keyed "addr:port:stype", plus an "extract_ids" entry holding sid/eid
+	heartAction                                        map[string]interface{} // per-node heartbeat marker, same keys; reset to 0 for each new id range
+	isAction                                           bool // while true, replies are handled as extraction/heartbeat callbacks
+	using_ext_node, standby_ext_node, invalid_ext_node []map[string]interface{} // extraction node lists; the code shown here only dispatches to using_ext_node
 )
+
 //udp接收
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 	switch act {
@@ -29,7 +31,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		if err != nil {
 			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
 		} else if mapInfo != nil {
-			sid ,eid:= qu.ObjToString(mapInfo["gtid"]), qu.ObjToString(mapInfo["lteid"])
+			sid, eid := qu.ObjToString(mapInfo["gtid"]), qu.ObjToString(mapInfo["lteid"])
 			if sid == "" || eid == "" {
 				log.Debug("接收id段异常-err ", "sid=", sid, ",eid=", eid)
 			} else {
@@ -37,7 +39,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 				key := sid + "-" + eid + "-" + qu.ObjToString(mapInfo["stype"])
 				go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
 				udplock.Lock()
-				dealWithExtUdpData(sid,eid)
+				dealWithExtUdpData(sid, eid)
 				udplock.Unlock()
 			}
 		}
@@ -46,13 +48,13 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		udplock.Lock()
 		str := string(data)
 		if isAction {
-			if strings.Contains(str,"heart_extract") {
-				dealWithHeartBackUdpData(strings.ReplaceAll(str,"heart_extract",""))
-			}else {
+			if strings.Contains(str, "heart_extract") {
+				dealWithHeartBackUdpData(strings.ReplaceAll(str, "heart_extract", ""))
+			} else {
 				dealWithCallBackUdpData(str)
 			}
-		}else {
-			log.Debug("其他节点回应:",str)
+		} else {
+			log.Debug("其他节点回应:", str)
 			udptaskmap.Delete(str)
 		}
 		udplock.Unlock()
@@ -60,31 +62,31 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 }
 
 // dealWithExtUdpData handles a newly received id range sid~eid: it refreshes the node lists, splits the range across using_ext_node, resets the per-node state and dispatches the segments.
-func dealWithExtUdpData(sid,eid string) {
+func dealWithExtUdpData(sid, eid string) {
 	// refresh the current state of the extraction nodes
 	initExtractNode()
-	log.Debug("接收当前段落,udp通知抽取-需拆分",len(using_ext_node),"组", sid, "~~", eid)
-	if len(using_ext_node)>0 {
+	log.Debug("接收当前段落,udp通知抽取-需拆分", len(using_ext_node), "组", sid, "~~", eid)
+	if len(using_ext_node) > 0 {
 		// split the id range into segments; lifeArr carries the per-segment "life" value stored with each node's state
-		splitArr,lifeArr:=splitIdMethod(sid,eid)
-		log.Debug("最终分",len(splitArr),"段")
+		splitArr, lifeArr := splitIdMethod(sid, eid)
+		log.Debug("最终分", len(splitArr), "段")
 		extractAction = map[string]map[string]interface{}{} // start from empty per-node state for this range
 		heartAction = map[string]interface{}{}
-		for k,v:=range using_ext_node{
-			skey := fmt.Sprintf("%s:%d:%s",v["addr"],qu.IntAll(v["port"]),v["stype"])
+		for k, v := range using_ext_node {
+			skey := fmt.Sprintf("%s:%d:%s", v["addr"], qu.IntAll(v["port"]), v["stype"]) // node key "addr:port:stype"
 			extractAction[skey] = map[string]interface{}{
-				"life":lifeArr[k],
-				"action":0,
-				"uid":BsonTOStringId(v["_id"]),
+				"life":   lifeArr[k], // NOTE(review): assumes lifeArr has one entry per using_ext_node element — confirm in splitIdMethod
+				"action": 0, // set to 1 by dealWithCallBackUdpData when the node answers
+				"uid":    BsonTOStringId(v["_id"]),
 			}
 			heartAction[skey] = 0
 		}
 		extractAction["extract_ids"] = map[string]interface{}{ // the whole range, read back once all nodes have answered
-			"sid":sid,
-			"eid":eid,
+			"sid": sid,
+			"eid": eid,
 		}
 		sendRunExtractNode(splitArr) // notify the nodes to start extracting
-	}else {
+	} else {
 		log.Debug("无有效机器抽取...程序停止于此...")
 	}
 }
@@ -93,20 +95,23 @@ func dealWithExtUdpData(sid,eid string) {
 func dealWithCallBackUdpData(str string) {
 	if extractAction[str] != nil {
 		extractAction[str]["action"] = 1 // mark this node as having answered (entries start at 0)
-		log.Debug("抽取节点回应:",str)
+		log.Debug("抽取节点回应:", str)
 		f := validExtractFinish() // NOTE(review): helper not visible here — presumably true once every node has answered; confirm
 		if f {
 			sid := qu.ObjToString(extractAction["extract_ids"]["sid"])
 			eid := qu.ObjToString(extractAction["extract_ids"]["eid"])
 			isAction = false // later replies are no longer treated as extraction callbacks
 			lastNodeResponse = time.Now().Unix()
-			sendNextNode(sid,eid)
+			sendNextNode(sid, eid)
+			// The id-range record status is updated inside sendNextNode (updateProcessUdpIdsInfo).
+
 		}
-	}else {
-		log.Debug("其他节点回应:",str)
+	} else {
+		log.Debug("其他节点回应:", str)
 		udptaskmap.Delete(str) // reply from some other node: drop the entry stored under this key
 	}
 }
+
 //处理-心跳回调
 func dealWithHeartBackUdpData(str string) {
 	if heartAction[str] != nil {
@@ -114,12 +119,11 @@ func dealWithHeartBackUdpData(str string) {
 	}
 }
 
-
 //通知所有节点~进行抽取~
-func sendRunExtractNode(splitArr []map[string]interface{})  {
+func sendRunExtractNode(splitArr []map[string]interface{}) {
 	for index, node := range using_ext_node {
-		tmp:=splitArr[index]
-		skey := fmt.Sprintf("%s:%d:%s",node["addr"],qu.IntAll(node["port"]),node["stype"])
+		tmp := splitArr[index]
+		skey := fmt.Sprintf("%s:%d:%s", node["addr"], qu.IntAll(node["port"]), node["stype"])
 		by, _ := json.Marshal(map[string]interface{}{
 			"gtid":  qu.ObjToString(tmp["sid"]),
 			"lteid": qu.ObjToString(tmp["eid"]),
@@ -135,7 +139,7 @@ func sendRunExtractNode(splitArr []map[string]interface{})  {
 }
 
 //通知所有抽取节点~结束抽取
-func sendStopExtractNode(splitArr []map[string]interface{})  {
+func sendStopExtractNode(splitArr []map[string]interface{}) {
 	for _, node := range using_ext_node {
 		by, _ := json.Marshal(map[string]interface{}{
 			"stype": "stop_extract",
@@ -148,7 +152,7 @@ func sendStopExtractNode(splitArr []map[string]interface{})  {
 }
 
 //发送下阶段节点~
-func sendNextNode(sid string,eid string)  {
+func sendNextNode(sid string, eid string) {
 	for _, node := range nextNode {
 		by, _ := json.Marshal(map[string]interface{}{
 			"gtid":  sid,
@@ -162,18 +166,45 @@ func sendNextNode(sid string,eid string)  {
 		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr) //发送下节点
 
 		//只监控清洗流程
-		if qu.IntAll(node["port"])==1799 {
-			node := &udpNode{by, addr, time.Now().Unix()}
-			udptaskmap.Store(string(by), node)
+		if qu.IntAll(node["port"]) == 1799 {
+			new_node := &udpNode{by, addr, time.Now().Unix()}
+			udptaskmap.Store(string(by), new_node)
 		}
 	}
-	log.Debug("udp通知抽取完成...通知下阶段udp-敏感词,补城市",sid,"~",eid)
+	log.Debug("udp通知抽取完成...通知下阶段udp-敏感词,补城市", sid, "~", eid)
+	//更新记录状态
+	updateProcessUdpIdsInfo(sid, eid)
 }
 
 // sendSingleOtherNode sends by as an OP_TYPE_DATA UDP message to the single node at addr:port.
-func sendSingleOtherNode(by []byte,addr string,port string )  {
+func sendSingleOtherNode(by []byte, addr string, port string) {
 	udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
 		IP:   net.ParseIP(addr), // nil when addr is not a valid textual IP address
 		Port: qu.IntAll(port), // port arrives as a string and is converted by qu.IntAll
 	})
-}
+}
+
+// updateProcessUdpIdsInfo finds the bidding_processing_ids record whose gtid/lteid equal sid/eid and sets its dataprocess to 3 and its updatetime to now.
+func updateProcessUdpIdsInfo(sid string, eid string) {
+	query := map[string]interface{}{ // match the record for exactly this id range
+		"gtid":  sid,
+		"lteid": eid,
+	}
+	log.Debug("开始更新流程段落记录~~", query)
+	data := source_mgo.FindOne("bidding_processing_ids", query)
+	if len(data) > 0 {
+		up_id := BsonTOStringId(data["_id"])
+		if up_id != "" { // skip the update when no usable _id string was obtained
+			update := map[string]interface{}{
+				"$set": map[string]interface{}{
+					"dataprocess": 3, // NOTE(review): meaning of status 3 is not visible here — presumably "extraction finished"; confirm
+					"updatetime":  time.Now().Unix(), // Unix seconds
+				},
+			}
+			source_mgo.UpdateById("bidding_processing_ids", up_id, update) // result is not checked; the log line below runs regardless
+			log.Debug("流程段落记录~~更新完毕~", update)
+		}
+	} else { // no matching record: only logged
+		log.Debug("未查询到记录id段落~", query)
+	}
+}