Sfoglia il codice sorgente

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

maxiaoshan 4 anni fa
parent
commit
3391174552

+ 35 - 35
data_quality/src/config.json

@@ -1,62 +1,62 @@
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "192.168.3.207:27092",
-    "dbName": "extract_kf",
-    "collName": "zk_move",
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "result_20200715",
     "pool": 10,
     "site": {
-      "site_dbname": "extract_kf",
+      "site_dbname": "qfw",
       "site_coll": "site"
     }
   },
+  "qy_mongodb": {
+    "qy_addrName": "172.17.4.187:27081",
+    "qy_dbName": "mixdata",
+    "qy_collName": "qyxy_std",
+    "pool": 10
+  },
   "score_standard": {
     "total_score": 100,
-    "core_max": 70,
+    "core_max": 60,
     "core_each": 10,
-    "other_max": 30,
+    "other_max": 40,
     "other_each": 5,
     "deduct_each": 5
   },
-  "qy_mongodb": {
-    "qy_addrName": "192.168.3.207:27092",
-    "qy_dbName": "extract_kf",
-    "qy_collName": "zk",
-    "pool": 10
-  },
   "core_element":[
-    {"bidamount": {"type": "float", "large": 0}},
-    {"budget": {"type": "float", "large": 0}},
-    {"projectcode": {"type": "string", "length": 5}},
-    {"contractnumber": {"type": "string", "length": 5}},
-    {"title": {"type": "string", "length": 10}},
-    {"projectname": {"type": "string", "length": 10}},
-    {"buyer": {"type": "string", "length": 10}},
-    {"winner": {"type": "string", "length": 10}}
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
   ],
   "other_element": [
-    {"bidopenaddress": {"type": "string","length": 0}},
-    {"winnertel": {"type": "string","length": 0}},
-    {"winnerperson": {"type": "string","length": 0}},
-    {"winneraddr": {"type": "string","length": 0}},
-    {"agencyaddr": {"type": "string","length": 0}},
-    {"buyeraddr": {"type": "string","length": 0}},
-    {"projectaddr": {"type": "string","length": 0}},
-    {"agencytel": {"type": "string","length": 0}},
-    {"agencyperson": {"type": "string","length": 0}},
-    {"buyerperson": {"type": "string","length": 0}},
-    {"agency": {"type": "string","length": 0}},
-    {"buyertel": {"type": "string","length": 0}},
-    {"bidopentime": {"type": "int","large": 0}},
-    {"signaturedate": {"type": "int","large": 0}}
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
   ],
   "deduct_element": [
     "area",
     "city",
     "site",
     "toptype",
-    "subtype"
+    "subtype",
+    "title"
   ],
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"

+ 23 - 24
data_quality/src/main.go

@@ -3,12 +3,12 @@ package main
 import (
 	"encoding/json"
 	"log"
+	mu "mfw/util"
 	"net"
 	"os"
 	"qfw/common/src/qfw/util"
 	qu "qfw/util"
 	"time"
-	mu "mfw/util"
 )
 
 
@@ -23,6 +23,7 @@ var (
 	core_element,other_element	[]map[string]interface{}	//要素
 	deduct_element 	[]string
 	total_score,core_max,core_each,other_max,other_each ,deduct_each int
+	specialaddr   string
 )
 
 func initSite()  {
@@ -77,6 +78,8 @@ func initMgo()  {
 	other_max = qu.IntAll(score_standard["other_max"])
 	other_each = qu.IntAll(score_standard["other_each"])
 	deduct_each = qu.IntAll(score_standard["deduct_each"])
+
+	specialaddr = sysconfig["specialaddr"].(string)
 }
 
 
@@ -86,6 +89,7 @@ func init() {
 	initMgo()
 	initSite()//加载站点
 	log.Println("采用udp模式")
+	//utf8.RuneCountInString()
 }
 
 
@@ -111,7 +115,6 @@ func main() {
 	}
 	mapinfo["gtid"] = sid
 	mapinfo["lteid"] = eid
-	mapinfo["stop"] = "true"
 	startTask([]byte{}, mapinfo)
 	time.Sleep(99999 * time.Hour)
 
@@ -176,7 +179,6 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 		element_score,element_reason:=dealWithElementRate(tmp)
 		error_score,abnormal_score,error_reason,abnormal_reason:=dealWithErrorRate(tmp)
 		//log.Println("元素分:",element_score,"错误分:",error_score,"异常分:",abnormal_score)
-
 		updateExtract = append(updateExtract, []map[string]interface{}{
 			map[string]interface{}{
 				"_id": tmp["_id"],
@@ -210,32 +212,29 @@ func startTask(data []byte, mapInfo map[string]interface{}) {
 
 	log.Println("task quality over - 总计数量",index)
 
-	time.Sleep(60 * time.Second)
+	time.Sleep(30 * time.Second)
 
 	//任务完成,开始发送广播通知下面节点
-	if mapInfo["stop"] == nil {
-		log.Println("评分统计完成-发送udp")
-		for _, to := range nextNode {
-			sid, _ := mapInfo["gtid"].(string)
-			eid, _ := mapInfo["lteid"].(string)
-			key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
-			by, _ := json.Marshal(map[string]interface{}{
-				"gtid":  sid,
-				"lteid": eid,
-				"stype": util.ObjToString(to["stype"]),
-				"key":   key,
-			})
-			addr := &net.UDPAddr{
-				IP:   net.ParseIP(to["addr"].(string)),
-				Port: util.IntAll(to["port"]),
-			}
-			node := &udpNode{by, addr, time.Now().Unix(), 0}
-			udptaskmap.Store(key, node)
-			udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+	log.Println("评分统计完成-发送udp")
+	for _, to := range nextNode {
+		sid, _ := mapInfo["gtid"].(string)
+		eid, _ := mapInfo["lteid"].(string)
+		key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
+		by, _ := json.Marshal(map[string]interface{}{
+			"gtid":  sid,
+			"lteid": eid,
+			"stype": util.ObjToString(to["stype"]),
+			"key":   key,
+		})
+		addr := &net.UDPAddr{
+			IP:   net.ParseIP(to["addr"].(string)),
+			Port: util.IntAll(to["port"]),
 		}
+		node := &udpNode{by, addr, time.Now().Unix(), 0}
+		udptaskmap.Store(key, node)
+		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
 	}
 
 
 
-
 }

+ 122 - 36
data_quality/src/mark

@@ -1,56 +1,97 @@
-备份数据使用
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "172.17.4.85:27080",
-    "dbName": "qfw",
-    "collName": "result_20200715",
+    "addrName": "192.168.3.207:27092",
+    "dbName": "extract_kf",
+    "collName": "zk_move",
     "pool": 10,
     "site": {
-      "site_dbname": "qfw",
+      "site_dbname": "extract_kf",
       "site_coll": "site"
     }
   },
+  "score_standard": {
+    "total_score": 100,
+    "core_max": 60,
+    "core_each": 10,
+    "other_max": 40,
+    "other_each": 5,
+    "deduct_each": 5
+  },
+  "qy_mongodb": {
+    "qy_addrName": "192.168.3.207:27092",
+    "qy_dbName": "extract_kf",
+    "qy_collName": "zk",
+    "pool": 10
+  },
   "core_element":[
-    {"bidamount": {"type": "float", "large": 0}},
-    {"budget": {"type": "float", "large": 0}},
-    {"projectcode": {"type": "string", "length": 5}},
-    {"contractnumber": {"type": "string", "length": 5}},
-    {"title": {"type": "string", "length": 10}},
-    {"projectname": {"type": "string", "length": 10}},
-    {"buyer": {"type": "string", "length": 10}},
-    {"winner": {"type": "string", "length": 10}}
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
   ],
   "other_element": [
-    {"bidopenaddress": {"type": "string","length": 0}},
-    {"winnertel": {"type": "string","length": 0}},
-    {"winnerperson": {"type": "string","length": 0}},
-    {"winneraddr": {"type": "string","length": 0}},
-    {"agencyaddr": {"type": "string","length": 0}},
-    {"buyeraddr": {"type": "string","length": 0}},
-    {"projectaddr": {"type": "string","length": 0}},
-    {"agencytel": {"type": "string","length": 0}},
-    {"agencyperson": {"type": "string","length": 0}},
-    {"buyerperson": {"type": "string","length": 0}},
-    {"agency": {"type": "string","length": 0}},
-    {"buyertel": {"type": "string","length": 0}},
-    {"bidopentime": {"type": "int","large": 0}},
-    {"signaturedate": {"type": "int","large": 0}}
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
   ],
   "deduct_element": [
     "area",
     "city",
     "site",
     "toptype",
-    "subtype"
+    "subtype",
+    "title"
   ],
-  "score_standard": {
-    "total_score": 100,
-    "core_max": 70,
-    "core_each": 10,
-    "other_max": 30,
-    "other_each": 5,
-    "deduct_each": 5
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
+  "jkmail": {
+    "to": "zhengkun@topnet.net.cn",
+    "api": "http://10.171.112.160:19281/_send/_mail"
+  },
+  "nextNode": [
+
+  ]
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{
+  "udpport": ":17007",
+  "mongodb": {
+    "addrName": "172.17.4.85:27080",
+    "dbName": "qfw",
+    "collName": "result_20200715",
+    "pool": 10,
+    "site": {
+      "site_dbname": "qfw",
+      "site_coll": "site"
+    }
   },
   "qy_mongodb": {
     "qy_addrName": "172.17.4.187:27081",
@@ -58,7 +99,47 @@
     "qy_collName": "qyxy_std",
     "pool": 10
   },
-
+  "score_standard": {
+    "total_score": 100,
+    "core_max": 60,
+    "core_each": 10,
+    "other_max": 40,
+    "other_each": 5,
+    "deduct_each": 5
+  },
+  "core_element":[
+    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
+    {"budget": {"type": "float", "min": 0,"max":50000000000}},
+    {"projectcode": {"type": "string", "min": 4,"max": 20}},
+    {"projectname": {"type": "string", "min": 4,"max": 40}},
+    {"buyer": {"type": "string", "min":4,"max": 15}},
+    {"winner": {"type": "string", "min": 4,"max": 15}}
+  ],
+  "other_element": [
+    {"agency": {"type": "string","min": 3,"max": 15}},
+    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
+    {"winneraddr": {"type": "string","min": 3,"max": 40}},
+    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
+    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
+    {"projectaddr": {"type": "string","min": 2,"max": 40}},
+    {"agencyperson": {"type": "string","min": 0,"max": 10}},
+    {"buyerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnerperson": {"type": "string","min": 0,"max": 10}},
+    {"winnertel": {"type": "string","min": 4,"max": 20}},
+    {"agencytel": {"type": "string","min": 4,"max": 20}},
+    {"buyertel": {"type": "string","min": 4,"max": 20}},
+    {"bidopentime": {"type": "int","min": 946656000}},
+    {"signaturedate": {"type": "int","min": 946656000}}
+  ],
+  "deduct_element": [
+    "area",
+    "city",
+    "site",
+    "toptype",
+    "subtype",
+    "title"
+  ],
+  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
   "jkmail": {
     "to": "zhengkun@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail"
@@ -77,3 +158,8 @@
 
 
 
+
+
+
+
+

+ 2 - 2
data_quality/src/scoreExpError.go

@@ -217,7 +217,7 @@ func codesAnalysis(projectcode string,contractnumber string) bool {
 			return false
 		}
 		//符合-8长度-日期格式 yyyyMMdd
-		if !regAnalysis(projectcode) {
+		if !regAnalysis(projectcode) &&len(projectcode)==8 {
 			return false
 		}
 	}
@@ -227,7 +227,7 @@ func codesAnalysis(projectcode string,contractnumber string) bool {
 			return false
 		}
 
-		if !regAnalysis(contractnumber) {
+		if !regAnalysis(contractnumber)  && len(projectcode)==8  {
 			return false
 		}
 	}

+ 117 - 48
data_quality/src/scoreLogic.go

@@ -1,19 +1,102 @@
 package main
+
 import (
 	qu "qfw/util"
+	"strings"
+	"unicode/utf8"
 )
 var element_reason map[string]interface{}
 
 func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}) {
 
-	//score_standard 打分标准
+	//score_standard 打分标准  要素打分 - 需慎重
 	element_reason = map[string]interface{}{}
 	m,n,z :=0,0,0
 	core_value,other_value,deduct_value :="","",""
 
+	//第一次-配置验证
+	coreArr,otherArr:=filterConfiguration(tmp)
+	//细节过滤-验证
+	new_coreArr,new_otherArr:=[]string{},[]string{}
+	for _,v:=range coreArr{
+		if v=="projectname"||v=="buyer"||v=="winner" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_coreArr = append(new_coreArr,v)
+			}
+		}else {
+			new_coreArr = append(new_coreArr,v)
+		}
+	}
+
+	for _,v:=range otherArr{
+		if v=="agencyperson"||v=="buyerperson"||v=="winnerperson" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="agencytel"||v=="buyertel"||v=="winnertel" {
+			if !isChinese(qu.ObjToString(tmp[v])) {
+				if isTelephone(qu.ObjToString(tmp[v])) {
+					new_otherArr = append(new_otherArr,v)
+				}
+			}
+		}else if v=="bidopentime"||v=="signaturedate" {
+			if isTimestamp(qu.Int64All(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="bidopenaddress"||v=="winneraddr"||v=="agencyaddr"||
+			v=="buyeraddr" {
+			if isPlaceAddr(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else if v=="projectaddr" {
+			if isChinese(qu.ObjToString(tmp[v])) {
+				new_otherArr = append(new_otherArr,v)
+			}
+		}else {
+			new_otherArr = append(new_otherArr,v)
+		}
+	}
+
+	core_value = strings.Join(new_coreArr, ",")
+	m = len(new_coreArr)
 
-	//要素打分 - 需慎重 core_element other_element deduct_element
+	other_value = strings.Join(new_otherArr, ",")
+	n = len(new_otherArr)
 
+
+
+	//扣分项
+	for _,v:=range deduct_element{
+		if qu.ObjToString(tmp[v])=="" {
+			z--
+			if deduct_value == "" {
+				deduct_value = v
+			}else {
+				deduct_value = deduct_value+","+v
+			}
+		}
+	}
+
+	total,core_s,other_s,deduct_s:=calculateScore(m,n,z)
+	return total,map[string]interface{}{
+		"coreElement":map[string]interface{}{
+			"key":core_value,
+			"core_score":core_s,
+		},
+		"otherElement":map[string]interface{}{
+			"key":other_value,
+			"other_score":other_s,
+		},
+		"deductElement":map[string]interface{}{
+			"key":deduct_value,
+			"deduct_score":deduct_s,
+		},
+	}
+}
+
+func filterConfiguration(tmp map[string]interface{})([]string,[]string)  {
+	coreArr ,otherArr:= []string{},[]string{}
+	//核心要素   int - 时间-   float-金额区间  string-字符串长度
 	for _,v:=range core_element{
 		for k1,v1:=range v{
 			if tmp[k1]==nil {
@@ -21,23 +104,25 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 			}
 			dict :=*qu.ObjToMap(v1)
 			element_type := qu.ObjToString(dict["type"])
+
 			if element_type=="int" {
-				temp_num:=qu.IntAll(dict["large"])
-				if qu.IntAll(tmp[k1])>temp_num {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				if qu.IntAll(tmp[k1])>min {
+					coreArr = append(coreArr,k1)
 				}
 			}else if element_type=="float" {
-				temp_num:=qu.Float64All(dict["large"])
-				if qu.Float64All(tmp[k1])>temp_num {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.Float64All(dict["min"])
+				max:=qu.Float64All(dict["max"])
+				if qu.Float64All(tmp[k1])>min && qu.Float64All(tmp[k1])<max{
+					coreArr = append(coreArr,k1)
 				}
 			}else if element_type=="string" {
-				temp_length:=qu.IntAll(dict["length"])
-				if len(qu.ObjToString(tmp[k1]))>temp_length {
-					m++
-					core_value = core_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				max:=qu.IntAll(dict["max"])
+
+				if utf8.RuneCountInString(qu.ObjToString(tmp[k1]))>min &&
+					utf8.RuneCountInString(qu.ObjToString(tmp[k1]))<max{
+					coreArr = append(coreArr,k1)
 				}
 			}else {
 
@@ -45,6 +130,7 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 		}
 	}
 
+
 	for _,v:=range other_element{
 		for k1,v1:=range v{
 			if tmp[k1]==nil {
@@ -52,23 +138,25 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 			}
 			dict :=*qu.ObjToMap(v1)
 			element_type := qu.ObjToString(dict["type"])
+
 			if element_type=="int" {
-				temp_num:=qu.IntAll(dict["large"])
-				if qu.IntAll(tmp[k1])>temp_num {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				if qu.IntAll(tmp[k1])>min {
+					otherArr = append(otherArr,k1)
 				}
 			}else if element_type=="float" {
-				temp_num:=qu.Float64All(dict["large"])
-				if qu.Float64All(tmp[k1])>temp_num {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.Float64All(dict["min"])
+				max:=qu.Float64All(dict["max"])
+				if qu.Float64All(tmp[k1])>min && qu.Float64All(tmp[k1])<max{
+					otherArr = append(otherArr,k1)
 				}
 			}else if element_type=="string" {
-				temp_length:=qu.IntAll(dict["length"])
-				if len(qu.ObjToString(tmp[k1]))>temp_length {
-					n++
-					other_value = other_value+k1+"-"
+				min:=qu.IntAll(dict["min"])
+				max:=qu.IntAll(dict["max"])
+
+				if utf8.RuneCountInString(qu.ObjToString(tmp[k1]))>min &&
+					utf8.RuneCountInString(qu.ObjToString(tmp[k1]))<max{
+					otherArr = append(otherArr,k1)
 				}
 			}else {
 
@@ -76,30 +164,11 @@ func dealWithElementRate(tmp map[string]interface{}) (int,map[string]interface{}
 		}
 	}
 
+	return coreArr,otherArr
+}
+
 
-	for _,v:=range deduct_element{
-		if qu.ObjToString(tmp[v])=="" {
-			z--
-			deduct_value = deduct_value+v+"-"
-		}
-	}
 
-	total,core_s,other_s,deduct_s:=calculateScore(m,n,z)
-	return total,map[string]interface{}{
-		"coreElement":map[string]interface{}{
-			"key":core_value,
-			"core_score":core_s,
-		},
-		"otherElement":map[string]interface{}{
-			"key":other_value,
-			"other_score":other_s,
-		},
-		"deductElement":map[string]interface{}{
-			"key":deduct_value,
-			"deduct_score":deduct_s,
-		},
-	}
-}
 
 func calculateScore(core_num int,other_num int,deduct_num int) (int,int,int,int)  {
 

+ 73 - 0
data_quality/src/scoreMethod.go

@@ -0,0 +1,73 @@
+package main
+
+import (
+	"regexp"
+	"time"
+	"unicode"
+)
+
+// isPlaceAddr reports whether str looks like a postal/street address: it must
+// not be a phone number (per isTelephone) and must contain at least one
+// non-empty match of the specialaddr pattern.
+func isPlaceAddr(str string) bool {
+	if isTelephone(str) {
+		return false
+	}
+	// specialaddr is loaded from configuration and may not be a valid regular
+	// expression; treat an unusable pattern as "no match" instead of calling a
+	// method on a nil *regexp.Regexp.
+	regx, err := regexp.Compile(specialaddr)
+	if err != nil {
+		return false
+	}
+	// Compare against "" (rather than MatchString) so that a pattern which
+	// matches only the empty string still yields false.
+	return regx.FindString(str) != ""
+}
+
+
+
+func isTimestamp(i int64) bool {
+	now:=time.Now().Unix()
+	if i<now+86400*180 {
+		return true
+	}
+	return false
+}
+
+func isTelephone(str string) bool {
+
+	reg1:=`^1[3|4|5|6|7|8|9][0-9]\d{8}$`
+	regx1,_ := regexp.Compile(reg1)
+
+	arr1:=regx1.FindAllString(str,-1)
+	if len(arr1)>0 {
+		return true
+	}
+
+	reg2:=`^(\d{2,4}-)?\d{7,8}$`
+	regx2,_ := regexp.Compile(reg2)
+	arr2:=regx2.FindAllString(str,-1)
+	if len(arr2)>0 {
+		return true
+	}
+
+	return false
+}
+
+
+
+
+func isChinese(str string) bool {
+	var count int
+	for _, v := range str {
+		if unicode.Is(unicode.Han, v) {
+			count++
+			break
+		}
+	}
+	return count > 0
+}
+
+func isChineseChar(str string) bool {
+	for _, r := range str {
+		if unicode.Is(unicode.Scripts["Han"], r) || (regexp.MustCompile("[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]").MatchString(string(r))) {
+			return true
+		}
+	}
+	return false
+}

+ 3 - 2
fullproject/src_v1/config.json

@@ -8,6 +8,7 @@
 	"hints":"publishtime_1",
     "extractColl": "jh_info",
     "projectColl": "jh_project",
+    "updateColl": "jh_info",
     "backupFlag": false,
     "siteColl": "site",
     "thread": 1,
@@ -16,8 +17,8 @@
         "api": "http://10.171.112.160:19281/_send/_mail"
     },
     "es": {
-        "addr": "http://192.168.3.128:9800",
-        "index": "projectset_v3",
+        "addr": "http://127.0.0.1:9800",
+        "index": "projectset",
         "itype": "projectset",
         "pool": 10
     },

+ 2 - 0
fullproject/src_v1/init.go

@@ -21,6 +21,7 @@ var (
 	Sysconfig                                      map[string]interface{} //读取配置文件
 	MongoTool                                      *MongodbSim            //mongodb连接
 	ExtractColl, ProjectColl, BackupColl, SiteColl string                 //抽取表、项目表、项目快照表、站点表
+	UpdateColl									   string				  // 金额修改数据表
 	Thread                                         int                    //配置项线程数
 	//NextNode                 []interface{}
 )
@@ -63,6 +64,7 @@ func init() {
 
 	ExtractColl = Sysconfig["extractColl"].(string)
 	ProjectColl = Sysconfig["projectColl"].(string)
+	UpdateColl = Sysconfig["updateColl"].(string)
 	BackupColl = Sysconfig["projectColl"].(string) + "_back"
 	SiteColl = Sysconfig["siteColl"].(string)
 	Thread = util.IntAll(Sysconfig["thread"])

+ 14 - 4
fullproject/src_v1/main.go

@@ -91,8 +91,8 @@ func main() {
 
 //测试组人员使用
 func mainT() {
-	sid = "56388138af53745d9a000001"
-	eid = "5b671f32a5cb26b9b76ddbb6"
+	sid = "5e69e7cc85a9271abf1bdb0a"
+	eid = "5e993c6185a9271abf2f51b5"
 	//flag.StringVar(&sid, "sid", "", "开始id")
 	//flag.StringVar(&eid, "eid", "", "结束id")
 	//flag.Parse()
@@ -104,7 +104,7 @@ func mainT() {
 	}
 	mapinfo["gtid"] = sid
 	mapinfo["lteid"] = eid
-	mapinfo["stype"] = "ql"
+	mapinfo["stype"] = "updateMoneyMgo"
 	mapinfo["ip"] = "127.0.0.1"
 	mapinfo["port"] = Sysconfig["udpport"]
 	if Sysconfig["loadStart"] != nil {
@@ -116,7 +116,8 @@ func mainT() {
 	P_QL.loadSite()
 	P_QL.currentType = mapinfo["stype"].(string)
 	P_QL.pici = time.Now().Unix()
-	P_QL.taskQl(mapinfo)
+	//P_QL.taskQl(mapinfo)
+	P_QL.taskQuery()
 	time.Sleep(20 * time.Second)
 }
 
@@ -197,6 +198,15 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					P_QL.pici = time.Now().Unix()
 					P_QL.taskUpdateMoney(mapInfo)
 				}()
+			case "updateMoneyMgo": //修改金额
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+					P_QL.currentType = tasktype
+					P_QL.pici = time.Now().Unix()
+					P_QL.taskQuery()
+				}()
 			case "history": //历史数据合并,暂时不写
 				go func() {
 					defer func() {

+ 3 - 3
fullproject/src_v1/project.go

@@ -1143,7 +1143,7 @@ func CountAmount(project *ProjectInfo, info *Info, tmp map[string]interface{}) {
 				}
 			}
 		} else {
-			if project.Budget < info.Budget {
+			if project.Budget > info.Budget {
 				project.Budget = info.Budget
 				project.Budgettag = 0
 			}
@@ -1200,7 +1200,7 @@ func CountAmount(project *ProjectInfo, info *Info, tmp map[string]interface{}) {
 				}
 			} else {
 				if info.SubType == "中标" || info.SubType == "成交" {
-					if project.Bidamount < info.Bidamount {
+					if project.Bidamount > info.Bidamount {
 						project.Bidamount = info.Bidamount
 						project.Bidamounttag = 0
 					} else {
@@ -1220,7 +1220,7 @@ func CountAmount(project *ProjectInfo, info *Info, tmp map[string]interface{}) {
 								project.Bidamount = project.Bidamount + info.Bidamount
 								project.Bidamounttag = 0
 							} else {
-								if project.Bidamount < info.Bidamount {
+								if project.Bidamount > info.Bidamount {
 									project.Bidamount = info.Bidamount
 									project.Bidamounttag = 0
 								}

+ 106 - 98
fullproject/src_v1/task.go

@@ -8,7 +8,6 @@ import (
 	mu "mfw/util"
 	"qfw/util"
 	"regexp"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -103,12 +102,10 @@ var sp = make(chan bool, 5)
 
 //初始化全量合并对象
 func init() {
-	util.Debug("task init...")
 	P_QL = NewPT()
 	log.Println(len(P_QL.updatePool))
 	go P_QL.updateAllQueue()
 	go P_QL.clearMem()
-	util.Debug("task init end")
 }
 
 func (p *ProjectTask) updateAllQueue() {
@@ -341,110 +338,118 @@ func (p *ProjectTask) taskUpdateInfo(udpInfo map[string]interface{}) {
 	p.enter(db, coll, q)
 }
 
+// taskQuery iterates over every document of the UpdateColl collection
+// (selecting budget, bidamount and package), stores the hex form of each
+// document's _id under the "id" key and passes the document to
+// taskUpdateMoney. Progress is logged once per 1000 documents.
+func (p *ProjectTask) taskQuery() {
+	defer util.Catch()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+
+	projection := map[string]interface{}{"budget": 1, "bidamount": 1, "package": 1}
+	it := sess.DB(MongoTool.DbName).C(UpdateColl).Find(map[string]interface{}{}).Select(projection).Iter()
+
+	for processed := 0; ; processed++ {
+		// A fresh map per document, so nothing carries over between iterations.
+		doc := make(map[string]interface{})
+		if !it.Next(&doc) {
+			return
+		}
+		docID := doc["_id"]
+		// taskUpdateMoney reads the id as a string under "id".
+		doc["id"] = docID.(primitive.ObjectID).Hex()
+		if processed%1000 == 0 {
+			log.Println("current modify", processed, docID)
+		}
+		p.taskUpdateMoney(doc)
+	}
+}
+
 //修改公告信息的预算/中标金额
 func (p *ProjectTask) taskUpdateMoney(udpInfo map[string]interface{}) {
 	defer util.Catch()
 	id := udpInfo["id"].(string)
-	budget := util.ObjToString(udpInfo["budget"])
-	bidamount := util.ObjToString(udpInfo["bidamount"])
-	if budget == "" && bidamount == "" {
-		util.Debug("")
-		return
-	}
+	budget := util.Float64All(udpInfo["budget"])
+	bidamount := util.Float64All(udpInfo["bidamount"])
 
 	client := Es.GetEsConn()
 	defer Es.DestoryEsConn(client)
 	esquery := `{"query": {"bool": {"must": [{"term": {"list.infoid": "`+id+`"}}]}}}`
 	data := Es.Get(Index, Itype, esquery)
-	util.Debug(*data)
-	if data != nil {
+	if len(*data) > 0 {
 		pid := util.ObjToString((*data)[0]["_id"])
 		pro := MongoTool.FindById(ProjectColl, pid)
+		if len(pro) == 0 {
+			util.Debug("未找到项目, pid=", pid)
+			return
+		}
 		var info *map[string]interface{}
 		for _, v := range []interface{}(pro["list"].(primitive.A)){
 			v1 := v.(map[string]interface{})
 			if util.ObjToString(v1["infoid"]) == id {
 				info = util.ObjToMap(v)
 				infoField := util.ObjToMap(pro["infofield"])
-				if budget != "" {
-					if budget != "del" {
-						newBudget, _ := strconv.ParseFloat(budget, 64)
-						if pro["budget"] == (*info)["budget"] {
-							pro["budget"] = newBudget
-						}
-						if util.IntAll(pro["multipackage"]) == 1 {
-							if packages, ok := pro["package"].(map[string]interface{}); ok {
-							M :
-									for _, v := range packages{
-										v1 := []interface{}(v.(primitive.A))
-										for _, v2 := range v1{
-											v3 := v2.(map[string]interface{})
-											if util.ObjToString(v3["infoid"]) == id {
-												if v3["budget"] != nil {
-													v3["budget"] = newBudget
-												}
-											}else {
-												break M
-											}
+				if udpInfo["budget"] != nil{
+					util.Debug("update-------", (*info)["infoid"])
+					//if pro["budget"] == (*info)["budget"] {
+					//	pro["budget"] = budget
+					//}
+					//多包中的金额
+					if util.IntAll(pro["multipackage"]) == 1 {
+						if packages, ok := pro["package"].(map[string]interface{}); ok {
+						M :
+							for k, v := range packages{
+								v1 := []interface{}(v.(primitive.A))
+								for _, v2 := range v1{
+									v3 := v2.(map[string]interface{})
+									if util.ObjToString(v3["infoid"]) == id {
+										if v3["budget"] != nil {
+											pkg := udpInfo["package"].(map[string]interface{})
+											tmp := pkg[k].(map[string]interface{})
+											v3["budget"] = tmp["budget"]
 										}
+									}else {
+										break M
 									}
+								}
 							}
 						}
-						if pro["sortprice"] == (*info)["budget"] {
-							pro["sortprice"] = newBudget
-						}
-						(*info)["budget"] = newBudget
-						(*util.ObjToMap((*infoField)[id]))["budget"] = newBudget
-					}else {
-						delete(*info, "budget")
-						delete(*util.ObjToMap((*infoField)[id]), "budget")
-						if pro["budget"] == (*info)["budget"] {
-							money := FindMoney("budget", pro)
-							if money >= 0 {
-								pro["budget"] = money
-							}
-						}
 					}
+					(*info)["budget"] = budget
+					(*util.ObjToMap((*infoField)[id]))["budget"] = budget
+					if pro["sortprice"] == (*info)["budget"] {
+						pro["sortprice"] = budget
+					}
+				}else {
+					delete(*info, "budget")
 				}
-				if bidamount != "" {
-					if bidamount != "del" {
-						newBidamount, _ := strconv.ParseFloat(bidamount, 64)
-						if pro["bidamount"] == (*info)["bidamount"] {
-							pro["bidamount"] = newBidamount
-						}
-						if util.IntAll(pro["multipackage"]) == 1 {
-							if packages, ok := pro["package"].(map[string]interface{}); ok {
-							N :
-								for _, v := range packages{
-									v1 := []interface{}(v.(primitive.A))
-									for _, v2 := range v1{
-										v3 := v2.(map[string]interface{})
-										if util.ObjToString(v3["infoid"]) == id {
-											if v3["bidamount"] != nil {
-												v3["bidamount"] = newBidamount
-											}
-										}else {
-											break N
+				if udpInfo["bidamount"] != nil{
+					//if pro["bidamount"] == (*info)["bidamount"] {
+					//	pro["bidamount"] = bidamount
+					//}
+					v1["bidamount"] = bidamount
+					if util.IntAll(pro["multipackage"]) == 1 {
+						if packages, ok := pro["package"].(map[string]interface{}); ok {
+							for k, v := range packages{
+								v1 := []interface{}(v.(primitive.A))
+								for _, v2 := range v1{
+									v3 := v2.(map[string]interface{})
+									if util.ObjToString(v3["infoid"]) == id {
+										if v3["bidamount"] != nil {
+											pkg := udpInfo["package"].(map[string]interface{})
+											tmp := pkg[k].(map[string]interface{})
+											v3["bidamount"] = tmp["bidamount"]
 										}
 									}
 								}
 							}
 						}
-						if pro["sortprice"] == (*info)["bidamount"] {
-							pro["sortprice"] = newBidamount
-						}
-						(*info)["bidamount"] = newBidamount
-						(*util.ObjToMap((*infoField)[id]))["bidamount"] = newBidamount
-					}else {
-						delete((*info), "bidamount")
-						delete((*util.ObjToMap((*infoField)[id])), "bidamount")
-						if pro["bidamount"] == (*info)["bidamount"] {
-							money := FindMoney("bidamount", pro)
-							if money >= 0 {
-								pro["bidamount"] = money
-							}
-						}
 					}
+					(*info)["bidamount"] = bidamount
+					(*util.ObjToMap((*infoField)[id]))["bidamount"] = bidamount
+					if pro["sortprice"] == (*info)["bidamount"] {
+						pro["sortprice"] = bidamount
+					}
+				}else {
+					delete(*info, "bidamount")
 				}
 				break
 			}
@@ -455,17 +460,22 @@ func (p *ProjectTask) taskUpdateMoney(udpInfo map[string]interface{}) {
 		_ = json.Unmarshal(bys, &project)
 		bys1, _ := json.Marshal(info)
 		_ = json.Unmarshal(bys1, &pInfo)
-		CountAmount(project, pInfo, *info)
-
-		if project.Budget > 0 {
-			util.Debug(project.Budget)
-			pro["budget"] = project.Budget
-			pro["budgettag"] = 0
-		}
-		if project.Bidamount > 0 {
-			util.Debug(project.Bidamount)
-			pro["bidamount"] = project.Bidamount
-			pro["bidamounttag"] = 0
+		if len(project.Ids) > 1 {
+			CountAmount(project, pInfo, *info)
+			if project.Budget > 0 {
+				pro["budget"] = project.Budget
+			}
+			if project.Bidamount > 0 {
+				pro["bidamount"] = project.Bidamount
+			}
+		}else {
+			pro["budget"] = budget
+			pro["bidamount"] = bidamount
+			if budget > bidamount {
+				pro["sortprice"] = budget
+			}else {
+				pro["sortprice"] = bidamount
+			}
 		}
 		set := map[string]interface{}{
 			"$set": pro,
@@ -492,6 +502,7 @@ func (p *ProjectTask) taskUpdateMoney(udpInfo map[string]interface{}) {
 					}},
 				"stype": "project",
 			})
+			util.Debug(string(by))
 			_ = udpclient.WriteUdp(by, mu.OP_TYPE_DATA, toaddr[1])
 		}
 	}
@@ -525,14 +536,12 @@ func nextNode(mapInfo map[string]interface{}, pici int64) {
 	mapInfo["query"] = map[string]interface{}{
 		"pici": pici,
 	}
-	for n, to := range toaddr {
-		key := fmt.Sprintf("%d-%s-%d", pici, "project", n)
-		mapInfo["key"] = key
-		datas, _ := json.Marshal(mapInfo)
-		node := &udpNode{datas, to, time.Now().Unix(), 0}
-		udptaskmap.Store(key, node)
-		_ = udpclient.WriteUdp(datas, mu.OP_TYPE_DATA, to)
-	}
+	key := fmt.Sprintf("%d-%s-%d", pici, "project", 0)
+	mapInfo["key"] = key
+	datas, _ := json.Marshal(mapInfo)
+	node := &udpNode{datas, toaddr[0], time.Now().Unix(), 0}
+	udptaskmap.Store(key, node)
+	_ = udpclient.WriteUdp(datas, mu.OP_TYPE_DATA, toaddr[0])
 }
 
 func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
@@ -583,7 +592,6 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 				break L
 			}
 		}
-
 	}()
 	fields := map[string]interface{} {"area": 1, "city": 1, "district": 1, "comeintime": 1, "publishtime": 1, "bidopentime": 1, "title": 1, "projectname": 1, "href": 1,
 		"projectcode": 1, "buyerclass": 1, "winner": 1, "s_winner": 1, "buyer": 1, "buyerperson": 1, "buyertel": 1, "infoformat": 1, "toptype": 1, "subtype": 1, "spidercode": 1,

+ 11 - 3
src/jy/clear/tonumber.go

@@ -383,10 +383,18 @@ func ClearMaxAmount(data []interface{}, spidercode ...string) []interface{} {
 		if sp, ok := moneyClearSpidercode[spidercode[0]]; ok {
 			maxmoney := util.Float64All(sp.(map[string]interface{})["maxmoney"])
 			divisor := util.Float64All(sp.(map[string]interface{})["divisor"])
-			if value >= maxmoney {
-				value /= divisor
-				data[0] = value
+			invalid := util.ObjToString(sp.(map[string]interface{})["invalid"])
+			if len(invalid) > 0 { //无效值
+				data[len(data)-1] = false
 			}
+			isclear := spidercode[1]
+			if isclear == "T" {
+				if value >= maxmoney {
+					value /= divisor
+					data[0] = value
+				}
+			}
+			
 		}
 	}
 	if value >= 500000000000 {

+ 1 - 1
src/jy/cluster/ssh.go

@@ -49,7 +49,7 @@ var sshstr = `
 cd /opt
 kill -9 $(pidof extract_v3)
 rm -rf extract_v3*
-wget http://10.171.112.160:9090/res/extract_v3.tgz
+wget http://172.17.4.189:9080/res/extract_v3.tgz
 tar -xzvf extract_v3.tgz
 cd /opt/extract_v3
 chmod 777 extract_v3

+ 35 - 13
src/jy/extract/extpackage.go

@@ -11,20 +11,20 @@ import (
 	"sort"
 )
 
-func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) {
+func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string, isclearMoney string) {
 
 	if pkg.ColonKV != nil {
-		kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite)
+		kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite, isclearMoney)
 	}
 	if pkg.TableKV != nil {
-		kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite)
+		kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite, isclearMoney)
 	}
 	if pkg.SpaceKV != nil {
-		kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite)
+		kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite, isclearMoney)
 	}
 }
 
-func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) {
+func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string, isclearMoney string) {
 	if p != nil {
 		for pk, pv2 := range p.KvTags {
 			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
@@ -56,7 +56,7 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 					lock.Lock()
 					cfn := e.ClearFn["budget"]
 					lock.Unlock()
-					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""}, codeSite, isclearMoney)
 					if data[0] == 0 {
 						if istrue, ok := data[len(data)-1].(bool); istrue && ok {
 							(*sonJobResult)["budget"] = data[0]
@@ -68,7 +68,7 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 					lock.Lock()
 					cfn := e.ClearFn["bidamount"]
 					lock.Unlock()
-					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""}, codeSite, isclearMoney)
 					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
 						(*sonJobResult)["bidamount"] = data[0]
 					}
@@ -100,6 +100,28 @@ var winnerorderNotReg = regexp.MustCompile(`(附件|否决原因|候选|招标
 func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 	qu.Try(func() {
 		if len(j.BlockPackage) > 0 {
+			for kk, vv := range j.BlockPackage {
+				if vv.Bidamount > 0 {
+					lock.Lock()
+					cfn := e.ClearFn["bidamount"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{vv.Bidamount, ""}, codeSite, j.IsClearnMoney)
+					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
+						vv.Bidamount = qu.Float64All(data[0])
+						j.BlockPackage[kk] = vv
+					}
+				}
+				if vv.Budget > 0 {
+					lock.Lock()
+					cfn := e.ClearFn["budget"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{vv.Budget, ""}, codeSite, j.IsClearnMoney)
+					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
+						vv.Budget = qu.Float64All(data[0])
+						j.BlockPackage[kk] = vv
+					}
+				}
+			}
 			for _, ev := range e.PkgRuleCores {
 				for _, eve := range ev.RuleCores {
 					if !eve.IsLua {
@@ -170,7 +192,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 							sonJobResult["winnerorder"] = ordertmp[ordertmpint[tmpindex]]
 							if sonJobResult["bidamount"] == nil || sonJobResult["bidamount"].(float64) <= 0 {
 								if ordertmp[ordertmpint[tmpindex]][0]["price"] != nil {
-									moneys := clear.ObjToMoney([]interface{}{ordertmp[ordertmpint[tmpindex]][0]["price"], ""})
+									moneys := clear.ObjToMoney([]interface{}{ordertmp[ordertmpint[tmpindex]][0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
 									if len(moneys) > 0 && moneys[len(moneys)-1].(bool) {
 										if vf, ok := moneys[0].(float64); ok {
 											sonJobResult["bidamount"] = vf
@@ -197,15 +219,15 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 							sonJobResult["winner"] = pkg.Winner
 						}
 					}
-					pkvdata(pkg, &sonJobResult, e, isSite, codeSite)
+					pkvdata(pkg, &sonJobResult, e, isSite, codeSite, j.IsClearnMoney)
 
 					sonJobResult["type"] = pkg.Type
 					if len(tmpkeys) == 1 {
 						if qu.Float64All(sonJobResult["budget"]) == 0 && pkg.IsTrueBudget {
 							for _, bv := range j.Block {
-								kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
-								kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
-								kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite)
+								kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
+								kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
+								kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
 							}
 						}
 					}
@@ -319,7 +341,7 @@ func extRegBackPack(j *ju.Job, e *ExtractTask) {
 				lock.Lock()
 				cfn := e.ClearFn[key]
 				lock.Unlock()
-				data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
+				data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
 				pack[key] = data[0]
 			}
 		}

+ 92 - 50
src/jy/extract/extract.go

@@ -62,13 +62,13 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitTag(true)
 	ext.InitClearFn(false)
 	ext.InitClearFn(true)
-	//if ext.IsExtractCity { //版本上控制是否开始城市抽取
-	//	//初始化城市DFA信息
-	//	ext.InitCityInfo()
-	//	//ext.InitCityDFA()
-	//	ext.InitAreaCode()
-	//	ext.InitPostCode()
-	//}
+	if ext.IsExtractCity { //版本上控制是否开始城市抽取
+		//初始化城市DFA信息
+		ext.InitCityInfo()
+		//ext.InitCityDFA()
+		ext.InitAreaCode()
+		ext.InitPostCode()
+	}
 	//质量审核
 	ext.InitAuditFields()
 	ext.InitAuditRule()
@@ -102,7 +102,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			//if qu.ObjToString(v["sensitive"]) != ""||ggtest.MatchString(qu.ObjToString(v[""])) { //去除含敏感词数据
 			//	continue
 			//}
-			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl"{ //临时
+			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
 				continue
 			}
 			var j, jf *ju.Job
@@ -216,8 +216,7 @@ func RunExtractTask(taskId string) {
 			//}
 			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl"{ //临时
 				continue
-			}
-			//根据标题判断是否抽取
+			}			//根据标题判断是否抽取
 			b := IsExtract("title", qu.ObjToString(v["title"]), "")
 			if !b {
 				continue
@@ -253,7 +252,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
 	return (&ExtractTask{}).PreInfo(doc)
 }
 
-var clearnn *regexp.Regexp = regexp.MustCompile("名[\\s]+称")
+var clearMoneyReg *regexp.Regexp = regexp.MustCompile("(PPP[\\s]?项目|新城镇建设|国土资源局|基金管理|高速公路|水系生态治理|水生态建设|棚改旧改|棚户区改造|棚改项目|危房改造项目|土地整理|高速公路项目)")
 
 //信息预处理-和版本关联
 func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
@@ -278,6 +277,11 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	detail = ju.CutLableStr(d3 + "\n" + detail)
 	detail = cut.ClearHtml(d3 + "\n" + detail)
 	doc["detail"] = detail
+	isClearnMoney := !clearMoneyReg.MatchString(detail)
+	if isClearnMoney {
+		isClearnMoney = !clearMoneyReg.MatchString(qu.ObjToString(doc["title"]))
+	}
+	isClearnMoneystr := qu.ObjToString(qu.If(isClearnMoney, "T", ""))
 	if isextFile {
 		file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
 	}
@@ -337,18 +341,16 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		Site:           qu.ObjToString(doc["site"]),
 		//Domain:     qu.ObjToString(doc["domain"]),
 		//Href:       qu.ObjToString(doc["href"]),
-		Title:     qu.ObjToString(doc["title"]),
-		Data:      &doc,
-		City:      qu.ObjToString(doc["city"]),
-		Province:  qu.ObjToString(doc["area"]),
-		Jsondata:  toMap,
-		Result:    map[string][]*ju.ExtField{},
-		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
-		RuleBlock: e.RuleBlock,
-		Dataging:  qu.IntAll(doc["dataging"]),
-	}
-	if (j.Jsondata != nil || (*j.Jsondata) != nil) && (*j.Jsondata)["jsoncontent"] != nil {
-		delete((*j.Jsondata), "jsoncontent")
+		Title:         qu.ObjToString(doc["title"]),
+		Data:          &doc,
+		City:          qu.ObjToString(doc["city"]),
+		Province:      qu.ObjToString(doc["area"]),
+		Jsondata:      toMap,
+		Result:        map[string][]*ju.ExtField{},
+		BuyerAddr:     qu.ObjToString(doc["buyeraddr"]),
+		RuleBlock:     e.RuleBlock,
+		Dataging:      qu.IntAll(doc["dataging"]),
+		IsClearnMoney: isClearnMoneystr,
 	}
 	if isextFile {
 		jf = &ju.Job{
@@ -368,9 +370,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 			RuleBlock:      e.RuleBlock,
 			IsFile:         isextFile,
 			Dataging:       qu.IntAll(doc["dataging"]),
-		}
-		if (jf.Jsondata != nil || (*jf.Jsondata) != nil) && (*jf.Jsondata)["jsoncontent"] != nil {
-			delete((*jf.Jsondata), "jsoncontent")
+			IsClearnMoney:  isClearnMoneystr,
 		}
 	}
 	codeSite := j.SpiderCode
@@ -438,7 +438,6 @@ func file2text(doc *map[string]interface{}) {
 
 //抽取
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
-	
 	e.ExtractDetail(j, isSite, j.SpiderCode)
 	if jf != nil && jf.IsFile {
 		e.ExtractFile(jf, isSite, j.SpiderCode)
@@ -613,7 +612,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 				if len(cfn) == 0 {
 					continue
 				}
-				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode)
+				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
 				if key == "budget" || key == "bidamount" {
 					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
 						j.Result[key][i].IsTrue = true
@@ -711,7 +710,10 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
 				lockclear.Lock()
 				cfn := e.ClearFn[key]
 				lockclear.Unlock()
-				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode)
+				if len(cfn) == 0 {
+					continue
+				}
+				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
 				v.Value = data[0]
 				//清理特殊符号
 				lockclear.Lock()
@@ -841,6 +843,18 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 				if isSite {
 					field.Score = 1
 				}
+				if (field.Field == "bidamount" || field.Field == "budget") && field.Type == "table" {
+					moneys := clear.ObjToMoney([]interface{}{field.Value, ""}, j.SpiderCode, j.IsClearnMoney)
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							field.Value = vf
+							field.IsTrue = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							field.Value = float64(vi)
+							field.IsTrue = moneys[len(moneys)-1].(bool)
+						}
+					}
+				}
 				if tmp["blocktag"] != nil {
 					btag := make(map[string]string)
 					for k := range tmp["blocktag"].(map[string]bool) {
@@ -1024,7 +1038,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 								lock.Lock()
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
-								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode)
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
 								if data[len(data)-1].(bool) {
 									j.BlockPackage[k].Budget = qu.Float64All(data[0])
 									j.BlockPackage[k].IsTrueBudget = true
@@ -1034,7 +1048,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 								lock.Lock()
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
-								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode)
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
 								if data[len(data)-1].(bool) {
 									j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
 									j.BlockPackage[k].IsTrueBidamount = true
@@ -1096,7 +1110,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						lock.Lock()
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
-						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode)
+						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
 						if data[len(data)-1].(bool) {
 							j.BlockPackage[k].Budget = qu.Float64All(data[0])
 							j.BlockPackage[k].IsTrueBudget = true
@@ -1107,7 +1121,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 						lock.Lock()
 						cfn := e.ClearFn[in.Field]
 						lock.Unlock()
-						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode)
+						data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
 						if data[len(data)-1].(bool) {
 							j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
 							j.BlockPackage[k].IsTrueBidamount = true
@@ -1674,13 +1688,38 @@ var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:
 //分析抽取结果并保存
 func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
+		if (j.Category == "招标" || j.Category == "预告") && (len(j.BlockPackage) > 0 || len(j.PackageInfo) > 0 || len(j.Result) > 0) {
+			if j.CategorySecond != "单一" {
+				delete(j.Result, "winner")
+				delete(j.Result, "bidamount")
+				for _, v := range j.BlockPackage {
+					v.Bidamount = 0
+					v.IsTrueBidamount = false
+					if v.Winner != "" {
+						v.Winner = ""
+						if v.SpaceKV != nil {
+							delete(v.SpaceKV.KvTags, "中标单位")
+						}
+						if v.TableKV != nil {
+							delete(v.TableKV.KvTags, "中标单位")
+						}
+						if v.ColonKV != nil {
+							delete(v.ColonKV.KvTags, "中标单位")
+						}
+					}
+				}
+				for _, v := range j.PackageInfo {
+					delete(v, "winner")
+					delete(v, "bidamount")
+				}
+			}
+		}
 		//重新取出清理过后的中标候选人
 		resetWinnerorder(j)
 		doc, result, _id := funcAnalysis(j, e)
 		if ju.IsSaveTag {
 			go otherNeedSave(j, result, e)
 		}
-		auxinfo := auxInfo(j)
 		//从排序结果中取值
 		tmp := map[string]interface{}{} //抽取值
 		tmp["spidercode"] = j.SpiderCode
@@ -1689,7 +1728,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		for _, val := range result {
 			for _, v := range val { //取第一个非负数,项目名称除外
 				//存0是否有效
-				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue{
+				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
 					tmp[v.Field] = v.Value
 					break
 				}
@@ -1819,7 +1858,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if ju.QualityAudit {
 			e.QualityAudit(tmp)
 		}
-		if e.IsExtractCity { //城市抽取
+		//城市抽取
+		if e.IsExtractCity {
 			//e.ExtractCity(j, tmp, _id)
 			e.NewExtractCity(j, &tmp, _id)
 		}
@@ -1963,17 +2003,18 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		} else { //测试结果
 			delete(tmp, "_id")
+			//auxinfo := auxInfo(j)
+			//auxinfof := auxInfo(jf)
+			//if len(auxinfo) > 0 {
+			//	tmp["fieldall"] = auxinfo
+			//}
+			//if len(auxinfof) > 0 {
+			//	tmp["fieldallf"] = auxinfof
+			//}
+			//if len(ffield) > 0 {
+			//	tmp["ffield"] = ffield
+			//}
 			delete(tmp, "fieldall")
-			auxinfof := auxInfo(jf)
-			if len(auxinfo) > 0 {
-				tmp["fieldall"] = auxinfo
-			}
-			if len(auxinfof) > 0 {
-				tmp["fieldallf"] = auxinfof
-			}
-			if len(ffield) > 0 {
-				tmp["ffield"] = ffield
-			}
 			if len(j.BlockPackage) > 0 { //分包详情
 				if len(j.BlockPackage) > 10 {
 					tmp["epackage"] = "分包异常"
@@ -1999,7 +2040,7 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	delete(tmp, "detail")
 	if _, ok := tmp["bidamount"].(string); ok {
 		delete(tmp, "bidamount")
-	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]) {
+	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]){
 		delete(tmp, "bidamount")
 	}
 	if _, ok := tmp["budget"].(string); ok {
@@ -2138,7 +2179,7 @@ func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[strin
 						if len(cfn) == 0 {
 							continue
 						}
-						newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[jdkey], ""}, j.SpiderCode)
+						newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[jdkey], ""}, j.SpiderCode, j.IsClearnMoney)
 						if tmpv.Value == newNum[0] {
 							extField := &ju.ExtField{Code: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), Field: jdkey, ExtFrom: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), SourceValue: (*j.Jsondata)[jdkey], Value: newNum[0], Score: 100, IsTrue: newNum[len(newNum)-1].(bool)}
 							j.Result[jdkey] = append(j.Result[jdkey], extField)
@@ -2334,9 +2375,9 @@ func resetWinnerorder(j *ju.Job) {
 	if maxlen > 0 {
 		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
 		if j.Winnerorder[0]["price"] != nil {
-			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode)
+			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
 			if tmpPrice[len(tmpPrice)-1].(bool) {
-				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 0.5})
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5})
 			}
 		}
 	}
@@ -2363,3 +2404,4 @@ func RemoveReplicaSliceString(slc []string) []string {
 	}
 	return result
 }
+

+ 11 - 6
src/jy/extract/extractudp.go

@@ -65,7 +65,6 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					go Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
 					log.Debug("udp通知抽取id段", sid, " ", eid)
 					ExtractByUdp(sid, eid, ra)
-					log.Debug("udp通知抽取完成,eid=", eid)
 					for _, m := range nextNodes {
 						by, _ := json.Marshal(map[string]interface{}{
 							"gtid":  sid,
@@ -80,6 +79,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 							log.Debug(err)
 						}
 					}
+					log.Debug("udp通知抽取完成,eid=", eid)
 				}
 			}
 		}
@@ -203,8 +203,9 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}, true, false)
 		}
 		log.Debug("抽取完成", "index:", index, "bidtotal:", ext.BidTotal)
-	} else { //普通抽取
-		query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(sid), "$lte": bson.ObjectIdHex(eid)}}
+	} else {
+		//普通抽取
+		query := bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(sid), "$lte": bson.ObjectIdHex(eid)}}
 		count := ext.TaskInfo.FDB.Count(ext.TaskInfo.FromColl, query)
 		log.Debug("查询条件为:", query, "查询条数:", count)
 		pageNum := (count + PageSize - 1) / PageSize
@@ -214,12 +215,16 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		}
 		wg := sync.WaitGroup{}
 		for i := 0; i < pageNum; i++ {
-			query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(sid)}}
+			query = bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(sid)}}
 			fmt.Printf("page=%d,query=%v\n", i+1, query)
 			list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
 			for _, v := range *list {
-				if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
-					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//去除含敏感词数据")
+				//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
+				//	log.Debug(index, qu.BsonIdToSId(v["_id"]), "//去除含敏感词数据")
+				//	continue
+				//}
+				if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时开标记录
+					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
 				_id := qu.BsonIdToSId(v["_id"])

+ 90 - 104
src/jy/pretreated/analystep.go

@@ -5,6 +5,8 @@ package pretreated
 
 import (
 	"encoding/json"
+	"fmt"
+	"jy/clear"
 	"jy/util"
 	"regexp"
 	"strings"
@@ -12,7 +14,6 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-
 var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|主要人员相关资料|投标文件格式|项目业绩|否决投标的?情况说明")
 
 func AnalyStart(job *util.Job, isSite bool, codeSite string) {
@@ -32,10 +33,11 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			ration = newration
 		}
 	}
+	job.BlockPackage = map[string]*util.BlockPackage{}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock, isSite, codeSite) //分块
 	if len(blockArrays) > 0 {                                                                  //有分块
 		//从块里面找分包
-		if !job.IsFile{
+		if !job.IsFile {
 			job.BlockPackage = FindPackageFromBlocks(&blockArrays, isSite, codeSite) //从块里面找分包
 		}
 		for _, bl := range blockArrays {
@@ -64,9 +66,11 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
 			//log.Println(newCon)
-			job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
+			if newCon!=""{
+				job.BlockPackage = FindPackageFromText(job.Title, newCon, isSite, codeSite)
+			}
 			for i := 0; i < len(tabs); i++ {
-				//log.Println(tabs[i].Html())
+				//fmt.Println(tabs[i].Html())
 				//添加标识:文本中有table
 				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock, isSite, codeSite) //解析表格入口 返回:汇总表格对象
 				processTableResult(tabres, bl, job, isSite, codeSite)
@@ -78,8 +82,10 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 		bl.Text = HtmlToText(con)
 		//log.Println(bl.Text)
 		FindProjectCode(bl.Text, job) //匹配项目编号
-		if yjReg.MatchString(bl.Text){
-			bl.Text = bl.Text[:strings.Index(bl.Text,"业绩")]
+		if yjReg.MatchString(bl.Text) {
+			if strings.Index(bl.Text, "业绩")>1{
+				bl.Text = bl.Text[:strings.Index(bl.Text, "业绩")]
+			}
 		}
 		//调用kv解析
 		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1, isSite, codeSite)
@@ -89,89 +95,13 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1, isSite, codeSite)
 			job.Winnerorder = bl.Winnerorder
 		}
-		if len(job.BlockPackage) > 0 {
-			tmpn := 0
-			for k, _ := range job.BlockPackage {
-				if k == "" {
-					continue
-				}
-				tmpn++
-			}
-			if tmpn == 1 {
-				for k, jv := range job.BlockPackage {
-					if k == "" {
-						continue
-					}
-					if len(jv.WinnerOrder) == 0 && jv.Winner == "" {
-						winbs := make(map[string]bool, 0)
-						tmpw := 0
-						if bl.TableKV != nil {
-							for kk, v := range bl.TableKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
-									for _, vv := range v {
-										if winbs[vv.Value] {
-											continue
-										}
-										job.BlockPackage[k].WinnerOrder = append(job.BlockPackage[k].WinnerOrder, map[string]interface{}{
-											"type":    0,
-											"price":   0.0,
-											"entname": strings.TrimSpace(vv.Value),
-											"sort":    tmpw,
-										})
-										tmpw++
-										winbs[vv.Value] = true
-									}
-								}
-							}
-						}
-						if bl.SpaceKV != nil {
-							for kk, v := range bl.SpaceKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
-									for _, vv := range v {
-										if winbs[vv.Value] {
-											continue
-										}
-										job.BlockPackage[k].WinnerOrder = append(job.BlockPackage[k].WinnerOrder, map[string]interface{}{
-											"type":    0,
-											"price":   0.0,
-											"entname": vv.Value,
-											"sort":    tmpw,
-										})
-										tmpw++
-										winbs[vv.Value] = true
-									}
-								}
-							}
-						}
-						if bl.ColonKV != nil {
-							for kk, v := range bl.ColonKV.KvTags {
-								if strings.Contains(kk, "中标候选人") && WinnerOrderStr.MatchString(kk) {
-									for _, vv := range v {
-										if winbs[vv.Value] {
-											continue
-										}
-										job.BlockPackage[k].WinnerOrder = append(job.BlockPackage[k].WinnerOrder, map[string]interface{}{
-											"type":    0,
-											"price":   0.0,
-											"entname": vv.Value,
-											"sort":    tmpw,
-										})
-										tmpw++
-										winbs[vv.Value] = true
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
 		job.Block = append(job.Block, bl)
 	}
 }
 
 func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite string) {
 	//块中再查找表格(块,处理完把值赋到块)
+	bl.Text = formatText(bl.Text, "biangeng")
 	tabs, _ := ComputeConRatio(bl.Text, 2)
 	for _, tab := range tabs {
 		job.HasTable = 1
@@ -264,9 +194,10 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 		block.TableKV = util.NewJobKv()
 	}
 	MergeKvTags(block.TableKV.KvTags, tabres.KvTags)
+	isorderwiner := true
 	//分包
 	tablePackage := map[string]*util.BlockPackage{}
-	if tabres.IsMultiPackage {
+	if tabres.IsMultiPackage && !job.IsFile {
 		//分包中的map
 		for _, v := range tabres.PackageMap.Keys {
 			blockPackage, ok := tabres.PackageMap.Map[v].(*util.BlockPackage)
@@ -291,34 +222,89 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 				blockPackage.TableKV = util.NewJobKv()
 			}
 			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil, isSite, codeSite))
+			if blockPackage.WinnerOrder != nil && len(blockPackage.WinnerOrder) > 0 {
+				for i, v := range blockPackage.WinnerOrder {
+					if entName, ok := v["entname"].(string); ok {
+						v["entname"] = winnerOrderEntity.clear("中标单位", entName)
+						if i == 0 && blockPackage.Winner == "" {
+							blockPackage.Winner = fmt.Sprint(v["entname"])
+						}
+						if price, ok := v["price"].(string); ok && len(price) < 30 && len(price) > 0 && !clearnum.MatchString(price) {
+							v["price"] = winnerOrderEntity.clear("中标金额", price)
+							if !blockPackage.IsTrueBidamount {
+								moneys := clear.ObjToMoney([]interface{}{v["price"], ""}, job.SpiderCode, job.IsClearnMoney)
+								if len(moneys) > 0 {
+									if vf, ok := moneys[0].(float64); ok {
+										blockPackage.Bidamount = vf
+										blockPackage.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+									} else if vi, ok := moneys[0].(int); ok {
+										blockPackage.Bidamount = float64(vi)
+										blockPackage.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+									}
+								}
+							}
+						}
+						v["type"] = tabres.Toptype + "_" + tabres.BlockTag + "_" + blockPackage.Origin
+						job.Winnerorder = append(job.Winnerorder, v)
+					}
+				}
+				isorderwiner = false
+			}
 			tablePackage[v] = blockPackage
 		}
 	}
-	tmpWins := make(map[string]int)
-	for _, v := range job.Winnerorder {
-		if v["entname"] != nil && v["entname"] != "" {
-			tmpWins[v["entname"].(string)] = v["sort"].(int)
-		}
-	}
 	//处理中标人排序
-	wror := []map[string]interface{}{}
-	for _, v := range tabres.WinnerOrder {
-		entName, _ := v["entname"].(string)
-		v["entname"] = winnerOrderEntity.clear("中标单位", entName)
-		if price, ok := v["price"].(string); ok {
-			v["price"] = winnerOrderEntity.clear("中标金额", price)
+	if isorderwiner {
+		tmpWins := make(map[string]int)
+		for _, v := range job.Winnerorder {
+			if v["entname"] != nil && v["entname"] != "" {
+				tmpWins[v["entname"].(string)] = v["sort"].(int)
+			}
 		}
-		v["type"] = len(job.Winnerorder)
-		if tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] == nil {
-			continue
+		wror := []map[string]interface{}{}
+		if len(tmpWins) == 0 && len(tabres.WinnerOrder) > 0 {
+			for _, v := range tabres.WinnerOrder {
+				if entName, ok := v["entname"].(string); ok {
+					v["entname"] = winnerOrderEntity.clear("中标单位", entName)
+					if price, ok := v["price"].(string); ok {
+						v["price"] = winnerOrderEntity.clear("中标金额", price)
+					}
+					v["type"] = tabres.Toptype + "_" + tabres.BlockTag
+					wror = append(wror, v)
+				}
+			}
+		} else {
+			for _, v := range tabres.WinnerOrder {
+				if entName, ok := v["entname"].(string); ok {
+					v["entname"] = winnerOrderEntity.clear("中标单位", entName)
+					if v["entname"] == "" {
+						continue
+					}
+					if price, ok := v["price"].(string); ok {
+						v["price"] = winnerOrderEntity.clear("中标金额", price)
+					}
+					v["type"] = tabres.Toptype + "_" + tabres.BlockTag
+					if tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] == nil {
+						continue
+					} else if tmpWins[v["entname"].(string)] != v["sort"].(int) && v["type"] != tabres.BlockTag {
+						wror = append(wror, v)
+						continue
+					} else if tmpWins[v["entname"].(string)] > 0 && tmpWins[v["entname"].(string)] == v["sort"].(int) && v["price"] != nil {
+						if tmpWins[v["entname"].(string)]-1 >= 0 {
+							job.Winnerorder[tmpWins[v["entname"].(string)]-1] = v
+							continue
+						}
+					}
+				}
+			}
+		}
+		if len(wror) > 0 {
+			job.Winnerorder = append(job.Winnerorder, wror...)
+			block.Winnerorder = job.Winnerorder
 		}
-		wror = append(wror, v)
-	}
-	if len(wror) > 0 {
-		job.Winnerorder = append(job.Winnerorder, wror...)
 	}
 	//分包
-	if len(tablePackage) > 0  {
+	if len(tablePackage) > 0 && !job.IsFile {
 		pkgMap := map[string]*util.BlockPackage{}
 		for tk, tv := range tablePackage {
 			bv := job.BlockPackage[tk]

+ 177 - 218
src/jy/pretreated/analytable.go

@@ -20,21 +20,21 @@ var (
 	numclear = regexp.MustCompile("^[\\d一二三四五六七八九十.]+")
 	num1     = regexp.MustCompile("(\\d)")
 	//清理表格title中的不需要的内容
-	tabletitleclear  = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤))]")
+	tabletitleclear  = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、.,.。_/((人民币万元件个公斤))]")
 	tabletitleclear2 = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕]*")
 	//清理表格中是key中包含的空格或数字等
-	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
+	tablekeyclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n、.,.。_/]+|^[\\d一二三四五六七八九十]+[、.]*|[((【\\[].*?[))】\\]]")
 	//清理表格td中的符号
-	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
+	tabletdclear = regexp.MustCompile("[\\s\u3000\u2003\u00a0\\n\u001c、,。_??;;~\\-#\\\\()(){}【】\\[\\]<>《》{}〔〕¥$]*")
 	//判断key是金额,对万元的处理
 	moneyreg = regexp.MustCompile("(预算|费|价|额|规模|投资)")
 	//根据表格的内容判断是不是表头,如果含有金额则不是表头
 	MoneyReg = regexp.MustCompile("^[\\s  ::0-9.万元()()人民币¥$]+$")
-
+	GSReg    = regexp.MustCompile(".*公司.*")
 	//判断分包时
 	moneyNum = regexp.MustCompile("[元整¥万]")
 	//对隐藏表格的判断
-	display = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
+	display       = regexp.MustCompile("(?i).*?display\\s?[:]\\s?none.*")
 	//---------------
 	//求是分包的概率
 	//根据表格的标签对分包进行打分
@@ -58,7 +58,6 @@ var (
 
 	//清理表格标签正则
 	ClearTagReg = regexp.MustCompile("<[^>]*?>|[\\s\\n\\r]*$")
-
 	//查找表格标签正则
 	ttagreg = regexp.MustCompile("(?s)([^\\n::。,;\\s\u3000\u2003\u00a0]{2,30})[::]?[^::。;!\\n]{0,35}[\\s\\n]*$")
 
@@ -93,7 +92,8 @@ var (
 	filter_zbdw_jd = regexp.MustCompile("(投标|成交|中标|合同)(供应商|单位|人|名称).{0,4}$")
 	//且不包含以下字眼
 	filter_zbdw_kn = regexp.MustCompile("第[2二3三4四5五]|得分|地址|询价保证金") //且值包含以下字眼
-	filter_zbdw_v  = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
+		//且值包含以下字眼
+	filter_zbdw_v = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$|([^购]中心|办公|用品)")
 	//且值包含以下字眼
 	filter_zbdw_v2 = regexp.MustCompile("(公司|集团|研究院|设计院|局|厂|部|站|城|店|市|所|处)$")
 
@@ -135,6 +135,9 @@ func IsHide(g *goquery.Selection) (b bool) {
 	return
 }
 
+// clearnum matches comma-separated digit runs such as "59.992664,33.495715,20.001306"; sortKVArr rejects a candidate price string that matches it.
+var clearnum *regexp.Regexp = regexp.MustCompile("(([0-9.]{1,6}[,,]+){4,}|(\\d{6}[,,]\\d{2}.){2,})")
+
 //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
 //待扩展,暂不支持正则标签库
 func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool, codeSite string) (kvTags map[string][]*u.Tag, returntag string) {
@@ -174,12 +177,13 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
 			}
 			if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
 				kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
-			} else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
+			}else if regexp.MustCompile("(中标候选人|名单及其排序|排序)").MatchString(tabletag) && t1.Value == "采购单位" {
 				kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight - 150})
 			} else {
 				kvTags[t1.Value] = append(kvTags[t1.Value], &u.Tag{Key: k1, Value: v1, Weight: t1.Weight})
 			}
 		}
+		res[0].IsInvalid = true
 		//k1 = res[0].Value
 	} else {
 		kvTags[k] = append(kvTags[k], &u.Tag{Key: k, Value: v1, IsInvalid: true})
@@ -278,20 +282,20 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 			}
 		}
 		table.WinnerOrder = winnerOrder
-		winnerOrder = []map[string]interface{}{}
-	L: //遍历每个td,查询中标人
-		for _, tr := range table.TRs {
-			for _, td := range tr.TDs {
-				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3, isSite, codeSite)
-				if len(winnerOrder) > 0 {
-					break L
+		if len(table.WinnerOrder) == 0 {
+			winnerOrder = []map[string]interface{}{}
+			//遍历每个td,查询中标人
+			for _, tr := range table.TRs {
+				for _, td := range tr.TDs {
+					winnerOrder = winnerOrderEntity.Find(td.Val, true, 3, isSite, codeSite)
+					if len(winnerOrder) > 0 {
+						//中标候选人合并
+						winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
+					}
 				}
 			}
 		}
-		if len(table.WinnerOrder) > 0 {
-			//中标候选人合并
-			winnerOrderEntity.Merge(table.WinnerOrder, winnerOrder)
-		} else if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
+		if !table.BPackage { //没有table.WinnerOrder也没有分包 将td中的WinnerOrder赋值给table.WinnerOrder
 			if len(winnerOrder) > 1 {
 				table.WinnerOrder = winnerOrder
 			}
@@ -304,7 +308,7 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 		if table.BlockPackage.Map != nil {
 			onePkgKey := table.BlockPackage.Keys[0]
 			onePkg, _ := table.BlockPackage.Map[onePkgKey].(*u.BlockPackage)
-			if onePkg != nil && onePkg.WinnerOrder != nil && len(onePkg.WinnerOrder) == 0 {
+			// Hand the table-level winner order to the first package only when the
+			// package has none of its own. len() is 0 for both a nil and an empty
+			// slice, so this single test covers "nil or empty"; the previous
+			// "!= nil || len == 0" form was true for every non-nil onePkg and
+			// overwrote winner orders that were already populated.
+			if onePkg != nil && len(onePkg.WinnerOrder) == 0 {
 				onePkg.WinnerOrder = table.WinnerOrder
 				table.BlockPackage.AddKey(onePkgKey, onePkg)
 			}
@@ -312,12 +316,20 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 	}
 }
 
+var winMoneyReg *regexp.Regexp = regexp.MustCompile("(报价|投标价|投标总价)") // column keys naming a quote / bid price; sortKVArr treats a matching key as "中标金额"
+
 //处理table.SortKV.value为数组的情况
 func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
 	if !winnertag {
 		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
 	}
+	if !winnertag {
+		winnertag = iswinnertabletag.MatchString(table.Desc)
+	}
+	if !winnertag {
+		winnertag = iswinnertabletag.MatchString(table.Html)
+	}
 	checkKey := map[int]bool{}
 	//tmpBidmout := []string{}
 	//log.Println(tmpBidmout)
@@ -330,7 +342,7 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 			table.WinnerOrder = append(table.WinnerOrder, vm...)
 		} else {
 			//增加候选人排序逻辑
-			if table.WinnerOrder == nil && !checkKey[kn] {
+			if (table.WinnerOrder == nil || len(table.WinnerOrder) == 0) && !checkKey[kn] {
 				if vs1, ok := v.([]string); ok {
 					smap := make([]map[string]interface{}, len(vs1))
 					for n1, _ := range vs1 {
@@ -350,7 +362,7 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 							res, _, _, _, repl := CheckCommon(k, "bidorder")
 							kv := ""
 							if !res {
-								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""), isSite, codeSite)
+								kt := u.GetTags(k, isSite, codeSite)
 								if kt.Len() > 0 {
 									if kt[0].Value == "单品报价" && winnertag {
 										kv = "中标金额"
@@ -372,6 +384,8 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 											if entname != "" {
 												tmpEntname[vsk] = entname
 											}
+										} else if winMoneyReg.MatchString(k) && len(tmpPrice[vsk]) == 0 {
+											kv = "中标金额"
 										} else { //验证val时如果数组中的第一条数据既不满足sort或者entname 判定此数组数据错误
 											break
 										}
@@ -386,6 +400,12 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 									for vsk, vsv := range vs {
 										smap[vsk]["sortstr"] = vsv
 										smap[vsk]["sort"] = GetBidSort(vsv, vsk+1)
+										if findCandidate2.MatchString(vsv) && kv == "中标单位" && tmpEntname[vsk] == "" { //数据验证val是否是候选人
+											entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
+											if entname != "" {
+												tmpEntname[vsk] = entname
+											}
+										}
 									}
 								} else if repl == "entname" || kv == "中标单位" {
 									for vsk, vsv := range vs {
@@ -393,13 +413,12 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 											repl = "sort"
 											goto SORT
 										}
-										//										if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
-										//											break
-										//										}
-										//										entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
-										//										if entname != "" {
-										//											smap[vsk]["entname"] = entname
-										//
+										//if entname, _ := smap[vsk]["entname"].(string); entname != "" || len([]rune(vsv)) < 3 {
+										//	break
+										//}
+										//entname, _ := winnerOrderEntity.clear("中标单位", vsv).(string)
+										//if entname != "" {
+										//		smap[vsk]["entname"] = entname
 										if tmpEntname[vsk] != "" || len([]rune(vsv)) < 4 { //排除 单位:["台","个","套"]
 											break
 										}
@@ -455,39 +474,45 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 						//} else if len(tmpEntname) > 0 {
 						//fmt.Println("table winnerorder only has entname", tmpEntname)
 						//}
-						if len(smap_v) > 2 { //只有排序信息 sort和sortstr
+						if len(smap_v) >2{ //只有排序信息 sort和sortstr
 							newSmap = append(newSmap, smap_v)
 						}
 					}
 					if len(newSmap) > 0 {
 						table.WinnerOrder = newSmap
+						
 					}
 				}
-			}
-			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v, isSite, codeSite)
-			if tag != "" && table.Tag == "" {
-				table.Tag = tag
-			}
-			for kk, vv := range kvTags {
-				if vsss, ok := v.([]string); ok {
-					if len(vv) > 0 {
-						for _, vvvvvv := range vsss {
-							tmp := u.Tag{}
-							tmp.Weight = vv[0].Weight
-							tmp.Key = vv[0].Key
-							tmp.IsInvalid = vv[0].IsInvalid
-							if kk == "单品报价" || kk == "中标金额" || kk == "预算" {
-								if strings.Contains(k, "万") {
-									tmp.Value = vvvvvv + "万"
-								} else if strings.Contains(k, "亿") {
-									tmp.Value = vvvvvv + "亿"
-								} else {
-									tmp.Value = vvvvvv
-								}
+			} else if vsss, ok := v.([]string); ok {
+				if len(table.WinnerOrder) > 0 && table.WinnerOrder[0]["price"] == nil && len(vsss) == len(table.WinnerOrder) {
+					kv := ""
+					if winMoneyReg.MatchString(k) {
+						kv = "中标金额"
+					} else {
+						kt := u.GetTags(k, isSite, codeSite)
+						if kt.Len() > 0 {
+							if kt[0].Value == "单品报价" && winnertag {
+								kv = "中标金额"
 							} else {
-								tmp.Value = vvvvvv
+								kv = kt[0].Value
+							}
+						}
+					}
+					if kv == "中标金额" {
+						for i, vx := range vsss {
+
+							p1num := numberReg2.FindString(vx)
+							if strings.Contains(p1num, ",") && strings.Contains(p1num, ".") {
+								p1num = strings.ReplaceAll(p1num, ",", "")
+							}
+							p1 := qutil.Float64All(p1num)
+							if p1 > 0 {
+								//smap[vsk]["price"] = winnerOrderEntity.clear("中标金额", vsv+GetMoneyUnit(k, vsv))
+								price := winnerOrderEntity.clear(kv, vx+GetMoneyUnit(k, vx))
+								if pricestr, _ := price.(string); len(pricestr) < 30 && len(pricestr) > 0 && !clearnum.MatchString(pricestr) {
+									table.WinnerOrder[i]["price"] = pricestr
+								}
 							}
-							table.StandKV[kk] = append(table.StandKV[kk], &tmp)
 						}
 					}
 				}
@@ -599,15 +624,8 @@ func (table *Table) MergerToTableresult() {
 				if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 {
 					bp.WinnerOrder = v1.WinnerOrder
 				}
-				//table.TableResult.PackageMap.AddKey(k, v)
 			}
 		}
-		//		str := ""
-		//		for _, k := range table.TableResult.PackageMap.Keys {
-		//			v := table.TableResult.PackageMap.Map[k].(*u.BlockPackage)
-		//			str += fmt.Sprintf("包号:%s,中标人:%s,中标价:%s,预算:%s,文本:%s,排名:%v ---\t", v.Index, v.TableKV["中标单位"]+v.ColonKV["中标单位"], v.TableKV["中标金额"]+v.ColonKV["中标金额"], v.TableKV["预算"]+v.ColonKV["预算"], v.Text, v.WinnerOrder)
-		//		}
-		//		u.Debug(table, table.TableResult, str)
 	}
 	//遍历标准key到tableresult.sortkv中
 	for _, v := range table.StandKV {
@@ -674,6 +692,9 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
 	}
 	//生成tableresult对象
 	tabres = NewTableResult(_id, toptype, blockTag, con, itype, ruleBlock)
+	if fblbReg.MatchString(blockTag) {
+		return
+	}
 	//可以有多个table
 	//for _, table := range tabs {
 	//隐藏表格跳过
@@ -802,7 +823,7 @@ func (table *Table) Analy(contactFormat *u.ContactFormat, isSite bool, codeSite
 		trs = table.Goquery.ChildrenFiltered("tr")
 	}
 	ztb := table.Goquery.Find("table").Size()
-	if ztb >= 10 {
+	if ztb >= 9 {
 		return []*Table{}
 	}
 	//遍历节点,初始化table 结构
@@ -817,6 +838,8 @@ func (table *Table) Analy(contactFormat *u.ContactFormat, isSite bool, codeSite
 	return ts
 }
 
+var fblbReg *regexp.Regexp = regexp.MustCompile("(废标|流标|负责人资格|负责人业绩|相关业绩|开标记录|附件[:0-9]|越南盾|技术分|填报项目业绩|未通过.*原因)") // block tags for which AnalyTableV2 returns immediately after creating tabres
+
 //遍历节点,初始化table 结构体
 func (table *Table) createTabe(trs *goquery.Selection, isSite bool, codeSite string) {
 	trs.Each(func(n int, sel *goquery.Selection) {
@@ -843,7 +866,7 @@ func (table *Table) createTabe(trs *goquery.Selection, isSite bool, codeSite str
 				if tds.Size() == empty {
 					tdTextIsNull = true
 				}
-			}
+			} 
 		})
 		//向table添加每行不为空的tr
 		if !tdTextIsNull {
@@ -1086,7 +1109,11 @@ func (table *Table) ComputeRowColSpan() {
 		nk := 0         //nk列的起始,k行的起始||如果有合并,起始就不是0
 		ball := true
 		rowspans := v.TDs[0].Rowspan //某一行第一个td的rowspan
-		for _, v1 := range v.TDs {
+		for k1, v1 := range v.TDs {
+			if k1 == 0 && k == 0 {
+				table.TRs[k].TDs[k1].MustBH = true
+				table.TRs[k].TDs[k1].BH = true
+			}
 			if v1.Rowspan != rowspans {
 				ball = false
 				break
@@ -1325,7 +1352,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
 				lentds := len(v.Tdmap[v2])
 				if v.Rationmap[v2] > checkval {
 					for _, td := range v.Tdmap[v2] {
-						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) {
+						if td.KeyDirect == 0 && !MoneyReg.MatchString(td.Val) && !GSReg.MatchString(td.Val) {
 							if k1 == "r" {
 								ck := fmtkey("c", td.StartCol, td.EndCol)
 								rt := table.StartAndEndRation[ck]
@@ -1354,7 +1381,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool, codeSite string) {
 									//clen = len(tdn)
 								}
 								if lentds > 1 {
-									if ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
+									if td.Valtype != "NOHEAD" && utf8.RuneCountInString(td.Val) < 15 && ((tdn != nil && v.Rationmap[v2] > fv) || tdn == nil) && td.Valtype != "BO" {
 										td.KeyDirect = 2
 										td.KVDirect = 1
 										td.BH = true
@@ -1450,10 +1477,10 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 		bcon := false
 		//增加表格切块判断,只判断切块分包
 		//控制中标人排序方向
-		bodirect := 0
+		//bodirect := 0
 		//控制中标人排序数值
-		sort := 1
-
+		//sort := 1
+		nextdirect, nextvdirect := 0, 0
 		//开始抽取
 		for _, tr := range table.TRs {
 			bcon = trSingleColumn(tr, bcon, table) //tr单列,是否丢弃内容
@@ -1467,7 +1494,12 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 						numbh++
 					}
 				}
-				if numbh > 0 && numbh <= len(tr.TDs)/2 {
+				if numbh != 0 && numbh == len(tr.TDs) { //5e0d53ef0cf41612e0640495
+					nextdirect, nextvdirect = 2, 1
+					continue
+				} else if nextdirect > 0 && nextvdirect > 0 {
+					direct, vdirect = 2, 1
+				} else if numbh > 0 && numbh <= len(tr.TDs)/2 {
 					direct, vdirect = 1, 2
 				} else {
 					direct, vdirect = 2, 1
@@ -1477,26 +1509,26 @@ func (table *Table) FindKV(isSite bool, codeSite string) {
 				if !td.BH && td.KVDirect < 3 {
 					if !table.FindTdVal(td, direct, vdirect) { //table.FindTdVal()存储了table.SortKV
 						if !table.FindTdVal(td, vdirect, direct) {
-							//都识别不到时,对第一、二中标候选人的处理
-							bo, res := GetBidOrder(td, bodirect, sort)
-							if res {
-								sort++
-								bodirect = bo
-							}
-							if len(td.SortKV.Map) > 0 {
-								for _, tdv := range td.SortKV.Keys {
-									if tdv == "" || td.SortKV.Map[tdv] == "" { //value为空或者null不再添加到table.SortKV
-										continue
-									}
-									table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
-								}
-							}
+							////都识别不到时,对第一、二中标候选人的处理
+							//bo, res := GetBidOrder(td, bodirect, sort)
+							//if res {
+							//	sort++
+							//	bodirect = bo
+							//}
+							//if len(td.SortKV.Map) > 0 {
+							//	for _, tdv := range td.SortKV.Keys {
+							//		if tdv == "" || td.SortKV.Map[tdv] == "" { //value为空或者null不再添加到table.SortKV
+							//			continue
+							//		}
+							//		table.SortKV.AddKey(tdv, td.SortKV.Map[tdv])
+							//	}
+							//}
 						}
 					}
 					//fmt.Println("td:", td.Val, td.BH, td.HeadTd, td.KVDirect)
 				}
 			}
-
+			nextdirect, nextvdirect = 0, 0
 		}
 		//qutil.Debug("FindKV", table.SortKV.Map)
 	} else if len(table.TRs) > 0 { //没有表头的表格处理,默认纵向吧
@@ -1729,6 +1761,9 @@ func GetBidSort(str string, n int) int {
 	return val
 }
 
+var cleardwReg *regexp.Regexp = regexp.MustCompile("[((]{1}\\d*[人元件个公斤户]/[人元件个公斤户][))]") // bracketed "unit/unit" annotation (optional leading digits); FindTdVal strips it from cell values
+var zbhxrReg *regexp.Regexp = regexp.MustCompile("(中标候选人|投标单位名称)") // header keys that FindTdVal renames to "中标单位" when the cell value also matches findCandidate2
+
 //查找每一个单元格的表头,调用FindNear
 func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 	if td.Val == "" || strings.TrimSpace(td.Val) == "" {
@@ -1753,7 +1788,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 					break
 				}
 			}
-		} else if strings.Contains(key, "中标候选人") && strings.Contains(td.Val, "公司") {
+		} else if zbhxrReg.MatchString(key) && findCandidate2.MatchString(td.Val) {
 			key = "中标单位"
 		} else if key == "单位名称" {
 			tmpnewnear := table.FindNear(near, 2)
@@ -1786,11 +1821,6 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 						near = near.TR.TDs[near.ColPos-1]
 					}
 				} else {
-					//near.Val += "_"
-					//for table.SortKV.Map[near.Val] != nil {
-					//	near.Val += "_"
-					//}
-					//key = near.Val //之前这个地方没有重置,导致把之前结果覆盖了
 					bthiskey = true
 				}
 			} else {
@@ -1847,7 +1877,12 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 					}
 				}
 				if bvalfind && varrpos > -1 && len(vals) > varrpos {
-					vals = append(vals, td.Val) // 累加
+					tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
+					if tmapval == "" {
+						vals = append(vals, td.Val) // 累加
+					} else {
+						vals = append(vals, tmapval) // 累加
+					}
 					val = vals
 					//vals[varrpos] = td.Val // += "__" + td.Val
 				} else {
@@ -1867,11 +1902,26 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 					varrpos = len(vals) - 1
 				}
 			} else if vals, ok := val.(string); ok && vals != "" && td.Val != "" {
+				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(vals, ""))
+				tmapvaltd := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
 				if bvalfind {
-					val = td.Val //vals + "__" + td.Val
+					if tmapvaltd == "" {
+						val = td.Val //vals + "__" + td.Val
+					} else {
+						val = tmapvaltd
+					}
 				} else {
-					tval := []string{vals}
-					tval = append(tval, td.Val)
+					tval := []string{}
+					if tmapval == "" {
+						tval = append(tval, vals)
+					} else {
+						tval = append(tval, tmapval)
+					}
+					if tmapvaltd == "" {
+						tval = append(tval, td.Val)
+					} else {
+						tval = append(tval, tmapvaltd)
+					}
 					val = tval
 					varrpos = 1
 				}
@@ -1879,7 +1929,12 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 			barr = true
 		} else {
 			if td.Val != "" {
-				val = td.Val
+				tmapval := strings.TrimSpace(cleardwReg.ReplaceAllString(td.Val, ""))
+				if tmapval == "" {
+					val = td.Val
+				} else {
+					val = tmapval
+				}
 			} else if len(near.SortKV.Map) == 1 && near.SortKV.Map[near.Val] != "" {
 				val = near.SortKV.Map[near.Val]
 			}
@@ -1889,14 +1944,12 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 			tkey := fmtkey("k", near.TR.RowPos, near.ColPos)
 			table.SortKV.ReplaceKey(key, val, tkey)
 		} else {
-			if val == nil || val == "" || key == "采购项目预算金额" {
-				return
-			}
 			if key == "单位名称" && len(near.TR.TDs) > 1 {
-				key = near.TR.TDs[0].Val
+				if near.TR.TDs[0].Val != "序号" {
+					key = near.TR.TDs[0].Val
+				}
 			}
 			table.SortKV.AddKey(key, val)
-			//if table.SortKV.Map[key] != nil {
 			pos := table.SortKV.Index[key]
 			if barr {
 				mval := table.kvscope[pos]
@@ -1947,16 +2000,20 @@ func (table *Table) FindNear(td *TD, direct int) *TD {
 		tr := table.TRs[:td.TR.RowPos]
 		for i := len(tr) - 1; i > -1; i-- {
 			tds := tr[i].TDs
-			for _, td1 := range tds {
+			// Scan the cells of row i for the one that spans td's columns and
+			// ends on the row directly above td, then pick the header for td.
+			for it, td1 := range tds {
 				if td1.StartCol <= td.StartCol && td1.EndCol >= td.EndCol && td1.EndRow+1 == td.StartRow {
 					// Found the adjacent cell (the original note calls it the "left neighbour"; the row test above selects the cell ending just above td).
 					if td1.BH {
 						return td1
-					} else {
-						if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
-							return td1.HeadTd
-						}
+					} else if len(tr[i].TDs) == len(td.TR.TDs) && td1.HeadTd != nil && td1.HeadTd.KVDirect == direct {
+						return td1.HeadTd
+					} else if it > 0 && td1.Val == "" && td1.TR.TopTR == nil && td.StartCol > 0 && td.StartCol-1 < len(td.TR.TDs) && strings.Contains(td.TR.TDs[td.StartCol-1].Val, "中标候选人") {
+						// td.StartCol > 0 keeps the td.StartCol-1 index from going negative
+						// (the earlier bound only checked the upper limit and panicked at 0).
+						return tds[it-1]
+					} else if td1.HeadTd != nil && td1.HeadTd.KVDirect == direct && td.Colspan == td1.Colspan && td.Rowspan == td1.Rowspan {
+						// Both spans must match the cell above; the row-span test used to
+						// compare td.Rowspan with itself and was therefore always true.
+						return td1.HeadTd
 					}
 				}
 			}
 		}
@@ -2012,7 +2069,6 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 	oldIndex := []string{} //存放包的原始值
 	brepeat := map[string]bool{}
 	for k, v := range index {
-
 		v = u.PackageNumberConvert(v)
 		if !brepeat[v] {
 			brepeat[v] = true
@@ -2046,8 +2102,9 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 					for tnk, tnv := range tn.StandKV {
 						if nk >= len(tnv) {
 							continue
+						} else if len(index) == len(tnv) {
+							kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
 						}
-						kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
 					}
 					//kv.KvTags = tn.StandKV
 					bp := &u.BlockPackage{}
@@ -2139,119 +2196,24 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 	}
 	for _, k1 := range tn.SortKV.Keys {
 		v1 := tn.SortKV.Map[k1]
-		if _, bvs := v1.(string); bvs && len(index) > 1 && !strings.HasSuffix(k1, "_") { //table.SortKV.Map.value为字符串并且index有分包而且table.SortKV.Map.key没有_
-			v1_array := []string{v1.(string)}
-			underline := ""
-			for {
-				underline += "_"
-				if tn.SortKV.Map[k1+underline] == nil {
-					break
-				} else if v3, v2_ok := tn.SortKV.Map[k1+underline].(string); v2_ok && v3 != "" {
-					v1_array = append(v1_array, v3)
-				}
-			}
-			v1 = v1_array
-		}
-		if val, bvs := v1.([]string); bvs {
-			if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
-				for k, v := range val {
-					tn.assemblePackage(k1, v, index[k], isSite, codeSite) //组装解析到的分包
-				}
-			} else {
-				for sk1, sv2 := range index {
-					v := val[sk1]
-					//处理http://www.hljcg.gov.cn/xwzs!queryOneXwxxqx.action?xwbh=8145b599-a11e-45cb-a76a-12157a715570
-					if v == "" && strings.Index(k1, "供应商") > -1 {
-						if sk1 != len(index)-1 {
-							//u.Debug(val[sk1+1], val[sk1+2])
-							if standIndex_pos[sk1+1]-standIndex_pos[sk1] > 1 {
-								v = val[standIndex_pos[sk1]+1]
-							}
-						} else {
-							if standIndex_pos[sk1] < len(val)-1 {
-								v = val[standIndex_pos[sk1]+1]
-							}
-						}
-					}
-					if k1 == "单价" {
-						continue
-					}
-					tn.assemblePackage(k1, v, sv2, isSite, codeSite)
-				}
-			}
-			//删除子包的kv
-			//u.Debug("----==1==-------", k1)
-			k1tags := u.GetTags(k1, isSite, codeSite) //取得匹配
-			//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
-			//	tn.SortKV.RemoveKey(k1)
-			//}
-			for _, vcgdw := range k1tags {
-				if vcgdw.Value == "采购单位" {
-				} else if vcgdw.Value == "预算" && len(val) == len(index) {
-					for bi, bv := range index {
-						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget <= 0 {
-							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
-							if len(moneys) > 0 {
-								if vf, ok := moneys[0].(float64); ok {
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget = vf
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBudget = moneys[len(moneys)-1].(bool)
-								} else if vi, ok := moneys[0].(int); ok {
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget = float64(vi)
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBudget = moneys[len(moneys)-1].(bool)
-								}
-							}
-						}
-					}
-				} else if vcgdw.Value == "中标金额" && len(val) == len(index) {
-					for bi, bv := range index {
-						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount <= 0 {
-							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
-							if len(moneys) > 0 {
-								if vf, ok := moneys[0].(float64); ok {
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount = vf
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBidamount = moneys[len(moneys)-1].(bool)
-								} else if vi, ok := moneys[0].(int); ok {
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount = float64(vi)
-									tn.BlockPackage.Map[bv].(*u.BlockPackage).IsTrueBidamount = moneys[len(moneys)-1].(bool)
-								}
-							}
-						}
-					}
-				} else if vcgdw.Value == "中标单位" {
-					for _, bv := range index {
-						if tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder == nil || len(tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder) == 0 {
-							continue
-						} else {
-							tn.BlockPackage.Map[bv].(*u.BlockPackage).Winner = qutil.ObjToString(tn.BlockPackage.Map[bv].(*u.BlockPackage).WinnerOrder[0]["entname"])
-						}
+		var v1_arr []string
+		if vtmpv1, ok := v1.(string); ok {
+			v1_arr = PreCon4.FindAllString(qutil.ObjToString(vtmpv1), -1)
+			if len(v1_arr) > 0 {
+				if dw := Precon4dw.FindString(vtmpv1); dw != "" {
+					for i, v := range v1_arr {
+						v1_arr[i] = v + dw
 					}
 				}
 			}
-		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
-			//删除子包的kv
-			kvTags, _ := CommonDataAnaly(k1, "", "", val, isSite, codeSite)
-			for kvTag_k, kvTag_v := range kvTags {
-				hasValid := false
-				for _, kvTag_vv := range kvTag_v {
-					if kvTag_vv.IsInvalid {
-						continue
-					}
-					hasValid = true
-				}
-				if !hasValid {
-					continue
-				}
-				if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
-					if tn.SortKV.Map[k1] != nil {
-						tn.SortKV.RemoveKey(k1)
-						tn.assemblePackage(k1, val, index[0], isSite, codeSite)
-					}
-					//log.Println("remove", k1, val)
-				}
+		} else if vtmpv1s, ok := v1.([]string); ok {
+			v1_arr = vtmpv1s
+		}
+		if len(v1_arr) > 0 && len(v1_arr) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
+			for k, v := range v1_arr {
+				tn.assemblePackage(k1, v, index[k], isSite, codeSite) //组装解析到的分包
 			}
-			//u.Debug("----==2==-------", k1)
 		}
-
 	}
 	return isGoonNext
 }
@@ -2271,7 +2233,6 @@ func (tn *Table) isGoonNext(isSite bool, codeSite string) {
 		} else {
 			str += fmt.Sprintf("%s:%s\n", nk, v)
 		}
-
 		if excludeKey2.MatchString(str) {
 			continue
 		}
@@ -2670,7 +2631,6 @@ func isHasOnePkgAndNoKv(v1 string) (bool, string) {
 
 //替换分包中混淆的词
 func replPkgConfusion(v1 string) string {
-
 	v1 = PreReg.ReplaceAllString(v1, "")
 	v1 = PreReg1.ReplaceAllString(v1, "")
 	v1 = PreCon.ReplaceAllString(v1, "")
@@ -2720,7 +2680,6 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
 							for _, this := range thisTdKvs {
 								if str := ContactInfoVagueReg.FindString(this.Key); str != "" {
 									td.SortKV.AddKey(tdType+str, this.Value)
-
 								}
 							}
 						}

+ 23 - 9
src/jy/pretreated/multipackage.go

@@ -3,6 +3,7 @@ package pretreated
 import (
 	"regexp"
 	"sort"
+	"strings"
 )
 
 var (
@@ -16,11 +17,13 @@ var (
 	PreCheckMulti = regexp.MustCompile("[^第]([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包))进行([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)|(划分|分[设为成]?|共[分设有计]?)[::]?[  \u3000\u2003\u00a0]*([一二三四五六七八九十两0-9ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)[  \u3000\u2003\u00a0]*个?((子|合同|分|施工|监理)?(标段?|包|合同段|标包|项目))")
 	//替换容易混淆的词
 	PreCon  = regexp.MustCompile("([\r\n]|^)[\u3000\u2003\u00a0\\s]*(^标项)(\\d\\.)+\\d|[一二三四五1-9、.]+[  \u3000\u2003\u00a0]*((标段|分包)(划分|情况)|(标书))|([上下]一[条页篇][::]?[^,,。\\n]{0,120}|备注[::][^\\n]{0,120}|业绩[::][^\\n,。,]{0,80}|三包(手册|服务|政策|凭证|期|标准|规定|责任|要求|售后)|(要求|提供|质量|国家|享受|负责|实行|执行|承诺|门前|法定|规定).{0,6}三包|“三包”|\\d+万?([个套只支分名][^标包])|[?]|[((]请?注意[::][^((]+[))])")
-	PreCon2 = regexp.MustCompile("[评中开定]\\s?标\\s?[0-9一二三四五六七八九十]+|标[准尺高书注]|[^中]标价|[开鼠投招军指企目]标|包[括含装为内]|[承树]包|CA证书")
+	PreCon2 = regexp.MustCompile("[评中开定]\\s?标\\s?[0-9一二三四五六七八九十]+|标[准尺高书注]|[^中]标价|[开鼠投招军指企目]标|包[括含装为内]|[承树]包|CA证书|地点[::].*标|.{30,}合同段")
 	//替换容易混淆的词
-	PreCon1 = regexp.MustCompile("(\\d+\\.?)+万?元")
+	PreCon1 = regexp.MustCompile("(施工)*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+标段)")
 	//提取分包标识
-	MultiReg = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包|标段))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|分|合同|施工|监理|标包|标|包)(标|包段|项|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))[::]?|(子项目[0-9]+)")
+	MultiReg = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包|标段))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|分|合同|施工|监理|标包|标|包)(标|包段|项|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))[::]?|(子项目[0-9]+)")
+	PreCon4 = regexp.MustCompile("([一二三四五六七八九十]标段[::¥0-9.]*(万元)?)[、]?")
+	Precon4dw = regexp.MustCompile("(万元|元)")
 	//匹配到的包格式分类统计
 	keyregs = []map[*regexp.Regexp]int{
 		map[*regexp.Regexp]int{
@@ -45,9 +48,6 @@ var (
 //判断分包
 func CheckMultiPackage(con, title string) (content string, m map[string][]string, b bool) {
 	m = map[string][]string{}
-	//if TitleReg.MatchString(title) {
-	//log.Println(title+"\n------------------", TitleReg.FindAllStringSubmatch(title, -1))
-	//}
 	con = PreReg.ReplaceAllString(con, "")
 	con = PreReg1.ReplaceAllString(con, "")
 	pres := PreCheckMulti.FindStringSubmatch(con)
@@ -73,15 +73,29 @@ func CheckMultiPackage(con, title string) (content string, m map[string][]string
 		for index, v := range res {
 			k := v[1]
 			vindex := 2
+			if k == "" {
+				k = v[11]
+				vindex = 12
+			}
 			if k == "" {
 				k = v[9]
 				vindex = 5
 			}
-			if len(m[k]) == 0 && k != "" {
+			if k == "" {
+				k = v[4]
+				vindex = 5
+			}
+			if k == "" {
+				k = v[6]
+				vindex = 8
+			}
+			if k != "" && v[vindex] != ""  {
+				vindex += 1
+			}
+			if len(m[k]) == 0 && k != "" && v[vindex] != "" {
 				k = ignoreReg.ReplaceAllString(k, "$1")
 				k = clearPkgFlag.ReplaceAllString(k, "")
-				//log.Println(k, "----")
-				m[k] = []string{clearPkgFlag.ReplaceAllString(v[0], ""), v[vindex]}
+				m[k] = []string{clearPkgFlag.ReplaceAllString(strings.TrimSpace(v[0]), ""), v[vindex]}
 				mindex[k] = index
 			}
 		}

+ 71 - 149
src/jy/pretreated/tablev2.go

@@ -288,14 +288,18 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, isSite bool, codeSite, tag str
 	}
 }
 
+var isnohead *regexp.Regexp = regexp.MustCompile("(个项目|奥图码|优惠比例|下浮比例)") // tdIsHb returns early, without any header/block processing, when td.Text matches
+
 //对td单元格值判断是否是表头和根据td内容长度进行分块处理
 func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite string) {
-	if yjReg.MatchString(td.Text) {
+	if yjReg.MatchString(td.Text) || isnohead.MatchString(td.Text) {
 		return
 	}
+	kvTitle := ""
+	if len(td.TR.TDs) > 0 {
+		kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
+	}
 	lenval := utf8.RuneCountInString(td.Val) //经过处理的td内容长度
-	//if lentxt > 9 {
-	//td.KV = GetKVAll(txt, "")
 	ub := []*u.Block{}
 	//经过处理的td内容长度大于50,划块,分包
 	if lenval > 50 { //看是否划块
@@ -314,50 +318,6 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
 				}
 			}
 		}
-		//
-		blockPackage := map[string]*u.BlockPackage{}
-		isFindPkg := true
-		/*if td.ColPos-1 >= 0 && excludeKey.MatchString(tr.TDs[td.ColPos-1].Text) {
-			isFindPkg = false
-		} else if len(tr.TDs) > 0 {
-			tdleft = tr.TDs[len(tr.TDs)-1]
-			if tdleft.BH && excludeKey.MatchString(tr.TDs[td.ColPos-1].Text) {
-				isFindPkg = false
-			}
-		}*/
-		if len(tr.TDs) > 0 {
-			tdleft := tr.TDs[len(tr.TDs)-1]
-			if tdleft.BH && excludeKey.MatchString(tdleft.Text) { //(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)
-				isFindPkg = false
-			}
-		}
-		if isFindPkg {
-			if len(ub) > 0 {
-				blockPackage = FindPackageFromBlocks(&ub, isSite, codeSite) //从块里面找分包
-			} else {
-				if !excludeKey2.MatchString(td.Val) {
-					blockPackage = FindPackageFromText("", td.Val, isSite, codeSite) //从正文里面找分包
-				}
-			}
-		}
-		if len(blockPackage) > 0 {
-			table.BPackage = true
-			for bp_k, bp_v := range blockPackage {
-				var bp *u.BlockPackage
-				if table.TableResult.PackageMap.Map[bp_k] == nil {
-					bp = bp_v
-				} else {
-					bp = table.TableResult.PackageMap.Map[bp_k].(*u.BlockPackage)
-					bp.Text += "\n" + bp_v.Text
-				}
-				if bp.TableKV == nil {
-					bp.TableKV = u.NewJobKv()
-				}
-				MergeKvTags(bp.TableKV.KvTags, bp_v.ColonKV.KvTags)
-				MergeKvTags(bp.TableKV.KvTags, bp_v.SpaceKV.KvTags)
-				table.TableResult.PackageMap.AddKey(bp_k, bp)
-			}
-		}
 	}
 	//经过处理的td内容长度小于50,冒号kv,td表头
 	if lenval < 50 {
@@ -384,19 +344,12 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
 				td.SortKV.AddKey(k, v) //存放kv值
 			}
 		}
-		//u.Debug(td.SortKV.Keys, "-------2--------------------------------")
-		//		td.SortKV = FindKv(text, "") //GetKvFromtxt(text, "")
-		//resm := GetKVAll(text, "")
 		if !bsontable {
 			txt := repSpace.ReplaceAllString(td.Val, "")
 			btw, must, _, _, repl := CheckHeader(txt)
 			if lenval > 18 {
 				btw = false
 			}
-			if strings.Contains(td.Val, "个项目") || strings.Contains(td.Val, "奥图码") {
-				must = false
-				btw = false
-			}
 			td.Valtype = repl
 			td.MustBH = must
 			td.BH = btw
@@ -408,40 +361,76 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite stri
 			}
 		}
 	} else if len(ub) == 0 {
-		//之前这里没加判断,现在加上判断,造成分块之后的kv被覆盖掉
-		//u.Debug("----\n\n\n", txt, "\n\n\n----")
-		//u.Debug(GetKVAll(txt, ""))
-		/*
-			subVal := submatchreg.FindAllStringSubmatch(txt, -1)
-			if len(subVal) > 0 {
-				for _, subv1 := range subVal {
-					if len(subv1) == 6 {
-						tr.Table.SortKV.AddKey(If(subv1[2] == "", subv1[3], subv1[2]).(string), subv1[4])
-						//tr.Table.SortKV.AddKey(subv1[1], subv1[2])
-					}
-				}
-			}
-		*/
-
 		fSortKV := FindKv(td.Val, "", 2)
 		for _, v := range fSortKV.Keys {
 			td.SortKV.AddKey(v, fSortKV.Map[v])
 		}
-		//		td.LeftNode.Val
-		//		for _, vvv := range *td.TR {
-		//			u.Debug(">>>>>")
-		//		}
-		kvTitle := ""
-		if len(td.TR.TDs) > 0 {
-			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
-		}
+
 		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2, isSite, codeSite) //获取冒号kv入口
 		if yjReg.MatchString(kvTitle) {
 			td.SortKV = NewSortMap()
 			return
 		}
 		for k, v := range resm {
-			td.SortKV.AddKey(k, v)
+			if td.SortKV.Map[k] == nil {
+				td.SortKV.AddKey(k, v)
+			}
+		}
+	}
+	blockPackage := map[string]*u.BlockPackage{}
+	isFindPkg := true
+	if len(tr.TDs) > 0 {
+		tdleft := tr.TDs[len(tr.TDs)-1]
+		if tdleft.BH && excludeKey.MatchString(tdleft.Text) { //(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)
+			isFindPkg = false
+		}
+	}
+	if isFindPkg {
+		if len(ub) > 0 {
+			blockPackage = FindPackageFromBlocks(&ub, isSite, codeSite) //从块里面找分包
+		} else {
+			if !excludeKey2.MatchString(td.Val) &&td.Val!="" {
+				blockPackage = FindPackageFromText(kvTitle, td.Val, isSite, codeSite) //从正文里面找分包
+				if len(blockPackage) > 0 {
+					table.BPackage = true
+					for bp_k, bp_v := range blockPackage {
+						var bp *u.BlockPackage
+						if table.BlockPackage.Map[bp_k] == nil {
+							bp = bp_v
+						} else {
+							bp = table.BlockPackage.Map[bp_k].(*u.BlockPackage)
+							bp.Text += "\n" + bp_v.Text
+							if bp_v.IsTrueBidamount {
+								bp.IsTrueBidamount = bp_v.IsTrueBidamount
+								bp.Bidamount = bp_v.Bidamount
+							}
+							if bp_v.IsTrueBudget {
+								bp.IsTrueBudget = bp_v.IsTrueBudget
+								bp.Budget = bp_v.Budget
+							}
+							if bp_v.Winner == "" {
+								bp.Winner = bp_v.Winner
+							}
+							if bp_v.WinnerPerson == "" {
+								bp.WinnerPerson = bp_v.WinnerPerson
+							}
+							if bp_v.WinnerTel == "" {
+								bp.WinnerTel = bp_v.WinnerTel
+							}
+							if len(bp_v.WinnerOrder) > 0 {
+								bp.WinnerOrder = bp_v.WinnerOrder
+							}
+						}
+						if bp.TableKV == nil {
+							bp.TableKV = u.NewJobKv()
+						}
+						MergeKvTags(bp.TableKV.KvTags, bp_v.ColonKV.KvTags)
+						MergeKvTags(bp.TableKV.KvTags, bp_v.SpaceKV.KvTags)
+
+						table.BlockPackage.AddKey(bp_k, bp)
+					}
+				}
+			}
 		}
 	}
 }
@@ -728,76 +717,6 @@ func CheckCommon(txt string, matchStr ...string) (res, must bool, stype, reg, re
 	txt = filterThText.ReplaceAllString(txt, "")
 	stype = "con"
 	repl = "NOHEAD"
-	if u.NowTimeTest() {
-		json.Unmarshal([]byte(`{
-	"normalhead":[
-		"^((.{2,6}(名称|编号|代码|时间|类型|性质|行政区域|原因|项目|意见|须知|程度))|标段(编号)?|招标金额|规模|统一社会信用代码|拟?中标供应商|质量|(质量)?承诺|地址|招标代理|序号|材料|结构|结构层数|评委|单位|数量|排名|标的|标项|开户银行|邮编|账号|电话|传真|网址|得分|名次|包件?号|职务|(建设|招标|采购|中标|成交|甲|乙)(单位|人|供应商|方|规模).{0,2}|.{0,5}(价格?|额|资金|[预概]算|投资|费用|报价|投标价)(万?元?([大小]写)?))$__M",
-		"^.{0,7}(((单位)?名称|总监|经理|负责人|信息|率|费|期|人|号|码|(价格?|额|资金)(万?元?([大小]写)?)|员|品目|标包|代表|区域|方式|因素|合价|合计|小计|地点|条件|(资质|类别和)等级|类别|状态)|得分|注册专业|方法|家数|全称|简称|邮件|执业或职业资格|证书|部门|事项|来源|划分|长度|规模|保证金|目标)$__",
-		"(名单|证号|名称|要求|时间|日期|地点|单位|条款|机构|范围|情况|概况|品名|规格|参数|标准|指标|型号|限价|数量|方式|等级|依据|明细|概况|内容|次数|产品|性质|地区|地址|币种|主题|详情|说明|代理(公司|机构)|节支率|名单|结果|结果公示)$|^(职称|姓名|级别|职称专业|证书名称|证书编号)$__",
-		"^(联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
-		"(专家|评委|打分)$__",
-		"品牌",
-        "姓名",
-		"起讫桩号",
-		"服务期",
-		"限价",
-		"邮编",
-		"面积",
-		"组织形式",
-		"招标方式",
-		"修建宽度",
-        "类别",
-        "备注",
-		"合计",
-        "电话",
-        "评审",
-		"原因",
-		"行业",
-		"价格",
-		"注册资金"
-	],
-	"jghead":[
-		"^.{0,2}[预拟]?(成交|中标|候选)(供应商|单位|企业|人|机构|价|金额).{0,2}$__M",
-		"^.{0,6}[打得评总](分)$__",
-		"(中标|磋商|投标|报|成交)总?(价|金额)__",
-		"(投标|中标)(人|方|单位|供应商)(名称)?__",
-		"成交",
-		"名次",
-	    "候选",
-		"业绩",
-		"荣誉",
-	    "排序",
-	    "排名",
-		"中标",
-		"供应商"
-	],
-	"con":[
-		"^((子|合同|分|施工|监理)?(标段?|包|合同段|标包|序号)[a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)__$1",
-		"([a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+(子|合同|分|施工|监理)?(标段?|包|合同段|标包|号))$__$1",
-		"(^[a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+$)__$1",
-		"(^.{2,18}(集团|事务所|研究院|事务所|研究所|设计院))__",
-		"(^.{5,}(公司))__",
-		".{2,20}元整|[\\d]+万?元__",
-		".{4,}采购(项目)?__",
-		"(首选|第[一二三四五1-5])(顺序|推荐)?(中标|候选|成交)?(候选)?(人|单位|供应商)__BO"
-	],
-	"abandontable":[
-		"(磋商|谈判|评标(委员会)?)?((小组)?成员|(评审)?专家)(名单)?$__",
-		"(业绩|资质|原因|相关资料)$__",
-		"([废流落]标|评审)(原因|情况)__",
-		"(中标|成交)(候选)*(人|供应商|单位)((类似)*业绩|资质)__",
-		"否决投标情况",
-		"落标供应商及落标原因",
-		"被废标供应商名称",
-		"主要人员",
-		"其他投标人"
-	],
-	"bidorder":[
-		".{0,8}排[序名]$__sort",
-		"(人|供应商|单位)(名称)?$__entname"
-	]
-}`), &u.TableK1)
-	}
 	if len([]rune(txt)) < 30 {
 		tLock.Lock()
 		defer tLock.Unlock()
@@ -853,6 +772,9 @@ func CheckCommon(txt string, matchStr ...string) (res, must bool, stype, reg, re
 						if "M" == repl {
 							must = true
 						}
+					} else if "M" == repl {
+						must = true
+						res = true
 					}
 					stype = v
 					break L1
@@ -902,7 +824,7 @@ func CheckHeader(txt string) (res, must bool, stype, reg, repl string) {
 con 文本
 strtype 1全文 2块文本
 **/
-var hisReg = regexp.MustCompile("类似业绩|历史业绩|开标记录")
+var hisReg = regexp.MustCompile("类似业绩|历史业绩|开标记录|填报项目业绩")
 
 func ComputeConRatio(con string, strtype int) (tabs []*goquery.Selection, ratio float32) {
 	defer qutil.Catch()

+ 8 - 7
src/jy/pretreated/winnerorder.go

@@ -1,7 +1,6 @@
 package pretreated
 
 import (
-	//"jy/clear"
 	"jy/util"
 	qutil "qfw/util"
 	"regexp"
@@ -27,18 +26,19 @@ var (
 	numberReg         = regexp.MustCompile("[一二三四五六七八九十0-9]+")
 	numberReg2        = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
 	thisNumberReg     = regexp.MustCompile("第" + numberReg.String())
+	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名)")
 	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?((候|侯)选)?(入围|备选|成交|中(标|选))人?([((]成交[))])?((候|侯)选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
 	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|第[一二三四五六七八九十1-9]+(候|侯)选人)")
 	//winnerReg2     = regexp.MustCompile("(第[一二三四五六七八九十1-9]+(候|侯)选人)")
-	winnerReg3     = regexp.MustCompile("(第[一二三四五六七八九十1-9]+名)")
-	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排(名|序))[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中(标|选)候选人|\n中(标|选)候选.{1,3}\\s*\n|\n(中(标|选)候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排(名|序)|公(示|告)|具体|推荐|结果(公示)?|中(标|选)候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中(标|选)候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
+	winnerReg3     = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名)")
+	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中(标|选)候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
 	winnerReg5     = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
-	winnerReg6     = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
+	winnerReg6     = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
 	winnerReg7     = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
 	colonEndReg    = regexp.MustCompile("[::]$")
 	toWarpReg      = regexp.MustCompile("[,。,;;]+")
 	findamountReg  = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
-	amountReg      = regexp.MustCompile("^\\d+(\\.\\d+)?((百|千)?元|(百|千)?(万|亿)元?)$")
+	amountReg      = regexp.MustCompile("^\\d+(\\.\\d+)?([百|千]?元|[百|千]?[万|亿]元?)$")
 	companyWarpReg = regexp.MustCompile("(公司)(.+?[::])")
 	findCompanyReg = regexp.MustCompile("[^::]+公司")
 	colonSpaceReg  = regexp.MustCompile("[::]\\s+")
@@ -78,7 +78,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 	} else if len(blocks) == 1 {
 		blocks = winnerReg7.Split(text, -1)
 	}
-	winners := wo.findByReg(text, blocks, winnerReg1, from, isSite, codeSite)
+	winners := wo.findByReg(text, blocks, winnerReg0, from, isSite, codeSite)
 	if len(winners) == 0 {
 		winners = wo.findByReg(text, blocks, winnerReg2, from, isSite, codeSite)
 	}
@@ -238,7 +238,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 					object = map[string]interface{}{}
 				}
 				val := wo.clear("中标单位", v)
-				if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5{
+				if val != nil && utf8.RuneCountInString(qutil.ObjToString(val)) > 5 {
 					count++
 					object["entname"] = strings.TrimSpace(qutil.ObjToString(val))
 					object["sort"] = wo.toNumber(k, count)
@@ -293,6 +293,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 func (wo *WinnerOrderEntity) clear(typ, v string) interface{} {
 	if typ == "中标单位" && regDivision.MatchString(v) {
 		v = findCompanyReg.FindString(v)
+		v = filterWinner.FindString(v)
 	}
 	v = filterValue.ReplaceAllString(v, "")
 	//过滤

+ 1 - 0
src/jy/util/article.go

@@ -41,6 +41,7 @@ type Job struct {
 	SimCityScore      map[string]float64                //简称city得分
 	SimDistrictScore  map[string]float64                //简称district得分
 	Dataging          int
+	IsClearnMoney     string //站点清理金额
 }
 
 type ExtField struct {

+ 11 - 4
src/res/fieldscore.json

@@ -55,6 +55,13 @@
                 "space": 2,
                 "regexp": 2,
                 "kvweight": 1
+            },
+            "budget": {
+                "table": 3,
+                "colon": 2,
+                "space": 2,
+                "regexp": 2,
+                "kvweight": 1
             }
         }
     },
@@ -263,7 +270,7 @@
             },
 			{
                 "describe": "非结尾",
-                "regstr": ".*[^集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行]$",
+                "regstr": ".*[^集团|公司|学校|中心|家具城|门诊|\\[大中小\\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行|处]$",
                 "score": -5
             }
         ],
@@ -799,11 +806,11 @@
         "type": "float",
         "describe": "min>val:1,min<=val<=max:3,max<val:1",
         "min": 100,
-        "max": 10000000000,
+        "max": 100000000000,
         "score": [
             -3,
             2,
-            -4
+            -3
         ]
     },
     "budget": {
@@ -814,7 +821,7 @@
         "score": [
             -3,
             2,
-            -4
+            -3
         ]
     },
     "supervisorrate": {

+ 347 - 1
src/res/moneyclear.json

@@ -11,7 +11,7 @@
   },
   "ha_hnszfcgw_htgg": {
     "descript": "金额除以10000",
-    "maxmoney": 10000000000,
+    "maxmoney": 1000000000,
     "divisor": 10000
   },
   "hn_hnszbtbjgw_zbhxrgs2": {
@@ -39,6 +39,11 @@
     "maxmoney": 10000000000,
     "divisor": 10000
   },
+  "a_zgzfcgw_bid_tender_new_10_12": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
   "a_zgzfcgw_zfcghtgg_new": {
     "descript": "金额除以10000",
     "maxmoney": 10000000000,
@@ -53,5 +58,346 @@
     "descript": "金额除以10000",
     "maxmoney": 10000000000,
     "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_zongb_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_new_10_12": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_new_1_3": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_one": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_one_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_zongb": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_zygg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_zygg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2012": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2013": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2014": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2015": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2016": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_bid_tender_2017": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zgzfcgw_dfgg_new_7_9": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_zfcg_cght": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_gcjs_jggs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_qt": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_zfcg_zbgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_jyxx": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_gcjs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_zfcg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_gjggzyjypt_zfcg_gzsx": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "zj_zjsggzyjyfwpt_zfcg_zhbcjgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "yn_ynsggzyjyxxw_gcjs_pbjggs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "zj_zjsggzyjyfwpt_gcxm_zhbjggg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gz_gzszbtbggfwpt_gggs_jsgc": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hi_zghnzfcg_cgxx_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gd_gzggzyjyw_jt_zbhxrgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hb_hbszfcgw_szdwzfcght_New": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "yn_ynsggzyjyxxw_zfcg_zbjg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "bj_bjszfcgw_qjxxgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_zhrmghgczb_dfbx_zhbgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "bj_bjszfcgw_sjxxgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sd_zgsdzfcgw_sxzhbgg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sd_zgsdzfcgw_xxgk_sxhtgk": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hi_hnszbtbjgw": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_jdcpzbtbdzjypt_zhbjggg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "a_jdcpzbtbdzjypt_zbgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gd_gzggzyjyw_fjsz_zbhxrgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "yn_ynsggzyjyxxw_gcjs_zbjggg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hi_zghnzfcg_cgxx": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hb_hbszfcgw_cght_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sd_sdsggzyjyxxw_zfcg_cght": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hb_hbszfcgw_sszhbgg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sd_zgsdzfcgw_sxzhbgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "jx_jxsggzyjyw_zgcg_jggs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "yn_ynsggzyjyxxw_gcjs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "cq_cqszbtbzhw_zbgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "ha_hnszfcgw_htgg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "js_jszwfww": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "zj_wzsjsgczbw_zhbxx": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "cq_cqszbtbzhw_zbgs_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gx_gxzzzzqzfcg_sxj_xmjyhtgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hl_hljgczbw_zbgs_sg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gx_gxzzzzqzfcg_sxj_xmjyhtgg_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gx_gxzzzzqzfcg_sxj_zbgg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sn_sxcgyzbw_zbgs_new": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "ha_hnszfcgw_sx_jggg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "yn_ynsggzyjyxxw_zfcg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "sn_sxcgyzbw_zbgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gz_gzszbtbw_zbgs_history": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "gz_gzszbtbw_zbgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "hl_hljgczbw_zbgg_sg": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "jx_jxsggzyjyw_zgcg_htgs": {
+    "descript": "金额除以10000",
+    "maxmoney": 10000000000,
+    "divisor": 10000
+  },
+  "zj_zjzfcg_zbcjgg": {
+    "descript": "大于10w失效",
+    "maxmoney": 100000,
+    "divisor": 10000000,
+    "invalid": "true"
   }
 }

+ 6 - 3
src/web/templates/admin/result_list.html

@@ -181,8 +181,8 @@ $(function () {
 				testtask=[
 						{label:"任务名称",s_label:"s_taskname",type:"tpl_list_local",must:true,url:"/admin/task/gettaskname"},
 						{label:"起始id",s_label:"s_startid",must:true},
-						{label:"抽取数量",s_label:"s_datanum",placeholder:"5",must:true},
-						{label:"结果版本",s_label:"s_resulttrack",must:true}
+						{label:"抽取数量",s_label:"s_datanum",placeholder:"1",must:true},
+						{label:"结果版本",s_label:"s_resulttrack",must:true,placeholder:"a"}
 				]
 				//测试启动按钮
 				testtaskbtn=[
@@ -193,6 +193,9 @@ $(function () {
 							var num = $("#s_datanum").val();
 							var taskid = $("#s_taskname").val();
 							var resulttrack = $("#s_resulttrack").val();
+							//taskid ="5eda01b0c566ca08409370bb"
+							num = 1
+							//resulttrack = "a"
 							var bcon = true;
 							if(id == "" || resulttrack == ""){
 								bcon = false;
@@ -208,7 +211,7 @@ $(function () {
 									return
 								}
 							}else{
-								num = "5";
+								num = "1";
 							}
 							if(bcon){
 								//抽取测试								

+ 2 - 2
udpfilterdup/src/config.json

@@ -5,8 +5,8 @@
         "addr": "192.168.3.207:27092",
         "pool": 5,
         "db": "extract_kf",
-        "extract": "zk_move",
-        "extract_back": "zk_move",
+        "extract": "zk_zk_test",
+        "extract_back": "zk_zk_test",
         "site": {
             "dbname": "extract_kf",
             "coll": "site"

+ 7 - 5
udpfilterdup/src/main.go

@@ -16,6 +16,7 @@ import (
 	"os"
 	"qfw/util"
 	"regexp"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -124,11 +125,7 @@ func main() {
 //测试组人员使用
 func mainT() {
 
-	//testRepairData11()
-	//return
-
 	if TimingTask {
-		log.Println("新历史任务测试开始")
 		go historyTaskDay()
 		time.Sleep(99999 * time.Hour)
 	} else {
@@ -742,7 +739,12 @@ func historyTaskDay() {
 //判断是否在当前id段落
 func judgeIsCurIds (gtid string,lteid string,curid string) bool {
 
-
+	gt_time, _ := strconv.ParseInt(gtid[:8], 16, 64)
+	lte_time, _ := strconv.ParseInt(lteid[:8], 16, 64)
+	cur_time, _ := strconv.ParseInt(curid[:8], 16, 64)
+	if cur_time>gt_time&&cur_time<=lte_time {
+		return true
+	}
 	return false
 }