Browse Source

字段评估 开发 , 抽取-字段来源

zhengkun 3 years ago
parent
commit
ed6da6a2c6

+ 14 - 14
data_monitoring/listen_data/src/main.go

@@ -87,18 +87,9 @@ func main()  {
 	//}
 	//save_mgo.InitPool()
 
-	save_mgo = &MongodbSim{
-		MongodbAddr: "172.17.4.85:27080",
-		DbName:      "qfw",
-		Size:        10,
-		UserName: "",
-		Password: "",
-	}
-	save_mgo.InitPool()
-
 	//save_mgo = &MongodbSim{
-	//	MongodbAddr: "172.17.4.87:27080",
-	//	DbName:      "datamark",
+	//	MongodbAddr: "172.17.4.85:27080",
+	//	DbName:      "qfw",
 	//	Size:        10,
 	//	UserName: "",
 	//	Password: "",
@@ -106,15 +97,24 @@ func main()  {
 	//save_mgo.InitPool()
 
 	//save_mgo = &MongodbSim{
-	//	MongodbAddr: "127.0.0.1:27017",
-	//	DbName:      "zhengkun",
+	//	MongodbAddr: "172.17.4.87:27080",
+	//	DbName:      "datamark",
 	//	Size:        10,
 	//	UserName: "",
 	//	Password: "",
 	//}
 	//save_mgo.InitPool()
 
-	repairDataBWA1019()
+	save_mgo = &MongodbSim{
+		MongodbAddr: "127.0.0.1:27017",
+		DbName:      "zhengkun",
+		Size:        10,
+		UserName: "",
+		Password: "",
+	}
+	save_mgo.InitPool()
+
+	decodeJyUrl()
 
 	return
 

+ 38 - 5
data_monitoring/listen_data/src/zkmethod.go

@@ -71,13 +71,13 @@ func decodeJyUrl()  {
 	//zk@123123   zhengkun
 
 	/*
-	a_bldzzbtbpt_zbxm_gnzb
-	ADjY1wEci4%2FOyw6EndxZ3IkCCQCIDFjcWhwKTggPyEnZ3xwDi9UCVE%3D
-	sh_shsggzyjyzx_zfcg
-	ASMY1wJYy44OygsAllxZ3IkCCQCIDFjcWhwPwUkPS4gVXtzfytUCUg
+	待修复
+	gd_gdsdzhcgzxpt_wsjj_cjgg
+
+
 	*/
 
-	test := "ASMY1wJYy44OygsAllxZ3IkCCQCIDFjcWhwPwUkPS4gVXtzfytUCUg%3D"
+	test := "AqQY1wEfzIoLD84JHdzZGUgCjM/DSZgXGNkPB4rICEgfGdzYidUCYE="
 	var Decode  = qu.CommonDecodeArticle("content", test)
 	log.Debug(Decode[0])
 	return
@@ -89,6 +89,39 @@ func encodeJyUrl()  {
 	log.Debug(Encode)
 }
 
+//记录字段-来源
+func dealWithFieldSourceData()  {
+	dict1 := make(map[string]interface{},0)
+	dict2 := make(map[string]interface{},0)
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q,total:=map[string]interface{}{},0
+	it := sess.DB(save_mgo.DbName).C("zk_field_score_data").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
+		source := *qu.ObjToMap(tmp["field_source"])
+		for _,v := range source {
+			if v==nil {
+				continue
+			}
+			source_data :=*qu.ObjToMap(v)
+			ext_type := qu.ObjToString(source_data["ext_type"])
+			ext_from := qu.ObjToString(source_data["ext_from"])
+
+			if dict1[ext_type]==nil {
+				dict1[ext_type] = 1
+			}
+
+			if dict2[ext_from]==nil {
+				dict2[ext_from] = 1
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("结束:",total,dict1,dict2)
+
+
+}
+
 func repairDataBWA1019()  {
 	sess := save_mgo.GetMgoConn()
 	defer save_mgo.DestoryMongoConn(sess)

+ 2 - 2
data_quality/src/config.json

@@ -1,8 +1,8 @@
 {
   "udpport": ":17007",
   "mongodb": {
-    "addrName": "172.17.4.85:27080",
-    "dbName": "qfw",
+    "addrName": "127.0.0.1:27017",
+    "dbName": "zhengkun",
     "collName": "zktest_fieldscore_data",
     "pool": 10
   },

+ 14 - 7
data_quality/src/main.go

@@ -94,13 +94,20 @@ func startFieldScoreTask(mapInfo map[string]interface{}) {
 		//更新集合
 		update_dict := make(map[string]interface{},0)
 
-		//subtype := qu.ObjToString(tmp["subtype"])
-		//b_score := buyerFieldScore(tmp)
-		//update_dict["buyer"] = b_score
-		//if subtype=="中标"||subtype=="成交"||subtype=="合同" {
-		//	w_score := winnerFieldScore(tmp)
-		//	update_dict["winner"] = w_score
-		//}
+		subtype := qu.ObjToString(tmp["subtype"])
+		//buyer_s := buyerFieldScore(tmp)
+		//update_dict["buyer"] = buyer_s
+		budget_s := budgetFieldScore(tmp)
+		update_dict["budget"] = budget_s
+
+		if subtype=="中标"||subtype=="成交"||subtype=="合同" {
+			//winner_s := winnerFieldScore(tmp)
+			//update_dict["winner"] = winner_s
+			bidamount_s := bidamountFieldScore(tmp)
+			update_dict["bidamount"] = bidamount_s
+
+
+		}
 
 
 		updateFieldScore = append(updateFieldScore, []map[string]interface{}{

+ 112 - 4
data_quality/src/method.go

@@ -3,24 +3,69 @@ package main
 import (
 	log "github.com/donnie4w/go-logger/logger"
 	"github.com/go-ego/gse"
+	qu "qfw/util"
 	"regexp"
 	"unicode"
+	"unicode/utf8"
 )
 
+
+//单位
 var specHeadReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|某部|州|自治区|自治州|街道|名称|省|市|县|区|业绩|资格|中标|项目|预算单位)")
 var unHanHeadReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
 var unConReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处|校)")
 var unEndReg *regexp.Regexp = regexp.MustCompile("^.*(公司|学(校)?|博物馆|联合社|合作社|监狱|办公厅|电视台|集团|机构|企业|办公室|委员会|实验室|联社|厂|场|院|所|店|小|台|中心|局|站|城|馆|厅|处|行|科|部|队|联合(会|体)|工作室)$")
 var unenableReg1 *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5]{1,2}(责任|有限|有限股份|有限责任|实业)公司|.*(某部|先生|女士|小姐)|工程技术处)$")
 var unenableReg2 *regexp.Regexp = regexp.MustCompile("(\\?|?|单位|#|xxxx|\\*\\*|%|万元|设计企业|免费|代表|代码标识|盖电子|测试测试|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|\" +\n\t\"法人|&nbsp|国家拨入|借款|积累资金|认股人|--|、|&|`|美元)")
-var GSE *gse.Segmenter  = &gse.Segmenter{}       //分词
+//分词
+var GSE *gse.Segmenter  = &gse.Segmenter{}
+
+
+//编号
+var codeUnConReg *regexp.Regexp = regexp.MustCompile("(null|勘察|测试|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|\\([0-9]{1}\\)|[,,。、::“”‘’_\"])")
+var codeUnLenReg *regexp.Regexp = regexp.MustCompile("[\\u4e00-\\u9fa5]{9,}")
+
+
+
+
+var classMoneyScope map[string]map[string]interface{}
+
 
 func init()  {
 	log.Debug("初始化,切词")
 	GSE.LoadDict("./dictionary.txt")
+
+	//t>d>p
+	classMoneyScope = map[string]map[string]interface{}{
+		"建筑工程": {"min":10000,"max":10000000000},
+		"行政办公": {"min":100,"max":100000000},
+		"医疗卫生": {"min":1000,"max":100000000},
+		"服务采购": {"min":10,"max":100000000},
+		"机械设备": {"min":1000,"max":1000000000},
+		"水利水电": {"min":1000,"max":1000000000},
+		"能源化工": {"min":1000,"max":1000000000},
+		"弱电安防": {"min":1000,"max":1000000000},
+		"信息技术": {"min":100,"max":100000000},
+		"交通工程": {"min":1000,"max":10000000000},
+		"市政设施": {"min":1000,"max":10000000000},
+		"农林牧渔": {"min":100,"max":10000000},
+	}
 }
 
-//1、企业库检测
+//行业金额校验
+func checkingClassMoney(money float64,class string) bool  {
+	data :=classMoneyScope[class]
+	if data!=nil {
+		min := qu.Float64All(data["min"])
+		max := qu.Float64All(data["max"])
+		if money>min && money<max {
+			return true
+		}
+	}
+	return false
+}
+
+//企业库检测
 func qyNameIsExistsQYXY(name string) bool{
 	q := map[string]interface{}{
 		"company_name": name,
@@ -33,7 +78,8 @@ func qyNameIsExistsQYXY(name string) bool{
 	return true
 }
 
-func isIncludingOtherHan(str string) bool {
+//包含非中文
+func isUnHan(str string) bool {
 	var count int
 	for _, v := range str {
 		if !unicode.Is(unicode.Han, v) {
@@ -43,8 +89,9 @@ func isIncludingOtherHan(str string) bool {
 	}
 	return count > 0
 }
+
 //是否含中文
-func isIncludingHan(str string) bool {
+func isHan(str string) bool {
 	var count int
 	for _, v := range str {
 		if unicode.Is(unicode.Han, v) {
@@ -53,4 +100,65 @@ func isIncludingHan(str string) bool {
 		}
 	}
 	return count > 0
+}
+
+//符号数量
+func isCharCount(str string) []int {
+	//中文,英文,数字,其他
+	c1,c2,c3,c4:=0,0,0,0
+	for _, v := range str {
+		if unicode.Is(unicode.Han, v) {
+			c1++
+		}else if unicode.IsLetter(v){
+			c2++
+		} else if unicode.IsNumber(v){
+			c3++
+		}else {
+			c4++
+		}
+	}
+	return []int{c1,c2,c3,c4}
+}
+
+//中文比例-1:3
+func isHanLenToLittle(str string) bool {
+	var count int
+	len := utf8.RuneCountInString(str)
+	min_count := len/3
+	for _, v := range str {
+		if unicode.Is(unicode.Han, v) {
+			count++
+			if count>=min_count {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+
+
+//是否含字母数字
+func isAlphanumeric(str string) bool {
+	var count int
+	for _, v := range str {
+		if unicode.IsNumber(v) || unicode.IsLetter(v) {
+			count++
+			break
+		}
+	}
+	return count > 0
+}
+
+//连续数字
+func isRegTimeDateCode(str string) bool {
+	reg:=`\d{8}`
+	regx,_ := regexp.Compile(reg)
+	if regx.FindString(str)!="" {
+		return false
+	}
+	if utf8.RuneCountInString(str)==8 {
+		return true
+	}
+	return false
 }

+ 58 - 0
data_quality/src/scorebidamount.go

@@ -0,0 +1,58 @@
+package main
+import (
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	qu "qfw/util"
+)
+
+func bidamountFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+
+	bidamount := qu.Float64All(tmp["bidamount"])
+	budget := qu.Float64All(tmp["budget"])
+	score := int64(100)
+	reason:=""
+
+	/*错误项*/
+	if (bidamount < 10.0 || bidamount > 20000000000.0 ) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"大小错误",
+		}
+	}
+
+	//间隔比例-预算存在情况下
+	if budget > 0.0 {
+		proportion := bidamount/budget
+		if (proportion>=0.1&&proportion<=0.3)||(proportion>=3&&proportion<=5){
+			reason+="~间隔-2"
+			score -= 2
+		}
+		if proportion>=0.001&&proportion<0.1{
+			return map[string]interface{}{
+				"score":int64(0),
+				"reason":"比例错误",
+			}
+		}
+	}
+
+
+	//行业区间-减分项
+	class := make(map[string]interface{},0)
+	if topscopeclass, ok := tmp["topscopeclass"].(primitive.A); ok&&len(topscopeclass)>0 {
+		for _,v := range topscopeclass{
+			value := qu.ObjToString(v)
+			new_value := value[:len(value)-1]
+			if class[new_value]==nil {//校验行业
+				if !checkingClassMoney(bidamount,new_value) {
+					reason+="~"+new_value+"-1"
+					score-=1
+				}
+				class[new_value]=1
+			}
+		}
+	}
+
+	return map[string]interface{}{
+		"score":score,
+		"reason":reason,
+	}
+}

+ 65 - 0
data_quality/src/scorebudget.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	qu "qfw/util"
+)
+
+func budgetFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+
+	budget := qu.Float64All(tmp["budget"])
+	bidamount := qu.Float64All(tmp["bidamount"])
+	score := int64(100)
+	reason:=""
+
+	/*错误项*/
+	if (budget<10.0||budget>20000000000.0) && budget>0.0 {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"大小错误",
+		}
+	}
+	//间隔比例-预算存在情况下
+	if bidamount > 0.0 && budget >0.0 {
+		proportion := bidamount/budget
+		if (proportion>=0.1&&proportion<=0.3)||(proportion>=3&&proportion<=5){
+			reason+="~间隔-2"
+			score -= 2
+		}
+		if proportion>=0.001&&proportion<0.1{
+			return map[string]interface{}{
+				"score":int64(0),
+				"reason":"比例错误",
+			}
+		}
+	}
+
+	//空值减分
+	if budget==0.0 {
+		reason+="~空值-1"
+		score -= 1
+	}
+
+	//行业区间-减分项
+	class := make(map[string]interface{},0)
+	if topscopeclass, ok := tmp["topscopeclass"].(primitive.A); budget>0.0&&ok&&len(topscopeclass)>0 {
+		for _,v := range topscopeclass{
+			value := qu.ObjToString(v)
+			new_value := value[:len(value)-1]
+			if class[new_value]==nil {//校验行业
+				if !checkingClassMoney(budget,new_value) {
+					reason+="~"+new_value+"-1"
+					score-=1
+				}
+				class[new_value]=1
+			}
+		}
+	}
+
+
+
+	return map[string]interface{}{
+		"score":score,
+		"reason":reason,
+	}
+}

+ 3 - 3
data_quality/src/scorebuyer.go

@@ -14,7 +14,7 @@ func buyerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 	reason:=""
 	/*错误项*/
 	if (utf8.RuneCountInString(buyer) > 0 && utf8.RuneCountInString(buyer) < 4 ) ||
-		buyer=="" || !isIncludingHan(buyer) {
+		buyer=="" || !isHan(buyer) {
 		return map[string]interface{}{
 			"score":int64(0),
 			"reason":"错误项",
@@ -47,12 +47,12 @@ func buyerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 		score -= 2
 	}
 	//5、中英文结合
-	if isIncludingOtherHan(buyer){
+	if isUnHan(buyer){
 		reason+="~非纯中文-2"
 		score -= 2
 	}
 
-	//6、切词首部比对-影响性能
+	//6、切词首部比对-
 	buyer_jb_arr := GSE.Cut(buyer, true)
 	if len(buyer_jb_arr)>0 && buyer_jb_arr!=nil {
 		head_char := qu.ObjToString(buyer_jb_arr[0])

+ 66 - 0
data_quality/src/scoreprojectcode.go

@@ -0,0 +1,66 @@
+package main
+
+import (
+	qu "qfw/util"
+	"unicode/utf8"
+)
+
+func projectcodeFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+
+	projectcode := qu.ObjToString(tmp["projectcode"])
+	score := int64(100)
+	reason:=""
+
+	/*错误项*/
+	l := utf8.RuneCountInString(projectcode)
+	if l > 19 || l < 5 {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"长度-错误",
+		}
+	}
+
+	if isRegTimeDateCode(projectcode) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"日期格式-错误",
+		}
+	}
+
+	if !isAlphanumeric(projectcode) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"不含字母数字-错误",
+		}
+	}
+
+	/*减分项*/
+	//空值
+	if projectcode=="" {
+		reason+="~空值-1"
+		score -= 1
+	}
+
+	//符号数量
+	countArr := isCharCount(projectcode)
+	if countArr[3] > 2 {
+		reason+="~符号-1"
+		score -= 1
+	}
+	//包含关键词
+	if codeUnConReg.MatchString(projectcode) {
+		reason+="~黑名单-2"
+		score -= 2
+	}
+	//连续中文长度超过X个
+	if codeUnLenReg.MatchString(projectcode) {
+		reason+="~连续中文长度-2"
+		score -= 2
+	}
+
+
+	return map[string]interface{}{
+		"score":score,
+		"reason":reason,
+	}
+}

+ 51 - 0
data_quality/src/scoreprojectname.go

@@ -0,0 +1,51 @@
+package main
+
+import (
+	qu "qfw/util"
+	"unicode/utf8"
+)
+func projectnameFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
+	projectname := qu.ObjToString(tmp["projectname"])
+	score := int64(100)
+	reason:=""
+
+	/*错误项*/
+	if utf8.RuneCountInString(projectname) <= 6  || !isHan(projectname) {
+		return map[string]interface{}{
+			"score":int64(0),
+			"reason":"错误项",
+		}
+	}
+
+	/*减分项*/
+
+	//长度过长
+	if utf8.RuneCountInString(projectname) >= 25 {
+		reason+="~过长-1"
+		score -= 1
+	}
+
+
+
+	//标题切词与名称比对-待开发
+
+
+
+
+	//符号数量
+	countArr := isCharCount(projectname)
+	if countArr[3] > 3 {
+		reason+="~符号-2"
+		score -= 2
+	}
+	//中文-长度-比例
+	if !isHanLenToLittle(projectname) {
+		reason+="~中文比例-2"
+		score -= 2
+	}
+
+	return map[string]interface{}{
+		"score":score,
+		"reason":reason,
+	}
+}

+ 2 - 2
data_quality/src/scorewinner.go

@@ -22,7 +22,7 @@ func winnerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 	for _,winner:=range s_winner_arr{
 		/*错误项*/
 		if (utf8.RuneCountInString(winner) > 0 && utf8.RuneCountInString(winner) < 4 ) ||
-			winner=="" || !isIncludingHan(winner) {
+			winner=="" || !isHan(winner) {
 			return map[string]interface{}{
 				"score":int64(0),
 				"reason":"错误项",
@@ -56,7 +56,7 @@ func winnerFieldScore(tmp map[string]interface{}) (map[string]interface{}) {
 			score -= 2
 		}
 		//5、中英文结合
-		if isIncludingOtherHan(winner){
+		if isUnHan(winner){
 			reason+="~非纯中文-2"
 			score -= 2
 		}

+ 4 - 1
data_quality/src1/scoreMethod.go

@@ -70,4 +70,7 @@ func isChineseChar(str string) bool {
 		}
 	}
 	return false
-}
+}
+
+
+

+ 2 - 1
src/config.json

@@ -28,11 +28,12 @@
     "fieldsfind": false,
     "qualityaudit": false,
     "saveblock": false,
-    "filelength": 150000,
+    "filelength": 50000,
     "iscltlog": false,
     "brandgoods": false,
     "pricenumber":true,
     "udptaskid": "60b493c2e138234cb4adb640",
+    "nextNode": [],
     "udpport": "6601",
     "esconfig": {
         "available": false,

+ 65 - 2
src/jy/extract/extract.go

@@ -1925,6 +1925,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(*j.Jsondata) > 0 {
 			tmp["jsondata"] = j.Jsondata
 		}
+
+		//字段-抽取来源
+		fieldSource := make(map[string]interface{},0)
+		//字段-抽取来源
 		for k, val := range result {
 			if k == "qualifies" {
 				squalifies := make([]interface{}, 0)
@@ -1960,10 +1964,18 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				//存0是否有效
 				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Score > -1 {
 					tmp[v.Field] = v.Value
+					fieldSource[v.Field] = map[string]interface{}{
+						"ext_type":v.Type,
+						"ext_from":v.ExtFrom,
+					}
 					break
 				}
 				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
 					tmp[v.Field] = v.Value
+					fieldSource[v.Field] = map[string]interface{}{
+						"ext_type":v.Type,
+						"ext_from":v.ExtFrom,
+					}
 					break
 				}
 			}
@@ -2003,16 +2015,30 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					}
 				}
 				if qu.Float64All(tmp["budget"]) < tmpBudget {
+					fieldSource["budget"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
+					}
 					tmp["budget"] = tmpBudget
 				}
 				if qu.Float64All(tmp["agencyfee"]) < tmpAgencyfee {
+					fieldSource["agencyfee"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
+					}
 					tmp["agencyfee"] = tmpAgencyfee
 				}
-
-
 				if qu.Float64All(tmp["bidamount"]) > 0 && qu.Float64All(tmp["budget"]) > 0 && (qu.Float64All(tmp["bidamount"])/10 > qu.Float64All(tmp["budget"])) {
+					fieldSource["bidamount"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
+					}
 					tmp["bidamount"] = tmpBidamount
 				} else if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
+					fieldSource["bidamount"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
+					}
 					tmp["bidamount"] = tmpBidamount
 				}
 			} else {
@@ -2020,6 +2046,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				if tmp["budget"] == nil || tmp["budget"] == 0 {
 					for _, v := range j.PackageInfo {
 						if v["budget"] != nil {
+							fieldSource["budget"] = map[string]interface{}{
+								"ext_type":"",
+								"ext_from":"package",
+							}
 							tmp["budget"] = v["budget"]
 						}
 					}
@@ -2028,6 +2058,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				if tmp["agencyfee"] == nil || tmp["agencyfee"] == 0 {
 					for _, v := range j.PackageInfo {
 						if v["agencyfee"] != nil {
+							fieldSource["agencyfee"] = map[string]interface{}{
+								"ext_type":"",
+								"ext_from":"package",
+							}
 							tmp["agencyfee"] = v["agencyfee"]
 						}
 					}
@@ -2036,6 +2070,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
 					for _, v := range j.PackageInfo {
 						if v["bidamount"] != nil {
+							fieldSource["bidamount"] = map[string]interface{}{
+								"ext_type":"",
+								"ext_from":"package",
+							}
 							tmp["bidamount"] = v["bidamount"]
 						}
 					}
@@ -2053,14 +2091,26 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 			if (savewinner == nil || len(savewinner) == 0) && tmp["winner"] != nil {
 				tmp["s_winner"] = tmp["winner"]
+				fieldSource["s_winner"] = fieldSource["winner"]
 			} else if savewinner != nil {
 				savewinner = RemoveReplicaSliceString(savewinner)
 				tmp["s_winner"] = strings.Join(savewinner, ",")
+
+				if len(savewinner)==1 {
+					fieldSource["s_winner"] = fieldSource["winner"]
+				}else if len(savewinner)>1{
+					fieldSource["s_winner"] = map[string]interface{}{
+						"ext_type":"",
+						"ext_from":"package",
+					}
+				}
 			}
 		} else if tmp["winner"] != nil && tmp["winner"] != "" {
 			//没有分包取winner
 			tmp["s_winner"] = tmp["winner"]
+			fieldSource["s_winner"] = fieldSource["winner"]
 		}
+
 		if len(j.Winnerorder) > 0 { //候选人信息
 			for i, v := range j.Winnerorder {
 				if v["price"] != nil {
@@ -2086,10 +2136,18 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 						if tmp[v.Field] == nil {
 							if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Value.(float64) > 100 && v.Value.(float64) < 50000000000 {
 								tmp[v.Field] = v.Value
+								fieldSource[v.Field] = map[string]interface{}{
+									"ext_type":v.Type,
+									"ext_from":"ff",
+								}
 								break
 							}
 							if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
 								tmp[v.Field] = v.Value
+								fieldSource[v.Field] = map[string]interface{}{
+									"ext_type":v.Type,
+									"ext_from":"ff",
+								}
 								break
 							}
 						}
@@ -2104,6 +2162,11 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				ffield["winnerorder"] = jf.Winnerorder
 			}
 		}
+
+		//添加字段来源
+		tmp["field_source"] = fieldSource
+		//添加字段来源
+
 		for k, v := range *doc {
 			if utf8.RuneCountInString(qu.ObjToString(v)) > 100000 {
 				(*doc)[k] = []rune(qu.ObjToString(v))[:100000]

+ 36 - 10
src/jy/extract/extractudp.go

@@ -17,9 +17,11 @@ import (
 )
 
 var Udpclient mu.UdpClient //udp对象
+var nextNodes []map[string]interface{}
 
 //udp通知抽取
 func ExtractUdp() {
+	nextNodes = qu.ObjArrToMapArr(ju.Config["nextNode"].([]interface{}))
 	Udpclient = mu.UdpClient{Local: ":" + qu.ObjToString(ju.Config["udpport"]), BufSize: 1024}
 	Udpclient.Listen(processUdpMsg)
 }
@@ -60,9 +62,33 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					if udpinfo == "" {
 						udpinfo = "udpok"
 					}
+
+					//新版本控制抽取
 					ExtractByUdp(sid, eid, ra)
 					log.Debug("抽取完成udp通知抽取id段",udpinfo, sid, "~", eid)
 					Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
+
+
+
+					//适配重采抽取-发送udp
+					//go Udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
+					//log.Debug("udp通知抽取id段", sid, " ", eid)
+					//ExtractByUdp(sid, eid, ra)
+					//for _, m := range nextNodes {
+					//	by, _ := json.Marshal(map[string]interface{}{
+					//		"gtid":  sid,
+					//		"lteid": eid,
+					//		"stype": qu.ObjToString(m["stype"]),
+					//	})
+					//	err := Udpclient.WriteUdp(by, mu.OP_TYPE_DATA, &net.UDPAddr{
+					//		IP:   net.ParseIP(m["addr"].(string)),
+					//		Port: qu.IntAll(m["port"]),
+					//	})
+					//	if err != nil {
+					//		log.Debug(err)
+					//	}
+					//}
+					//log.Debug("udp通知抽取完成,eid=", eid)
 				}
 			}
 		}
@@ -149,11 +175,11 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
-				if qu.ObjToString(v["subtype"])!="中标" &&
-					qu.ObjToString(v["subtype"])!="成交" &&
-					qu.ObjToString(v["subtype"])!="合同" {
-					continue
-				}
+				//if qu.ObjToString(v["subtype"])!="中标" &&
+				//	qu.ObjToString(v["subtype"])!="成交" &&
+				//	qu.ObjToString(v["subtype"])!="合同" {
+				//	continue
+				//}
 
 				var j, jf *ju.Job
 				var isSite bool
@@ -177,11 +203,11 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
-				if qu.ObjToString(v["subtype"])!="中标" &&
-					qu.ObjToString(v["subtype"])!="成交" &&
-					qu.ObjToString(v["subtype"])!="合同" {
-					continue
-				}
+				//if qu.ObjToString(v["subtype"])!="中标" &&
+				//	qu.ObjToString(v["subtype"])!="成交" &&
+				//	qu.ObjToString(v["subtype"])!="合同" {
+				//	continue
+				//}
 
 
 				var j, jf *ju.Job

+ 2 - 2
udps/main.go

@@ -22,8 +22,8 @@ func main() {
 	flag.IntVar(&p, "p", 6601, "端口")
 	flag.IntVar(&tmptime, "tmptime", 0, "时间查询")
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
-	flag.StringVar(&id1, "gtid", "1152c2c71a75b8f446966fd5", "gtid")
-	flag.StringVar(&id2, "lteid", "9152c3f21a75b8f446967574", "lteid")
+	flag.StringVar(&id1, "gtid", "614168ea1a75b8f44678a39b", "gtid")
+	flag.StringVar(&id2, "lteid", "6142e5741a75b8f4467b3276", "lteid")
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "biddingall", "stype,传递类型")
 	flag.StringVar(&bkey, "bkey", "", "bkey,加上此参数表示不生关键词和摘要")