Kaynağa Gözat

抽取-更新-

zhengkun 3 yıl önce
ebeveyn
işleme
edac914078

+ 10 - 0
data_monitoring/listen_data/src/main.go

@@ -96,6 +96,15 @@ func main()  {
 	//}
 	//save_mgo.InitPool()
 
+	//save_mgo = &MongodbSim{
+	//	MongodbAddr: "172.17.4.87:27080",
+	//	DbName:      "datamark",
+	//	Size:        10,
+	//	UserName: "",
+	//	Password: "",
+	//}
+	//save_mgo.InitPool()
+
 	save_mgo = &MongodbSim{
 		MongodbAddr: "127.0.0.1:27017",
 		DbName:      "zhengkun",
@@ -105,6 +114,7 @@ func main()  {
 	}
 	save_mgo.InitPool()
 
+	exportModelScoreData()
 
 	return
 

+ 356 - 48
data_monitoring/listen_data/src/zkmethod.go

@@ -69,7 +69,15 @@ func addChromPluginTaskData()  {
 //解密
 func decodeJyUrl()  {
 	//zk@123123   zhengkun
-	test := "ABCY1wEci4%2FIDk6RHtxZ3IkCCQCIDFjcWhwKTgvLT03f3twDidUCbE%3D"
+
+	/*
+	a_bldzzbtbpt_zbxm_gnzb
+	ADjY1wEci4%2FOyw6EndxZ3IkCCQCIDFjcWhwKTggPyEnZ3xwDi9UCVE%3D
+	sh_shsggzyjyzx_zfcg
+	ASMY1wJYy44OygsAllxZ3IkCCQCIDFjcWhwPwUkPS4gVXtzfytUCUg
+	*/
+
+	test := "ASMY1wJYy44OygsAllxZ3IkCCQCIDFjcWhwPwUkPS4gVXtzfytUCUg%3D"
 	var Decode  = qu.CommonDecodeArticle("content", test)
 	log.Debug(Decode[0])
 	return
@@ -81,6 +89,289 @@ func encodeJyUrl()  {
 	log.Debug(Encode)
 }
 
+
+// dealWithSpiderDataBWA is a placeholder for exporting abnormal data related
+// to sh_shsggzyjyzx_zfcg in preparation for a repair. It currently does nothing.
+func dealWithSpiderDataBWA() {
+	// Planned: records in which any two fields hold the same value get re-extracted.
+}
+
+
+//导出训练模型-分数,数据
+func exportModelScoreData() {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q,total,isok:=map[string]interface{}{},0,0
+
+	it := sess.DB(save_mgo.DbName).C("zktest_model_data").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
+		//模型:行政级别-项目名称-物品-数量-总金额
+		if total%15 == 0 {
+			if purchasinglist, ok := tmp["purchasinglist"].(primitive.A); ok {
+				if len(purchasinglist) > 0 { //验证标的物
+					items,numbers,moneys,istrue:="",int64(0),int64(0),true
+					lists := qu.ObjArrToMapArr(purchasinglist)
+					for k,v:=range lists {
+						item:=qu.ObjToString(v["itemname"])
+						number:=qu.Int64All(v["number"])
+						money:=qu.Int64All(v["totalprice"])
+						if item==""||number==0 {
+							istrue = false
+							break
+						}
+						if k==0 {
+							items = item
+							numbers = number
+							moneys = money
+						}else {
+							items = items+" "+item
+							numbers = numbers+number
+							moneys =moneys + money
+						}
+					}
+					if istrue {
+						if qu.Int64All(tmp["bidamount"])!=0 {
+							moneys = qu.Int64All(tmp["bidamount"])
+						}
+
+						if moneys>0 {
+							isok++
+							save_mgo.Save("zk_zk_zk_model", map[string]interface{}{
+								"buyer":tmp["buyer"],
+								"name":tmp["projectname"],
+								"items":items,
+								"numbers":numbers,
+								"money":moneys,
+								"href":tmp["href"],
+								"_id":tmp["_id"],
+							})
+						}
+					}
+				} else {
+
+					if qu.Int64All(tmp["bidamount"])!=0 {
+						isok++
+						save_mgo.Save("zk_zk_zk_model", map[string]interface{}{
+							"buyer":tmp["buyer"],
+							"name":tmp["projectname"],
+							"items":"",
+							"numbers":1,
+							"money":qu.Int64All(tmp["bidamount"]),
+							"href":tmp["href"],
+							"_id":tmp["_id"],
+						})
+					}
+				}
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("is over "," 总计:",total,isok)
+}
+
+//刷站点类型数据
+func updateSiteTypeData()  {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q,total:=map[string]interface{}{},0
+	isok:=0
+	it := sess.DB(save_mgo.DbName).C("zktest_score_data").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
+		if total%1000==0 {
+			log.Debug("current index ",total)
+		}
+		data := save_mgo.FindOne("spider_site", map[string]interface{}{
+			"site":qu.ObjToString(tmp["site"]),
+		})
+		if data!=nil {
+			isok++
+			save_mgo.UpdateById("zktest_score_data",BsonTOStringId(tmp["_id"]), map[string]interface{}{
+				"$set": map[string]interface{}{
+					"site_type":qu.ObjToString(data["site_type"]),
+				},
+			})
+		}
+
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("is over "," 总计:",total,isok)
+}
+
+// analysisScoreData analyses scored documents against the manual mark data.
+// It selects documents from "zktest_score_data" with core_score >= 60,
+// error_score <= 0 and abnormal_score <= 0, looks each one up in
+// "zktest_markwork_data" by its hex id, and counts those whose ck_* fields
+// exceed 1. Results are only printed/logged; nothing is written back.
+func analysisScoreData()  {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	total,iserr,isok:=0,0,0
+	// Per-field tallies; only filled in by the non-type (else) branch below.
+	err_data := map[string]int{
+		"ck_buyer":0,
+		"ck_s_winner":0,
+		"ck_budget":0,
+		"ck_bidamount":0,
+		"ck_projectcode":0,
+	}
+	// Base filter shared by both branches.
+	q := map[string]interface{}{
+		"core_score": map[string]interface{}{
+			"$gte":60 ,
+		},
+		"error_score":map[string]interface{}{
+			"$lte":0 ,
+		},
+		"abnormal_score":map[string]interface{}{
+			"$lte":0 ,
+		},
+	}
+	// Check fields read from each mark-work record.
+	ck_arr := []string{"ck_buyer","ck_s_winner","ck_budget","ck_bidamount","ck_projectcode"}
+	// Site types iterated in the per-type branch; the empty string is one of the values queried.
+	site_type_arr := []string{
+		"公共资源",
+		"其他",
+		"政府采购",
+		"社会采购类",
+		"企业门户/平台",
+		"银行",
+		"代理机构",
+		"政府门户",
+		"",
+		"军队",
+		"学校",
+		"人民政府网",
+		"医疗机构",
+		"工程建设类",
+		"证券",
+		"保险",
+	}
+	// Hard-coded switch: with true, only the per-site-type branch runs and the else branch is dead.
+	isType := true
+	if isType {
+		for _,site_type:=range site_type_arr {
+			// Narrow the shared filter to the current site type and restart the counters.
+			q["site_type"]=site_type
+			total,iserr=0,0
+			it := sess.DB(save_mgo.DbName).C("zktest_score_data").Find(&q).Iter()
+			for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
+				tmpid:=BsonTOStringId(tmp["_id"])
+				isck:=false
+				data :=save_mgo.FindOne("zktest_markwork_data", map[string]interface{}{
+					"id":tmpid,
+				})
+				// Only records with more than two fields are inspected.
+				if data!=nil && len(data)>2 {
+					for _,v := range ck_arr{
+						ck := qu.Int64All(data[v])
+						// ck_budget is ignored in this branch (unlike the else branch).
+						// NOTE(review): presumably a ck value above 1 marks a field judged wrong — confirm.
+						if ck>1 && v!="ck_budget" {
+							isck = true
+						}
+					}
+				}
+				if isck {
+					iserr++
+				}
+				// Use a fresh map for the next document.
+				tmp = make(map[string]interface{})
+			}
+			// Site types without any matching document are not reported.
+			if total>0 {
+				fmt.Println("is over ",site_type," 总计:",total,iserr)
+			}
+		}
+	}else {
+		// Overall statistics without site-type grouping, including per-field tallies.
+		it := sess.DB(save_mgo.DbName).C("zktest_score_data").Find(&q).Iter()
+		for tmp := make(map[string]interface{}); it.Next(&tmp);total++{
+			tmpid:=BsonTOStringId(tmp["_id"])
+			isck := false
+			data :=save_mgo.FindOne("zktest_markwork_data", map[string]interface{}{
+				"id":tmpid,
+			})
+			if data!=nil && len(data)>2 {
+				for _,v := range ck_arr{
+					ck := qu.Int64All(data[v])
+					// Every check field counts here, ck_budget included.
+					if ck>1 {
+						isck = true
+						err_data[v] = err_data[v]+int(1)
+					}
+				}
+			}
+			if isck {
+				iserr++
+			}else {
+				isok++
+			}
+			tmp = make(map[string]interface{})
+		}
+		log.Debug("is over "," 总计:",total,isok,iserr,err_data)
+	}
+
+
+}
+
+
+
+
+// exportMarkWorkData exports the original extraction results for manually
+// marked documents: for every id listed in "zktest_markwork_id" it fetches
+// the document from "result_20210108" and saves it into "zktest_markwork_data".
+func exportMarkWorkData() {
+	conn := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(conn)
+
+	scanned := 0
+	filter := map[string]interface{}{}
+	cursor := conn.DB(save_mgo.DbName).C("zktest_markwork_id").Find(&filter).Iter()
+
+	entry := make(map[string]interface{})
+	for cursor.Next(&entry) {
+		record := save_mgo.FindById("result_20210108", qu.ObjToString(entry["id"]))
+		// Only records with more than two fields are copied.
+		if record != nil && len(record) > 2 {
+			save_mgo.Save("zktest_markwork_data", record)
+		}
+		// Use a fresh map for the next document so no keys carry over.
+		entry = make(map[string]interface{})
+		scanned++
+	}
+	log.Debug("is over "," 总计:", scanned)
+}
+
+// mergeMarkWorkData merges the manual mark collections into
+// "zktest_mark_data". Each document loses its original _id and gains a
+// "jyhref" field holding the jianyu360 article URL built from that id.
+func mergeMarkWorkData() {
+	conn := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(conn)
+
+	const articleURL = "https://www.jianyu360.com/article/content/%s.html"
+	sources := []string{
+		"20210924Shdx_mf",
+		"markwork_wb",
+		"markwork_wb_2_shdx",
+		"20210924Shdx_nb",
+		"20210928Shdx_nb_1",
+		"markwork_wb_mf",
+		"markwork_jy_lx",
+		"markwork_wb_mf_bz",
+	}
+
+	for _, source := range sources {
+		copied := 0
+		filter := map[string]interface{}{}
+		cursor := conn.DB(save_mgo.DbName).C(source).Find(&filter).Iter()
+
+		record := make(map[string]interface{})
+		for cursor.Next(&record) {
+			encodedID := qu.CommonEncodeArticle("content", BsonTOStringId(record["_id"]))
+			// Drop the source _id before saving the record into the merged collection.
+			delete(record, "_id")
+			record["jyhref"] = fmt.Sprintf(articleURL, encodedID)
+			save_mgo.Save("zktest_mark_data", record)
+			// Use a fresh map for the next document so no keys carry over.
+			record = make(map[string]interface{})
+			copied++
+		}
+		log.Debug("is over ", source, " 总计:", copied)
+	}
+}
+
+// exportJJYHwinerdata processes the Bank of Jiujiang winner data: every
+// document in "zktest_jjyhwinner_data" gets a "jyhref" field holding the
+// jianyu360 article URL derived from its _id. It logs the number of documents
+// scanned and the number of updates that succeeded.
+func exportJJYHwinerdata() {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+
+	const articleURL = "https://www.jianyu360.com/article/content/%s.html"
+	total, isok := 0, 0
+	q := map[string]interface{}{}
+	it := sess.DB(save_mgo.DbName).C("zktest_jjyhwinner_data").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		tmpid := BsonTOStringId(tmp["_id"])
+		jyhref := fmt.Sprintf(articleURL, qu.CommonEncodeArticle("content", tmpid))
+		updated := save_mgo.UpdateById("zktest_jjyhwinner_data", tmpid, map[string]interface{}{
+			"$set": map[string]interface{}{
+				"jyhref": jyhref,
+			},
+		})
+		// isok used to be logged without ever being incremented, so the summary
+		// always reported 0; count the updates that actually succeeded.
+		// NOTE(review): assumes UpdateById reports success as a bool — confirm
+		// against the MongodbSim helper used by this package.
+		if updated {
+			isok++
+		}
+		// Use a fresh map for the next document so no keys carry over.
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("is over ", total, isok)
+}
+
+
+
+
 //整合buyer敏感词库
 func finishingEnableBuyerData()  {
 	sess := save_mgo.GetMgoConn()
@@ -380,7 +671,7 @@ func exportFromUrlToDataArr()  {
 		tmpid := Decode[0]
 		data := save_mgo.FindById("bidding",tmpid)
 		if data!=nil && len(data)>2 {
-			save_mgo.Save("zkzkzkzkz_ddddddddd",data)
+			save_mgo.Save("zk_jjyh_extract_data",data)
 		}
 	}
 }
@@ -547,53 +838,7 @@ func repairEsCityData()  {
 
 	log.Debug("结束:",total,isok)
 }
-//抽取试验方式
-func extract_test()  {
-
-	//reg := regexp.MustCompile(`^.{2}([大|小|中|学][学|院]|公司)$`)
-	//bol:= reg.MatchString("一名司公")
-	//log.Debug(bol)
-	//
-	//
-	//return
 
-	//text := `采购公告期:2020年04月13日
-	//		品目分类: 货物
-	//		采购项目包组: A
-	//		供应商组织机构代码: 9113012955331793XL
-	//		供应商名称: 河北金谷粮食机械有限公司
-	//		供应商地址: 赞皇县赞皇镇东白草坪村
-	//		主要标的名称: 高邑县金达粮油购销有限责任公司使用构建粮食延伸收购服务网点工程
-	//		金额(元): 685448.43元
-	//		优惠率: 无
-	//		服务要求: 详见文件`
-	//var MultiReg = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|分|合同|施工|监理|标包|标|包)(标|包段|项|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))[::]?|(子项目[0-9]+)|采购项目(包组)[::\\s]+?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)")
-	//b:=MultiReg.MatchString(text)
-	//log.Debug(b)
-	//res := MultiReg.FindAllStringSubmatch(text, -1)
-	//log.Debug(res)
-	var winnerReg13     = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])([\\s]+)?(成交候选人|成交供应商|中标候选人)[\\s]+(.*)[\\s]+([0-9\\.\\s万元]+)")
-	text := `第一中标候选人        宁夏金宸达建筑工程有限公司        13166946.44元                108天        合格        曹赢        宁2642015201604192        94.15
-	            第二中标候选人        宁夏第二建筑有限公司        13399999.21元                108天        合格        王海伦        宁2642007200900126        93.87
-	            第三中标候选人        宁夏第一建筑有限公司        13506333.26元                108天        合格`
-
-	b:=winnerReg13.MatchString(text)
-	log.Debug(b)
-	text = winnerReg13.ReplaceAllString(text,"\n${1}${3}:${4}\n中标金额:${5}\n")
-	log.Debug(text)
-
-
-	//str := `
-	//	中标信息:
-	//	一标段:中标内容:大型干湿两用扫地车2台
-	//	中标供应商:烟台海德专用汽车有限公司
-	//	中标金额:壹佰叁拾肆万圆整
-	//	:1340000元`
-	//var mmm = regexp.MustCompile("(第[0-9]包)|([(]?第[0-9]包[)]?)")
-	//res := mmm.FindAllStringSubmatch("工程监理项目(第4包)第三包", -1)
-	//log.Debug(res)
-	//pretreated.CheckMultiPackage(str, "中标信息") //找pkg分包包名
-}
 //导出日期相关数据
 func exportBidOpenEndtimeData() {
 	sess := save_mgo.GetMgoConn()
@@ -1486,6 +1731,69 @@ func escape(s string) string {
 	return news
 }
 
+//抽取试验方式
+func extract_test()  {
+
+	//reg := regexp.MustCompile(`^.{2}([大|小|中|学][学|院]|公司)$`)
+	//bol:= reg.MatchString("一名司公")
+	//log.Debug(bol)
+	//
+	//
+	//return
+
+	//text := `采购公告期:2020年04月13日
+	//		品目分类: 货物
+	//		采购项目包组: A
+	//		供应商组织机构代码: 9113012955331793XL
+	//		供应商名称: 河北金谷粮食机械有限公司
+	//		供应商地址: 赞皇县赞皇镇东白草坪村
+	//		主要标的名称: 高邑县金达粮油购销有限责任公司使用构建粮食延伸收购服务网点工程
+	//		金额(元): 685448.43元
+	//		优惠率: 无
+	//		服务要求: 详见文件`
+	//var MultiReg = regexp.MustCompile("(([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-])+(包|标段|分标))[::]?|(?:^|\\n)([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+(包))|([第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)#?((子|合同|分|施工|监理)?(标段?|合同段|标包)))|(((子|分|合同|施工|监理|标包|标|包)(标|包段|项|组)?)[     ]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+))[::]?|(子项目[0-9]+)|采购项目(包组)[::\\s]+?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)")
+	//b:=MultiReg.MatchString(text)
+	//log.Debug(b)
+	//res := MultiReg.FindAllStringSubmatch(text, -1)
+	//log.Debug(res)
+	var winnerReg20     = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)")
+	text := `1 中标单位候选人名称
+	   四川兴荣祺商贸有限公司
+       中标候选人单位名次
+	   第一中标人
+	   序号
+	   2 中标单位候选人名称
+	   乐山万美商贸有限公司
+	   中标候选人单位名次
+	   第二中标人
+	   序号
+	   3 中标单位候选人名称
+	   四川睿鑫宇商贸有限公司
+	   中标候选人单位名次
+	   第三中标人`
+
+	b:=winnerReg20.MatchString(text)
+	log.Debug(b)
+	text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n")
+	log.Debug(text)
+
+	var winnerReg0        = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选人|中标人[1-9]|第[一二三四五0-9]中标人)")
+	tttt := `第一中标人:四川兴荣祺商贸有限公司`
+	array := winnerReg0.FindAllStringSubmatchIndex(`第一中标人:四川兴荣祺商贸有限公司`, -1)
+	dddd := array[0]
+	log.Debug(tttt[(dddd[1]):])
+	//str := `
+	//	中标信息:
+	//	一标段:中标内容:大型干湿两用扫地车2台
+	//	中标供应商:烟台海德专用汽车有限公司
+	//	中标金额:壹佰叁拾肆万圆整
+	//	:1340000元`
+	//var mmm = regexp.MustCompile("(第[0-9]包)|([(]?第[0-9]包[)]?)")
+	//res := mmm.FindAllStringSubmatch("工程监理项目(第4包)第三包", -1)
+	//log.Debug(res)
+	//pretreated.CheckMultiPackage(str, "中标信息") //找pkg分包包名
+}
+
 //临时测试方法
 func testMethod()  {
 	qu.Catch()

+ 0 - 67
data_quality/src/config.json

@@ -1,67 +0,0 @@
-{
-  "udpport": ":17007",
-  "mongodb": {
-    "addrName": "172.17.4.85:27080",
-    "dbName": "qfw",
-    "collName": "result_20200917",
-    "pool": 10,
-    "site": {
-      "site_dbname": "qfw",
-      "site_coll": "site"
-    }
-  },
-  "qy_mongodb": {
-    "qy_addrName": "172.17.4.187:27081",
-    "qy_dbName": "mixdata",
-    "qy_collName": "qyxy_std",
-    "pool": 10
-  },
-  "score_standard": {
-    "total_score": 100,
-    "core_max": 60,
-    "core_each": 10,
-    "other_max": 40,
-    "other_each": 5,
-    "deduct_each": 5
-  },
-  "core_element":[
-    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
-    {"budget": {"type": "float", "min": 0,"max":50000000000}},
-    {"projectcode": {"type": "string", "min": 4,"max": 20}},
-    {"projectname": {"type": "string", "min": 4,"max": 40}},
-    {"buyer": {"type": "string", "min":4,"max": 15}},
-    {"winner": {"type": "string", "min": 4,"max": 15}}
-  ],
-  "other_element": [
-    {"agency": {"type": "string","min": 3,"max": 15}},
-    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
-    {"winneraddr": {"type": "string","min": 3,"max": 40}},
-    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
-    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
-    {"projectaddr": {"type": "string","min": 2,"max": 40}},
-    {"agencyperson": {"type": "string","min": 0,"max": 10}},
-    {"buyerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnertel": {"type": "string","min": 4,"max": 20}},
-    {"agencytel": {"type": "string","min": 4,"max": 20}},
-    {"buyertel": {"type": "string","min": 4,"max": 20}},
-    {"bidopentime": {"type": "int","min": 946656000}},
-    {"signaturedate": {"type": "int","min": 946656000}}
-  ],
-  "deduct_element": [
-    "area",
-    "city",
-    "site",
-    "toptype",
-    "subtype",
-    "title"
-  ],
-  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
-  "jkmail": {
-    "to": "zhengkun@topnet.net.cn",
-    "api": "http://10.171.112.160:19281/_send/_mail"
-  },
-  "nextNode": [
-
-  ]
-}

+ 0 - 244
data_quality/src/main.go

@@ -1,244 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"log"
-	mu "mfw/util"
-	"net"
-	"os"
-	"qfw/common/src/qfw/util"
-	qu "qfw/util"
-	"time"
-)
-
-
-var (
-	sysconfig    map[string]interface{} //配置文件
-	mgo          *MongodbSim            //mongodb操作对象
-	qy_mgo		 *MongodbSim
-	udpclient    mu.UdpClient             //udp对象
-	nextNode     []map[string]interface{} //下节点数组
-	siteMap	 	 map[string]map[string]interface{} //站点map
-	coll_name,qy_coll_name	string
-	core_element,other_element	[]map[string]interface{}	//要素
-	deduct_element 	[]string
-	total_score,core_max,core_each,other_max,other_each ,deduct_each int
-	specialaddr   string
-)
-
-func initSite()  {
-	//站点配置
-	mconf := sysconfig["mongodb"].(map[string]interface{})
-	site := mconf["site"].(map[string]interface{})
-	siteMap = make(map[string]map[string]interface{}, 0)
-	start := int(time.Now().Unix())
-	sess_site := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess_site)
-	res_site := sess_site.DB(site["site_dbname"].(string)).C(site["site_coll"].(string)).Find(map[string]interface{}{}).Sort("_id").Iter()
-	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
-		data_map := map[string]interface{}{
-			"area":     util.ObjToString(site_dict["area"]),
-			"city":     util.ObjToString(site_dict["city"]),
-		}
-		siteMap[util.ObjToString(site_dict["site"])] = data_map
-	}
-	log.Printf("new站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(siteMap))
-}
-
-func initMgo()  {
-	mconf := sysconfig["mongodb"].(map[string]interface{})
-	log.Println(mconf)
-	mgo = &MongodbSim{
-		MongodbAddr: mconf["addrName"].(string),
-		DbName:      mconf["dbName"].(string),
-		Size:        qu.IntAllDef(mconf["pool"], 10),
-	}
-	mgo.InitPool()
-
-
-	qy_mconf := sysconfig["qy_mongodb"].(map[string]interface{})
-	qy_mgo = &MongodbSim{
-		MongodbAddr: qy_mconf["qy_addrName"].(string),
-		DbName:      qy_mconf["qy_dbName"].(string),
-		Size:        qu.IntAllDef(qy_mconf["pool"], 10),
-	}
-	qy_mgo.InitPool()
-
-	coll_name = mconf["collName"].(string)
-	qy_coll_name = qy_mconf["qy_collName"].(string)
-
-	core_element = qu.ObjArrToMapArr(sysconfig["core_element"].([]interface{}))
-	other_element = qu.ObjArrToMapArr(sysconfig["other_element"].([]interface{}))
-	deduct_element  =qu.ObjArrToStringArr(sysconfig["deduct_element"].([]interface{}))
-
-	score_standard := sysconfig["score_standard"].(map[string]interface{})
-	total_score = qu.IntAll(score_standard["total_score"])
-	core_max = qu.IntAll(score_standard["core_max"])
-	core_each = qu.IntAll(score_standard["core_each"])
-	other_max = qu.IntAll(score_standard["other_max"])
-	other_each = qu.IntAll(score_standard["other_each"])
-	deduct_each = qu.IntAll(score_standard["deduct_each"])
-
-	specialaddr = sysconfig["specialaddr"].(string)
-}
-
-
-func init() {
-	//加载配置文件
-	qu.ReadConfig(&sysconfig)
-	initMgo()
-	initSite()//加载站点
-	log.Println("采用udp模式")
-	//utf8.RuneCountInString()
-}
-
-
-func mainT() {
-	go checkMapJob()
-	updport := sysconfig["udpport"].(string)
-	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
-	udpclient.Listen(processUdpMsg)
-	log.Println("Udp服务监听", updport)
-	time.Sleep(99999 * time.Hour)
-}
-
-//快速测试使用
-func main() {
-
-
-	sid := "1f0000000000000000000000"
-	eid := "9f0000000000000000000000"
-
-	log.Println(sid, "---", eid)
-	mapinfo := map[string]interface{}{}
-	if sid == "" || eid == "" {
-		log.Println("sid,eid参数不能为空")
-		os.Exit(0)
-	}
-	mapinfo["gtid"] = sid
-	mapinfo["lteid"] = eid
-	startTask([]byte{}, mapinfo)
-	time.Sleep(99999 * time.Hour)
-
-}
-
-
-func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
-	switch act {
-	case mu.OP_TYPE_DATA: //上个节点的数据
-		//从表中开始处理
-		var mapInfo map[string]interface{}
-		err := json.Unmarshal(data, &mapInfo)
-		log.Println("err:", err, "mapInfo:", mapInfo)
-		if err != nil {
-			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
-		} else if mapInfo != nil {
-			taskType := qu.ObjToString(mapInfo["stype"])
-			if taskType == "pingfen" {
-				go startTask(data, mapInfo)
-			} else {
-				log.Println("类别异常... ...")
-			}
-			key, _ := mapInfo["key"].(string)
-			if key == "" {
-				key = "udpok"
-			}
-			udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
-		}
-	case mu.OP_NOOP: //下个节点回应
-		ok := string(data)
-		if ok != "" {
-			log.Println("ok:", ok)
-			udptaskmap.Delete(ok)
-		}
-	}
-}
-
-
-func startTask(data []byte, mapInfo map[string]interface{}) {
-
-	//遍历数据
-	log.Println("开始评分流程")
-	defer qu.Catch()
-	//区间id
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  StringTOBsonId(mapInfo["gtid"].(string)),
-			"$lte": StringTOBsonId(mapInfo["lteid"].(string)),
-		},
-	}
-	log.Println("查询条件:",q)
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	it := sess.DB(mgo.DbName).C(coll_name).Find(&q).Iter()
-	updateExtract := [][]map[string]interface{}{}//更新需要
-	index:=0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
-		if index%10000 == 0 {
-			log.Println("当前数量:", index, tmp["_id"])
-		}
-
-		element_score,core_score,other_score,element_reason:=dealWithElementRate(tmp)
-		error_score,abnormal_score,error_reason,abnormal_reason:=dealWithErrorRate(tmp)
-		//log.Println("元素分:",element_score,"错误分:",error_score,"异常分:",abnormal_score)
-		updateExtract = append(updateExtract, []map[string]interface{}{
-			map[string]interface{}{
-				"_id": tmp["_id"],
-			},
-			map[string]interface{}{
-				"$set": map[string]interface{}{
-					"element_score": element_score,
-					"core_score": core_score,
-					"other_score": other_score,
-					"error_score":error_score,
-					"abnormal_score": abnormal_score,
-					"quality_reason":map[string]interface{}{
-						"element_reason":element_reason,
-						"error_reason":error_reason,
-						"abnormal_reason":abnormal_reason,
-					},
-				},
-			},
-		})
-		if len(updateExtract) >= 200 {
-			mgo.UpSertBulk(coll_name, updateExtract...)
-			updateExtract = [][]map[string]interface{}{}
-		}
-
-		tmp = make(map[string]interface{})
-	}
-
-
-	if len(updateExtract) >0 {
-		mgo.UpSertBulk(coll_name, updateExtract...)
-	}
-
-
-	log.Println("task quality over - 总计数量",index)
-
-	time.Sleep(30 * time.Second)
-
-	//任务完成,开始发送广播通知下面节点
-	log.Println("评分统计完成-发送udp")
-	for _, to := range nextNode {
-		sid, _ := mapInfo["gtid"].(string)
-		eid, _ := mapInfo["lteid"].(string)
-		key := sid + "-" + eid + "-" + util.ObjToString(to["stype"])
-		by, _ := json.Marshal(map[string]interface{}{
-			"gtid":  sid,
-			"lteid": eid,
-			"stype": util.ObjToString(to["stype"]),
-			"key":   key,
-		})
-		addr := &net.UDPAddr{
-			IP:   net.ParseIP(to["addr"].(string)),
-			Port: util.IntAll(to["port"]),
-		}
-		node := &udpNode{by, addr, time.Now().Unix(), 0}
-		udptaskmap.Store(key, node)
-		udpclient.WriteUdp(by, mu.OP_TYPE_DATA, addr)
-	}
-
-
-
-}

+ 0 - 165
data_quality/src/mark

@@ -1,165 +0,0 @@
-{
-  "udpport": ":17007",
-  "mongodb": {
-    "addrName": "192.168.3.207:27092",
-    "dbName": "extract_kf",
-    "collName": "zk_move",
-    "pool": 10,
-    "site": {
-      "site_dbname": "extract_kf",
-      "site_coll": "site"
-    }
-  },
-  "score_standard": {
-    "total_score": 100,
-    "core_max": 60,
-    "core_each": 10,
-    "other_max": 40,
-    "other_each": 5,
-    "deduct_each": 5
-  },
-  "qy_mongodb": {
-    "qy_addrName": "192.168.3.207:27092",
-    "qy_dbName": "extract_kf",
-    "qy_collName": "zk",
-    "pool": 10
-  },
-  "core_element":[
-    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
-    {"budget": {"type": "float", "min": 0,"max":50000000000}},
-    {"projectcode": {"type": "string", "min": 4,"max": 20}},
-    {"projectname": {"type": "string", "min": 4,"max": 40}},
-    {"buyer": {"type": "string", "min":4,"max": 15}},
-    {"winner": {"type": "string", "min": 4,"max": 15}}
-  ],
-  "other_element": [
-    {"agency": {"type": "string","min": 3,"max": 15}},
-    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
-    {"winneraddr": {"type": "string","min": 3,"max": 40}},
-    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
-    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
-    {"projectaddr": {"type": "string","min": 2,"max": 40}},
-    {"agencyperson": {"type": "string","min": 0,"max": 10}},
-    {"buyerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnertel": {"type": "string","min": 4,"max": 20}},
-    {"agencytel": {"type": "string","min": 4,"max": 20}},
-    {"buyertel": {"type": "string","min": 4,"max": 20}},
-    {"bidopentime": {"type": "int","min": 946656000}},
-    {"signaturedate": {"type": "int","min": 946656000}}
-  ],
-  "deduct_element": [
-    "area",
-    "city",
-    "site",
-    "toptype",
-    "subtype",
-    "title"
-  ],
-  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
-  "jkmail": {
-    "to": "zhengkun@topnet.net.cn",
-    "api": "http://10.171.112.160:19281/_send/_mail"
-  },
-  "nextNode": [
-
-  ]
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{
-  "udpport": ":17007",
-  "mongodb": {
-    "addrName": "172.17.4.85:27080",
-    "dbName": "qfw",
-    "collName": "result_20200715",
-    "pool": 10,
-    "site": {
-      "site_dbname": "qfw",
-      "site_coll": "site"
-    }
-  },
-  "qy_mongodb": {
-    "qy_addrName": "172.17.4.187:27081",
-    "qy_dbName": "mixdata",
-    "qy_collName": "qyxy_std",
-    "pool": 10
-  },
-  "score_standard": {
-    "total_score": 100,
-    "core_max": 60,
-    "core_each": 10,
-    "other_max": 40,
-    "other_each": 5,
-    "deduct_each": 5
-  },
-  "core_element":[
-    {"bidamount": {"type": "float", "min": 0,"max":50000000000}},
-    {"budget": {"type": "float", "min": 0,"max":50000000000}},
-    {"projectcode": {"type": "string", "min": 4,"max": 20}},
-    {"projectname": {"type": "string", "min": 4,"max": 40}},
-    {"buyer": {"type": "string", "min":4,"max": 15}},
-    {"winner": {"type": "string", "min": 4,"max": 15}}
-  ],
-  "other_element": [
-    {"agency": {"type": "string","min": 3,"max": 15}},
-    {"bidopenaddress": {"type": "string","min": 3,"max": 40}},
-    {"winneraddr": {"type": "string","min": 3,"max": 40}},
-    {"agencyaddr": {"type": "string","min": 3,"max": 40}},
-    {"buyeraddr": {"type": "string","min": 3,"max": 40}},
-    {"projectaddr": {"type": "string","min": 2,"max": 40}},
-    {"agencyperson": {"type": "string","min": 0,"max": 10}},
-    {"buyerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnerperson": {"type": "string","min": 0,"max": 10}},
-    {"winnertel": {"type": "string","min": 4,"max": 20}},
-    {"agencytel": {"type": "string","min": 4,"max": 20}},
-    {"buyertel": {"type": "string","min": 4,"max": 20}},
-    {"bidopentime": {"type": "int","min": 946656000}},
-    {"signaturedate": {"type": "int","min": 946656000}}
-  ],
-  "deduct_element": [
-    "area",
-    "city",
-    "site",
-    "toptype",
-    "subtype",
-    "title"
-  ],
-  "specialaddr": "号|楼|座|巷|街|幢|路|室|层|区|段|道|园|镇|乡|村|县|区|市|栋|厦|房|社区|单元|交叉口|服务中心",
-  "jkmail": {
-    "to": "zhengkun@topnet.net.cn",
-    "api": "http://10.171.112.160:19281/_send/_mail"
-  },
-  "nextNode": [
-
-  ]
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

+ 0 - 328
data_quality/src/mgo.go

@@ -1,328 +0,0 @@
-package main
-
-import (
-	"context"
-	"log"
-	"time"
-
-	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-	"go.mongodb.org/mongo-driver/mongo"
-	"go.mongodb.org/mongo-driver/mongo/options"
-)
-
-type MgoSess struct {
-	Db     string
-	Coll   string
-	Query  interface{}
-	Sorts  []string
-	fields interface{}
-	limit  int64
-	skip   int64
-	M      *MongodbSim
-}
-
-type MgoIter struct {
-	Cursor *mongo.Cursor
-}
-
-func (mt *MgoIter) Next(result interface{}) bool {
-	if mt.Cursor != nil {
-		if mt.Cursor.Next(nil) {
-			err := mt.Cursor.Decode(result)
-			if err != nil {
-				log.Println("mgo cur err", err.Error())
-				mt.Cursor.Close(nil)
-				return false
-			}
-			return true
-		} else {
-			mt.Cursor.Close(nil)
-			return false
-		}
-	} else {
-		return false
-	}
-
-}
-
-func (ms *MgoSess) DB(name string) *MgoSess {
-	ms.Db = name
-	return ms
-}
-
-func (ms *MgoSess) C(name string) *MgoSess {
-	ms.Coll = name
-	return ms
-}
-
-func (ms *MgoSess) Find(q interface{}) *MgoSess {
-	ms.Query = q
-	return ms
-}
-
-func (ms *MgoSess) Select(fields interface{}) *MgoSess {
-	ms.fields = fields
-	return ms
-}
-
-func (ms *MgoSess) Limit(limit int64) *MgoSess {
-	ms.limit = limit
-	return ms
-}
-func (ms *MgoSess) Skip(skip int64) *MgoSess {
-	ms.skip = skip
-	return ms
-}
-
-func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
-	ms.Sorts = sorts
-	return ms
-}
-
-func (ms *MgoSess) Iter() *MgoIter {
-	it := &MgoIter{}
-	find := options.Find()
-	if ms.skip > 0 {
-		find.SetSkip(ms.skip)
-	}
-	if ms.limit > 0 {
-		find.SetLimit(ms.limit)
-	}
-	find.SetBatchSize(100)
-	if len(ms.Sorts) > 0 {
-		sort := bson.M{}
-		for _, k := range ms.Sorts {
-			switch k[:1] {
-			case "-":
-				sort[k[1:]] = -1
-			case "+":
-				sort[k[1:]] = 1
-			default:
-				sort[k] = 1
-			}
-		}
-		find.SetSort(sort)
-	}
-	if ms.fields != nil {
-		find.SetProjection(ms.fields)
-	}
-	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, ms.Query, find)
-	if err != nil {
-		log.Println("mgo find err", err.Error())
-	} else {
-		it.Cursor = cur
-	}
-	return it
-}
-
-type MongodbSim struct {
-	MongodbAddr string
-	Size        int
-	//	MinSize     int
-	DbName   string
-	C        *mongo.Client
-	Ctx      context.Context
-	ShortCtx context.Context
-	pool     chan bool
-	UserName string
-	Password string
-}
-
-func (m *MongodbSim) GetMgoConn() *MgoSess {
-	//m.Open()
-	ms := &MgoSess{}
-	ms.M = m
-	return ms
-}
-
-func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
-	//m.Close()
-	ms.M = nil
-	ms = nil
-}
-
-func (m *MongodbSim) InitPool() {
-	opts := options.Client()
-	opts.SetConnectTimeout(3 * time.Second)
-	opts.ApplyURI("mongodb://" + m.MongodbAddr)
-	opts.SetMaxPoolSize(uint64(m.Size))
-	m.pool = make(chan bool, m.Size)
-
-	if m.UserName !="" && m.Password !="" {
-		cre := options.Credential{
-			Username:m.UserName,
-			Password:m.Password,
-		}
-		opts.SetAuth(cre)
-	}
-
-
-
-	opts.SetMaxConnIdleTime(2 * time.Hour)
-	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
-	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
-	client, err := mongo.Connect(m.ShortCtx, opts)
-	if err != nil {
-		log.Println("mgo init error:", err.Error())
-	} else {
-		m.C = client
-		log.Println("init success")
-	}
-}
-
-func (m *MongodbSim) Open() {
-	m.pool <- true
-}
-func (m *MongodbSim) Close() {
-	<-m.pool
-}
-
-//批量插入
-func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewUpdateOneModel()
-		write.SetFilter(d[0])
-		write.SetUpdate(d[1])
-		write.SetUpsert(true)
-		writes = append(writes, write)
-	}
-	r, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo upsert error:", e.Error())
-		return nil, false
-	}
-	//	else {
-	//		if r.UpsertedCount != int64(len(doc)) {
-	//			log.Println("mgo upsert uncomplete:uc/dc", r.UpsertedCount, len(doc))
-	//		}
-	//		return true
-	//	}
-	return r.UpsertedIDs, true
-}
-
-//批量插入
-func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	var writes []mongo.WriteModel
-	for _, d := range doc {
-		write := mongo.NewInsertOneModel()
-		write.SetDocument(d)
-		writes = append(writes, write)
-	}
-	_, e := coll.BulkWrite(m.Ctx, writes)
-	if e != nil {
-		log.Println("mgo savebulk error:", e.Error())
-		return false
-	}
-	return true
-}
-
-//保存
-func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.InsertOne(m.Ctx, doc)
-	if err != nil {
-		return nil
-	}
-	return r.InsertedID
-}
-
-//更新by Id
-func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
-	if err != nil {
-		return false
-	}
-	return true
-}
-
-//删除by id
-func (m *MongodbSim) DeleteById(c, id string) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//通过条件删除
-func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r, err := coll.DeleteMany(m.Ctx, query)
-	if err != nil {
-		return 0
-	}
-	return r.DeletedCount
-}
-
-//findbyid
-func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-//findone
-func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	r := coll.FindOne(m.Ctx, query)
-	v := map[string]interface{}{}
-	r.Decode(&v)
-	return v
-}
-
-//find
-func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
-	m.Open()
-	defer m.Close()
-	coll := m.C.Database(m.DbName).Collection(c)
-	op := options.Find()
-	r, err := coll.Find(m.Ctx, query, op.SetSort(sort), op.SetProjection(fields))
-	if err != nil {
-		log.Fatal(err)
-		return nil, err
-	}
-	var results []map[string]interface{}
-	if err = r.All(m.Ctx, &results); err != nil {
-		log.Fatal(err)
-		return nil, err
-	}
-	return results, nil
-}
-
-//创建_id
-func NewObjectId() primitive.ObjectID {
-	return primitive.NewObjectID()
-}
-
-func StringTOBsonId(id string) primitive.ObjectID {
-	objectId, _ := primitive.ObjectIDFromHex(id)
-	return objectId
-}
-
-func BsonTOStringId(id interface{}) string {
-	return id.(primitive.ObjectID).Hex()
-}

+ 0 - 304
data_quality/src/scoreExpError.go

@@ -1,304 +0,0 @@
-package main
-
-import (
-	qu "qfw/util"
-	"regexp"
-	"sync"
-	"time"
-	"unicode/utf8"
-)
-
-var sitelock sync.Mutex         //锁
-var error_reason map[string]interface{}
-var abnormal_reason map[string]interface{}
-
-// dealWithErrorRate checks one record field by field and counts hard errors
-// and softer abnormalities. It returns the error count, the abnormal count,
-// and two maps holding the reason recorded per field (errors, abnormalities).
-//
-// The package-level error_reason and abnormal_reason maps are replaced on
-// every call and are the maps that get returned, so calls share that state.
-func dealWithErrorRate(tmp map[string]interface{}) (int, int, map[string]interface{}, map[string]interface{}) {
-	error_reason = map[string]interface{}{}
-	abnormal_reason = map[string]interface{}{}
-	err_num, ab_num := 0, 0
-
-	// Amounts: budget and bidamount.
-	budget := qu.Float64All(tmp["budget"])
-	bidamount := qu.Float64All(tmp["bidamount"])
-	if tmp["budget"] == nil && tmp["bidamount"] == nil {
-		// Neither amount is present.
-		ab_num++
-		abnormal_reason["money"] = "budget-bidamount均不存在"
-	} else if tmp["budget"] != nil && tmp["bidamount"] != nil && budget != 0 && bidamount != 0 {
-		// Both amounts are present and non-zero: judge their ratio.
-		err, ab := amountAnalysis(budget, bidamount)
-		err_num += err
-		// Fixed: this was err_num+ab, which also counted a ratio error as an abnormality.
-		ab_num += ab
-	} else {
-		// At most one usable amount: it must be the one matching the notice type.
-		if budget == 0 && qu.ObjToString(tmp["toptype"]) == "招标" {
-			ab_num++
-			abnormal_reason["money"] = "招标:budget空"
-		}
-		if bidamount == 0 && qu.ObjToString(tmp["toptype"]) == "结果" {
-			ab_num++
-			abnormal_reason["money"] = "结果:bidamount空"
-		}
-	}
-
-	// Buyer.
-	buyer := qu.ObjToString(tmp["buyer"])
-	if buyer == "" {
-		ab_num++
-		abnormal_reason["buyer"] = "buyer空"
-	} else if utf8.RuneCountInString(buyer) < 4 {
-		err_num++
-		error_reason["buyer"] = "buyer长度"
-	} else if !buyerAnalysis(buyer) {
-		ab_num++
-		abnormal_reason["buyer"] = "buyer-企业无"
-	}
-
-	// Winner, only checked for result notices.
-	if qu.ObjToString(tmp["toptype"]) == "结果" {
-		winner := qu.ObjToString(tmp["winner"])
-		if winner == "" {
-			ab_num++
-			abnormal_reason["winner"] = "winner空"
-		} else if utf8.RuneCountInString(winner) < 4 {
-			err_num++
-			error_reason["winner"] = "winner长度"
-		} else if !winnerAnalysis(winner) {
-			ab_num++
-			abnormal_reason["winner"] = "winner-企业无"
-		}
-	}
-
-	// Title and project name: empty is an error, shorter than 4 runes is abnormal.
-	title := qu.ObjToString(tmp["title"])
-	if title == "" {
-		err_num++
-		error_reason["title"] = "空"
-	} else if utf8.RuneCountInString(title) < 4 {
-		ab_num++
-		abnormal_reason["title"] = "title长度"
-	}
-	projectname := qu.ObjToString(tmp["projectname"])
-	if projectname == "" {
-		err_num++
-		error_reason["projectname"] = "空"
-	} else if utf8.RuneCountInString(projectname) < 4 {
-		ab_num++
-		abnormal_reason["projectname"] = "projectname长度"
-	}
-
-	// Code group: project code and contract number.
-	projectcode := qu.ObjToString(tmp["projectcode"])
-	contractnumber := qu.ObjToString(tmp["contractnumber"])
-	if projectcode == "" && contractnumber == "" {
-		ab_num++
-		abnormal_reason["code"] = "code-空"
-	} else if !codesAnalysis(projectcode, contractnumber) {
-		ab_num++
-		abnormal_reason["code"] = "code-不符"
-	}
-
-	// Publish time must be positive and not later than now.
-	publishtime := qu.Int64All(tmp["publishtime"])
-	now := time.Now().Unix()
-	if publishtime <= 0 || publishtime-now > 0 {
-		err_num++
-		error_reason["publishtime"] = "publishtime-(超前)(0)"
-	}
-
-	// Province and city, compared against the site dictionary.
-	area := qu.ObjToString(tmp["area"])
-	if area == "A" {
-		area = "全国"
-	}
-	city := qu.ObjToString(tmp["city"])
-	site := qu.ObjToString(tmp["site"])
-	if !citysAnalysis(area, city, site) {
-		ab_num++
-		abnormal_reason["city"] = "area-站点不一致"
-	}
-
-	// Bid opening time and address, only for tender notices.
-	if qu.Int64All(tmp["bidopentime"]) == 0 && qu.ObjToString(tmp["bidopenaddress"]) == "" &&
-		qu.ObjToString(tmp["toptype"]) == "招标" {
-		ab_num++
-		abnormal_reason["bidopen"] = "bidopen-时间-地点-空"
-	}
-
-	// Notice category.
-	if qu.ObjToString(tmp["toptype"]) == "" {
-		err_num++
-		error_reason["toptype"] = "toptype:空"
-	} else if !categoryAnalysis(tmp) {
-		ab_num++
-		abnormal_reason["toptype"] = "toptype:内容>>"
-	}
-
-	return err_num, ab_num, error_reason, abnormal_reason
-}
-
-
-
-
-
-// amountAnalysis classifies the ratio bidamount/budget and returns
-// (errorCount, abnormalCount). A ratio within [0.1, 10] is fine; a ratio
-// within [0.01, 0.1) or (10, 100] is abnormal; anything else is an error.
-// The reason is written to the package-level abnormal_reason / error_reason
-// map under the key "money".
-func amountAnalysis(budget float64, bidamount float64) (int, int) {
-	ratio := bidamount / budget
-	switch {
-	case ratio >= 0.1 && ratio <= 10:
-		return 0, 0
-	case (ratio >= 0.01 && ratio < 0.1) || (ratio > 10 && ratio <= 100):
-		abnormal_reason["money"] = "bidamount/budget间隔异常"
-		return 0, 1
-	default:
-		error_reason["money"] = "bidamount/budget-比例错误"
-		return 1, 0
-	}
-}
-
-// buyerAnalysis reports whether a document whose company_name equals buyer
-// exists in the qy_coll_name collection. A query error counts as "not found".
-func buyerAnalysis(buyer string) bool {
-	q := map[string]interface{}{
-		"company_name": buyer,
-	}
-	data, _ := mgo.Find(qy_coll_name, q, nil, map[string]interface{}{"company_name": 1})
-	// len covers both a nil result and an empty non-nil slice, so "no match"
-	// is never reported as found.
-	return len(data) > 0
-}
-
-
-// winnerAnalysis reports whether a document whose company_name equals winner
-// exists in the qy_coll_name collection. A query error counts as "not found".
-func winnerAnalysis(winner string) bool {
-	q := map[string]interface{}{
-		"company_name": winner,
-	}
-	data, _ := mgo.Find(qy_coll_name, q, nil, map[string]interface{}{"company_name": 1})
-	// len covers both a nil result and an empty non-nil slice, so "no match"
-	// is never reported as found.
-	return len(data) > 0
-}
-
-// codesAnalysis validates the project code and the contract number.
-// Each non-empty value is rejected when it is shorter than 4 runes, or when
-// it is exactly 8 runes long and regAnalysis finds an 8-digit run in it
-// (i.e. it is just a yyyyMMdd-style number). Empty values are not checked.
-func codesAnalysis(projectcode string, contractnumber string) bool {
-	if projectcode != "" {
-		if utf8.RuneCountInString(projectcode) < 4 {
-			return false
-		}
-		if !regAnalysis(projectcode) && utf8.RuneCountInString(projectcode) == 8 {
-			return false
-		}
-	}
-
-	if contractnumber != "" {
-		if utf8.RuneCountInString(contractnumber) < 4 {
-			return false
-		}
-		// Fixed: the length test used projectcode here instead of contractnumber.
-		if !regAnalysis(contractnumber) && utf8.RuneCountInString(contractnumber) == 8 {
-			return false
-		}
-	}
-	return true
-}
-
-// citysAnalysis compares the record's area/city with the siteMap entry of
-// site. It returns false when that entry has an area but the record's area is
-// "全国", or when the entry has a city but the record's city is empty.
-// Records without a site, or whose site has no entry, pass.
-func citysAnalysis(area string, city string, site string) bool {
-	if site == "" {
-		return true
-	}
-	// siteMap is read under sitelock.
-	sitelock.Lock()
-	siteInfo := siteMap[site]
-	sitelock.Unlock()
-	if siteInfo == nil {
-		return true
-	}
-	areaMissing := area == "全国" && siteInfo["area"] != ""
-	cityMissing := city == "" && siteInfo["city"] != ""
-	return !(areaMissing || cityMissing)
-}
-
-
-
-
-// categoryAnalysis checks that a "招标" (tender) record does not already
-// carry result data: it returns false when such a record has a winner or a
-// non-zero bidamount. Every other toptype passes.
-func categoryAnalysis(tmp map[string]interface{}) bool {
-	if qu.ObjToString(tmp["toptype"]) != "招标" {
-		return true
-	}
-	hasWinner := qu.ObjToString(tmp["winner"]) != ""
-	hasAmount := qu.Float64All(tmp["bidamount"]) != 0
-	return !(hasWinner || hasAmount)
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-func regAnalysis(str string) bool {
-	reg:=`\d{8}`
-	regx,_ := regexp.Compile(reg)
-	if regx.FindString(str)!="" {
-		return false
-	}
-	return true
-}
-
-
-
-
-
-
-
-
-
-
-
-
-

+ 0 - 190
data_quality/src/scoreLogic.go

@@ -1,190 +0,0 @@
-package main
-
-import (
-	qu "qfw/util"
-	"strings"
-	"unicode/utf8"
-)
-var element_reason map[string]interface{}
-
-// dealWithElementRate scores a record by its populated elements.
-// It takes the core/other element names accepted by filterConfiguration,
-// drops those that fail a per-field content check, counts one deduction for
-// every deduct_element field that is empty, and passes the three counts to
-// calculateScore. It returns the total score, the core score, the other
-// score, and a map listing the comma-joined field names of each group.
-func dealWithElementRate(tmp map[string]interface{}) (int, int, int, map[string]interface{}) {
-	// Scoring standard for elements - change with care.
-	element_reason = map[string]interface{}{}
-
-	// First pass: configuration-driven validation.
-	coreArr, otherArr := filterConfiguration(tmp)
-
-	// Second pass: detail checks on core elements.
-	keptCore := []string{}
-	for _, field := range coreArr {
-		switch field {
-		case "projectname", "buyer", "winner":
-			if !isChinese(qu.ObjToString(tmp[field])) {
-				continue
-			}
-		}
-		keptCore = append(keptCore, field)
-	}
-
-	// Second pass: detail checks on the other elements.
-	keptOther := []string{}
-	for _, field := range otherArr {
-		keep := true
-		switch field {
-		case "agencyperson", "buyerperson", "winnerperson", "projectaddr":
-			keep = isChinese(qu.ObjToString(tmp[field]))
-		case "agencytel", "buyertel", "winnertel":
-			text := qu.ObjToString(tmp[field])
-			keep = !isChinese(text) && isTelephone(text)
-		case "bidopentime", "signaturedate":
-			keep = isTimestamp(qu.Int64All(tmp[field]))
-		case "bidopenaddress", "winneraddr", "agencyaddr", "buyeraddr":
-			keep = isPlaceAddr(qu.ObjToString(tmp[field]))
-		}
-		if keep {
-			keptOther = append(keptOther, field)
-		}
-	}
-
-	core_value := strings.Join(keptCore, ",")
-	other_value := strings.Join(keptOther, ",")
-
-	// Deductions: every empty deduct_element field lowers the count by one.
-	deductions, deduct_value := 0, ""
-	for _, field := range deduct_element {
-		if qu.ObjToString(tmp[field]) != "" {
-			continue
-		}
-		deductions--
-		if deduct_value == "" {
-			deduct_value = field
-		} else {
-			deduct_value = deduct_value + "," + field
-		}
-	}
-
-	total_s, core_s, other_s := calculateScore(len(keptCore), len(keptOther), deductions)
-	detail := map[string]interface{}{
-		"coreElement": map[string]interface{}{
-			"key": core_value,
-		},
-		"otherElement": map[string]interface{}{
-			"key": other_value,
-		},
-		"deductElement": map[string]interface{}{
-			"key": deduct_value,
-		},
-	}
-	return total_s, core_s, other_s, detail
-}
-
-// elementWithinRule reports whether value satisfies one configured rule.
-// The rule's "type" selects the check: "int" needs value > min, "float"
-// needs min < value < max, and "string" needs min < rune count < max.
-// Any other type never matches.
-func elementWithinRule(value interface{}, rule interface{}) bool {
-	conf := *qu.ObjToMap(rule)
-	switch qu.ObjToString(conf["type"]) {
-	case "int":
-		return qu.IntAll(value) > qu.IntAll(conf["min"])
-	case "float":
-		num := qu.Float64All(value)
-		return num > qu.Float64All(conf["min"]) && num < qu.Float64All(conf["max"])
-	case "string":
-		length := utf8.RuneCountInString(qu.ObjToString(value))
-		return length > qu.IntAll(conf["min"]) && length < qu.IntAll(conf["max"])
-	}
-	return false
-}
-
-// filterConfiguration returns the names of the core elements and of the
-// other elements that are present in tmp (non-nil) and satisfy their rule
-// from core_element / other_element respectively.
-func filterConfiguration(tmp map[string]interface{}) ([]string, []string) {
-	coreArr, otherArr := []string{}, []string{}
-
-	// Core elements.
-	for _, group := range core_element {
-		for field, rule := range group {
-			if tmp[field] != nil && elementWithinRule(tmp[field], rule) {
-				coreArr = append(coreArr, field)
-			}
-		}
-	}
-
-	// Other elements.
-	for _, group := range other_element {
-		for field, rule := range group {
-			if tmp[field] != nil && elementWithinRule(tmp[field], rule) {
-				otherArr = append(otherArr, field)
-			}
-		}
-	}
-
-	return coreArr, otherArr
-}
-
-
-
-
-// calculateScore turns element counts into scores. It returns the total
-// score (core and other parts capped at core_max / other_max, plus
-// deduct_each*deduct_num, then capped at total_score), followed by the
-// uncapped core score and the uncapped other score.
-func calculateScore(core_num int, other_num int, deduct_num int) (int, int, int) {
-	core_s := core_each * core_num
-	other_s := other_each * other_num
-
-	coreCapped := core_s
-	if coreCapped > core_max {
-		coreCapped = core_max
-	}
-	otherCapped := other_s
-	if otherCapped > other_max {
-		otherCapped = other_max
-	}
-
-	total := coreCapped + otherCapped + deduct_each*deduct_num
-	if total > total_score {
-		total = total_score
-	}
-	return total, core_s, other_s
-}

+ 0 - 73
data_quality/src/scoreMethod.go

@@ -1,73 +0,0 @@
-package main
-
-import (
-	"regexp"
-	"time"
-	"unicode"
-)
-
-// isPlaceAddr reports whether str looks like an address: it must not be a
-// telephone number and must contain a match of the specialaddr pattern.
-func isPlaceAddr(str string) bool {
-	if isTelephone(str) {
-		return false
-	}
-	// The compile error is ignored, as before.
-	addrReg, _ := regexp.Compile(specialaddr)
-	return addrReg.FindString(str) != ""
-}
-
-
-
-// isTimestamp reports whether the Unix time i (seconds) lies earlier than
-// 180 days after the current time.
-func isTimestamp(i int64) bool {
-	limit := time.Now().Unix() + 86400*180
-	return i < limit
-}
-
-func isTelephone(str string) bool {
-
-	reg1:=`^1[3|4|5|6|7|8|9][0-9]\d{8}$`
-	regx1,_ := regexp.Compile(reg1)
-
-	arr1:=regx1.FindAllString(str,-1)
-	if len(arr1)>0 {
-		return true
-	}
-
-	reg2:=`^(\d{2,4}-)?\d{7,8}$`
-	regx2,_ := regexp.Compile(reg2)
-	arr2:=regx2.FindAllString(str,-1)
-	if len(arr2)>0 {
-		return true
-	}
-
-	return false
-}
-
-
-
-
-func isChinese(str string) bool {
-	var count int
-	for _, v := range str {
-		if unicode.Is(unicode.Han, v) {
-			count++
-			break
-		}
-	}
-	return count > 0
-}
-
-func isChineseChar(str string) bool {
-	for _, r := range str {
-		if unicode.Is(unicode.Scripts["Han"], r) || (regexp.MustCompile("[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b]").MatchString(string(r))) {
-			return true
-		}
-	}
-	return false
-}

+ 0 - 59
data_quality/src/sendmail.go

@@ -1,59 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"io/ioutil"
-	"log"
-	mu "mfw/util"
-	"net"
-	"net/http"
-	"sync"
-	"time"
-)
-
-var udptaskmap = &sync.Map{}
-var tomail string
-var api string
-
-type udpNode struct { // pending UDP task; checkMapJob reads these from udptaskmap
-	data      []byte // payload handed to udpclient.WriteUdp when the task is re-sent
-	addr      *net.UDPAddr // destination handed to udpclient.WriteUdp on re-send
-	timestamp int64 // Unix seconds; checkMapJob compares it with the current time
-	retry     int // incremented by checkMapJob each time the task is found older than 120s
-}
-
-// checkMapJob loads the mail settings from sysconfig["jkmail"] and then,
-// once a minute and forever, walks udptaskmap. A task older than 120s is
-// re-sent over UDP; after more than 5 such rounds it is dropped and a
-// failure mail is requested from the mail API. A task older than 10s is
-// only logged as timing out.
-func checkMapJob() {
-	// Mail cannot be sent from inside the Aliyun intranet.
-	jkmail, _ := sysconfig["jkmail"].(map[string]interface{})
-	if jkmail != nil {
-		tomail, _ = jkmail["to"].(string)
-		api, _ = jkmail["api"].(string)
-	}
-	log.Println("start checkMapJob", tomail, sysconfig["jkmail"])
-	for {
-		udptaskmap.Range(func(k, v interface{}) bool {
-			// Fixed: an unchecked assertion left node nil for foreign values
-			// and the field access below would panic; skip such entries.
-			node, ok := v.(*udpNode)
-			if !ok || node == nil {
-				return true
-			}
-			now := time.Now().Unix()
-			if now-node.timestamp > 120 {
-				node.retry++
-				if node.retry > 5 {
-					log.Println("udp重试失败", k)
-					udptaskmap.Delete(k)
-					// k is formatted directly so a non-string key cannot panic.
-					res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", api, tomail, "extract-send-fail", k))
-					if err == nil {
-						defer res.Body.Close()
-						read, err := ioutil.ReadAll(res.Body)
-						log.Println("邮件发发送:", string(read), err)
-					}
-				} else {
-					log.Println("udp重发", k)
-					udpclient.WriteUdp(node.data, mu.OP_TYPE_DATA, node.addr)
-				}
-			} else if now-node.timestamp > 10 {
-				log.Println("udp任务超时中..", k)
-			}
-			return true
-		})
-		time.Sleep(60 * time.Second)
-	}
-}

+ 2 - 2
src/jy/extract/extract.go

@@ -551,7 +551,7 @@ func file2text(doc *map[string]interface{}) {
 
 //抽取-正文
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
-	e.ExtractDetail(j, isSite, j.SpiderCode) //正文-抽取属性
+	e.ExtractDetail(j, isSite, j.SpiderCode) //正文抽取属性
 	if jf != nil && jf.IsFile { //附件jf → j  合并
 		e.ExtractDetail(jf, isSite, j.SpiderCode)
 		for tmpk, xs := range jf.Result {
@@ -674,7 +674,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 					continue
 				}
 
-				if vc.Field =="bidamount" {
+				if vc.Field =="winner" {
 					//log.Debug("调试抽取字段")
 				}
 				////抽取-前置规则

+ 1 - 1
src/jy/extract/extractudp.go

@@ -146,7 +146,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				//	continue
 				//}
 				if spidercode[qu.ObjToString(v["spidercode"])] { //临时开标记录
-					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
+					//log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}
 				var j, jf *ju.Job

+ 5 - 0
src/jy/pretreated/division.go

@@ -338,6 +338,11 @@ func mergetext(titles []string, text string) string {
 			}else if strings.Contains(titles[i], lentexts[0]) ||strings.Contains(titles[i], lentexts[0]){
 				tt += titles[i] + ":" + lentexts[1] + "\n"
 			}
+		}else {
+			//特殊处理
+			if strings.Contains(v,"中标人 ") {
+				tt +=v+"\n"
+			}
 		}
 	}
 	if len(tt) == 0 {

+ 14 - 10
src/jy/pretreated/winnerorder.go

@@ -28,25 +28,31 @@ var (
 	numberReg         = regexp.MustCompile("[一二三四五六七八九十0-9]+")
 	numberReg2        = regexp.MustCompile("[\\d一二三四五六七八九十.,,]+")
 	thisNumberReg     = regexp.MustCompile("第" + numberReg.String())
-	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选人|[第|弟][0-9一二三四五]中标人|中标人[1-9])")
 	winnerReg1        = regexp.MustCompile("(^|[^为])(【?(推荐)?第[一二三四五六七八九十1-9]+(合格|名|包|标段)?】?([候|侯]选)?(入围|备选|成交|中[标|选])人?([((]成交[))])?([候|侯]选|排序)?(人(单位)?|供[应货]商|单位|机构)(名称)?为?)($|[^,;;。,])")
+	winnerReg0        = regexp.MustCompile("(中标候选人第\\d名|第[0-9一二三四五](中选|中标|成交)候选人|中标人[1-9]|[第|弟][一二三四五0-9]中标人)")
 	winnerReg2        = regexp.MustCompile("(排名第[一二三四五六七八九十1-9]+|[第|弟][一二三四五六七八九十1-9]+(中标|中选)?[候|侯]选人|中标候选人排名[:]\\d)")
-	//winnerReg2     = regexp.MustCompile("(第[一二三四五六七八九十1-9]+(候|侯)选人)")
+
 	winnerReg3     = regexp.MustCompile("((中标候选人)?第[一二三四五六七八九十1-9]+名)|()")
 	winnerReg4     = regexp.MustCompile("((确认|推荐|评审|排[名|序])[为::]+|(由高到低排序前.名|公示下列内容|(确定|推荐)的?中[标|选]候选人|\n中[标|选]候选.{1,3}\\s*\n|\n(中[标|选]候选.{1,3}[::\u3000\u2003\u00a0\\s]|成交候选供应商)|(排[名|序]|公[示|告]|具体|推荐|结果(公示)?|中[标|选]候选人.{0,2})如下|[一二三四五六七八九十\\d]+、(中[标|选]候选[^\n::]{1,8}|.{0,8}(成交|结果)信息|成交[^\n::]{2,8}))[为::]?)")
 	winnerReg5     = regexp.MustCompile("([^,;;。,、\n投标人]+?)(为?)(第[一二三四五六七八九十1-9]+(成交|中标)?([候|侯]选(人|供应商|单位|机构)|名)|排名第[一二三四五六七八九十1-9]+)([,;;。,、]|\\s+\n)")
 	winnerReg6     = regexp.MustCompile("(^(排名)?第[一二三四五六七八九十1-9]+[名中标成交备选候人单位供应商]*)")
 	winnerReg7     = regexp.MustCompile("第[一二三四五六七八九十]{1}标段[::]")
+
+	//带金额
 	winnerReg8     = regexp.MustCompile("(第[一二三四五六七八九十]中选候选人)[::\\s]+?[((]1[))][\\s]+?(单位名称)[::]?(.*)[\\s]+?[((]2[))][\\s]+(参选报价|投标报价(含税))[::]?(.*)")
 	//winnerReg8     = regexp.MustCompile("(第[一二三四五六七八九十]中标候选人)[::]?\n(1)单位名称:(.*)\n(2)投标报价(含税):(.*)")
     winnerReg9     = regexp.MustCompile("(第[一二三四五六七八九十]中[选|标]?候选人|中标人[1-9])[::\\s]+?([\u4E00-\u9FA5]{4,20})[\\s]+([0-9\\.\\s万元]+)")
     winnerReg10    = regexp.MustCompile("(第[一二三四五六七八九十]中标人)[::\\s]+?报价[¥]?([0-9\\.\\s万元]+)[;;]([\u4E00-\u9FA5]{4,20})")
 	winnerReg11     = regexp.MustCompile("([弟|第][一二三四五六七八九十]中[标|选]候选人)[::\\s]+?(单位名称|投标人名称)[::]?(.*)[\\s]+?(参选报价|投标报价[((]含税[))]|投标报价[((]元[))])[::]?(.*)")
-	winnerReg12     = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名)[::\\s]+?(.*)[\\s,,]+?(投标报价)[::]?([0-9\\.\\s万元]+)")
+	winnerReg12     = regexp.MustCompile("(中[标|选]候选人[弟|第][一二三四五六七八九十0-9]名|[弟|第][一二三四五六七八九十0-9]候选人)[::\\s ]+?(.*)[ \\s,,]+?(投标报价|金额)[::]?([0-9\\.\\s万元]+)")
 	winnerReg13     = regexp.MustCompile("([弟|第][一二三四五六七八九十0-9])\n(成交候选人|成交供应商)\n(.*)\n([0-9\\.\\s万元]+)")
 
+	//不带金额
+	winnerReg20     = regexp.MustCompile("(中标单位候选人名称)[\\s]+(.*)[\\s]+(中标候选人单位名次)[\\s]+([弟|第][一二三四五六七八九十0-9]中标人)")
+
+
 
-winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
+    winnerRegclear = regexp.MustCompile("(买方人员|经评审.*排名第[一二三四五六七八九十1-9]+)")
 	colonEndReg    = regexp.MustCompile("[::]$")
 	toWarpReg      = regexp.MustCompile("[,。,;;]+")
 	findamountReg  = regexp.MustCompile("[,。,;;\u3000\u2003\u00a0\\s]+")
@@ -75,6 +81,10 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 		return []map[string]interface{}{}
 	}
 	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
+	text = winnerReg20.ReplaceAllString(text,"\n${4}:${2}\n")
+
+
+
 	text = winnerReg8.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${5}\n")
 	text = winnerReg9.ReplaceAllString(text,"\n${1}:${2}\n中标金额:${3}\n")
 	text = winnerReg10.ReplaceAllString(text,"\n${1}:${3}\n中标金额:${2}\n")
@@ -84,13 +94,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int, isSite bool,
 
 
 
-	/*
-	第一候选人:河南冠嘉建设工程有限公司41.9450万元 合格  90日历天  孔祥子
 
-	中标人名称 中标价格 中标份额(%)
-	中标人1 山东益通安装有限公司 0.97元 55%
-	中标人2 山东鸿华建筑安装工程有限公司 0.96元 45%
-	*/
 	text = clearSpace1.ReplaceAllString(text, "") //清理(1)	单位名称:成都维诺信科技有限公司-->单位名称:成都维诺信科技有限公司
 	if strings.TrimSpace(text) == "" {
 		return []map[string]interface{}{}

+ 1 - 1
src/jy/util/clearHtml.go

@@ -110,7 +110,7 @@ func (c *Cut) ClearHtml(src string) string {
 
 			}
 		}
-		if tmp == "<br>" || tmp == "<br/>" || tmp == "<center>" || tmp == "</center>"{
+		if tmp == "<br>" || tmp == "</ul>" ||tmp == "<ul>"  || tmp == "<br/>" || tmp == "<center>" || tmp == "</center>"{
 			return "\n"
 		}
 		if tmp[1] != 47 { //开始标签

+ 1 - 1
src/res/fieldscore.json

@@ -265,7 +265,7 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "((中标|候选|成交|代表|投标|代理)人|我公司|定标|通知|异议|法院|要求|代理|详细|test|意见|原因|具体|结果|负责|付款|附件|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交[^通]|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|注:|\\d[\\s]{0,10}(\\d|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100}\\n)",
+                "regstr": "((中标|候选|成交|代表|投标|代理)人|我公司|定标|通知|异议|法院|要求|代理机构|详细|test|意见|原因|具体|结果|负责|付款|附件|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|废标|废止|流标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交[^通]|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|注:|\\d[\\s]{0,10}(\\d|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100}\\n)",
                 "score": -20
             },
 			{

+ 22 - 3
udpextract/src/config.json

@@ -1,14 +1,33 @@
 {
     "udpport": ":1784",
+    "jkmail": {
+        "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn",
+        "api": "http://172.17.145.179:19281/_send/_mail"
+    },
     "extractNode": [
         {
             "addr": "127.0.0.1",
             "port": 6601,
             "stype": "extract_1"
+        },
+        {
+            "addr": "127.0.0.1",
+            "port": 6602,
+            "stype": "extract_2"
         }
     ],
     "nextNode": [
-
+        {
+            "addr": "127.0.0.1",
+            "port": 1799,
+            "stype": "",
+            "memo": "生城市"
+        },
+        {
+            "addr": "127.0.0.1",
+            "port": 1762,
+            "stype": "",
+            "memo": "敏感词清理"
+        }
     ]
-
-} 
+}

+ 8 - 0
udpextract/src/main.go

@@ -34,6 +34,7 @@ func resetExtractLevel()  {
 	}
 }
 func main()  {
+	go checkMailJob()
 	updport := Config["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
 	udpclient.Listen(processUdpMsg)
@@ -60,6 +61,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					udpinfo = "udpok"
 				}
 				go udpclient.WriteUdp([]byte(udpinfo), mu.OP_NOOP, ra)
+				log.Println("")
 				log.Println("接收当前段落,udp通知抽取-需拆分",len(extractNode),"组", sid, "~~", eid)
 				udplock.Lock()
 				resetExtractLevel() //重置状态
@@ -72,6 +74,10 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 				if len(splitArr)!=len(extractNode){//直接发送整段
 					log.Println("段落划分异常...请检查程序...")
 				}
+
+				key:=fmt.Sprintf("%s~%s",sid,eid)
+				node := &udpNode{time.Now().Unix()}
+				udptaskmap.Store(key, node)
 				sendExtractNode(splitArr) //通知抽取
 			}
 		}
@@ -87,6 +93,8 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 				sid := qu.ObjToString(extractLevel["sid"])
 				eid := qu.ObjToString(extractLevel["eid"])
 				if sid != ""&&eid != "" {
+					key:=fmt.Sprintf("%s~%s",sid,eid)
+					udptaskmap.Delete(key)
 					sendNextNode(sid,eid)
 				}
 			}

+ 35 - 0
udpextract/src/mark

@@ -9,3 +9,38 @@
 调整:  上承:分类
        下接:多程序抽取(完成) , 补城市,敏感词
 
+
+
+{
+    "udpport": ":1784",
+    "jkmail": {
+        "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn",
+        "api": "http://172.17.145.179:19281/_send/_mail"
+    },
+    "extractNode": [
+        {
+            "addr": "127.0.0.1",
+            "port": 6601,
+            "stype": "extract_1"
+        },
+        {
+            "addr": "127.0.0.1",
+            "port": 6602,
+            "stype": "extract_2"
+        }
+    ],
+    "nextNode": [
+        {
+            "addr": "127.0.0.1",
+            "port": 1799,
+            "stype": "",
+            "memo": "生城市"
+        },
+        {
+            "addr": "127.0.0.1",
+            "port": 1762,
+            "stype": "",
+            "memo": "敏感词清理"
+        }
+    ]
+}

+ 48 - 0
udpextract/src/udptaskmail.go

@@ -0,0 +1,48 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"sync"
+	"time"
+)
+
+var udptaskmap = &sync.Map{}
+var tomail string
+var api string
+
+type udpNode struct { // value kept in udptaskmap and inspected by checkMailJob
+	timestamp int64 // Unix seconds; checkMailJob alerts once this is 600s or more in the past
+}
+
+// checkMailJob loads the mail settings from Config["jkmail"] and then, once
+// a minute and forever, walks udptaskmap. Every task whose timestamp is 600s
+// or more in the past is removed from the map and a warning mail carrying
+// the task key is requested from the mail API.
+func checkMailJob() {
+	// Mail cannot be sent from inside the Aliyun intranet.
+	jkmail, _ := Config["jkmail"].(map[string]interface{})
+	if jkmail != nil {
+		tomail, _ = jkmail["to"].(string)
+		api, _ = jkmail["api"].(string)
+	}
+	log.Println("start check mail Job", tomail, Config["jkmail"])
+	for {
+		udptaskmap.Range(func(k, v interface{}) bool {
+			// Fixed: an unchecked assertion left node nil for foreign values
+			// and the field access below would panic; skip such entries.
+			node, ok := v.(*udpNode)
+			if !ok || node == nil {
+				return true
+			}
+			if time.Now().Unix()-node.timestamp < 600 {
+				return true
+			}
+			udptaskmap.Delete(k)
+			// k is formatted directly so a non-string key cannot panic.
+			res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", api, tomail, "extract_control-warning", k))
+			if err != nil {
+				log.Println("控制中心-邮件发送异常:", err)
+				return true
+			}
+			defer res.Body.Close()
+			read, err := ioutil.ReadAll(res.Body)
+			// Fixed: success used to be logged even for a failed read or a non-200 reply.
+			if err != nil || res.StatusCode != http.StatusOK {
+				log.Println("控制中心-邮件发送异常:", res.Status, string(read), err)
+				return true
+			}
+			log.Println("控制中心-邮件发送成功:", string(read), err)
+			return true
+		})
+		time.Sleep(60 * time.Second)
+	}
+}

+ 1 - 1
udpfilterdup/src/config.json

@@ -20,7 +20,7 @@
     },
     "jkmail": {
         "to": "zhengkun@topnet.net.cn,zhangjinkun@topnet.net.cn",
-        "api": "http://10.171.112.160:19281/_send/_mail"
+        "api": "http://172.17.145.179:19281/_send/_mail"
     },
     "nextNode": [
     ],

+ 0 - 1
udpfilterdup/src/datamap.go

@@ -203,7 +203,6 @@ func NewInfo(tmp map[string]interface{}) *Info {
 		subtype = "招标"
 	}
 
-
 	area := qutil.ObjToString(tmp["area"])
 	if area == "A" {
 		area = "全国"

+ 2 - 2
udpfilterdup/src/main.go

@@ -137,7 +137,7 @@ func init() {
 	}
 	log.Printf("new站点加载用时:%d秒,%d个\n", int(time.Now().Unix())-start, len(SiteMap))
 }
-func main() {
+func mainT() {
 	go checkMapJob()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
@@ -171,7 +171,7 @@ func main() {
 	time.Sleep(99999 * time.Hour)
 }
 //测试组人员使用
-func mainT() {
+func main() {
 	if TimingTask {
 		go historyTaskDay()
 		time.Sleep(99999 * time.Hour)

+ 3 - 3
udps/main.go

@@ -19,11 +19,11 @@ func main() {
 	flag.StringVar(&startDate, "start", "", "开始日期2006-01-02")
 	flag.StringVar(&endDate, "end", "", "结束日期2006-01-02")
 	flag.StringVar(&ip, "ip", "127.0.0.1", "ip")
-	flag.IntVar(&p, "p", 1784, "端口")
+	flag.IntVar(&p, "p", 6601, "端口")
 	flag.IntVar(&tmptime, "tmptime", 0, "时间查询")
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
-	flag.StringVar(&id1, "gtid", "6152c2c71a75b8f446966fd5", "gtid")
-	flag.StringVar(&id2, "lteid", "6152c3f21a75b8f446967574", "lteid")
+	flag.StringVar(&id1, "gtid", "1152c2c71a75b8f446966fd5", "gtid")
+	flag.StringVar(&id2, "lteid", "9152c3f21a75b8f446967574", "lteid")
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "biddingall", "stype,传递类型")
 	flag.StringVar(&bkey, "bkey", "", "bkey,加上此参数表示不生关键词和摘要")