zhengkun 4 жил өмнө
parent
commit
d18e9b8362

+ 9 - 2
data_monitoring/listen_data/src/main.go

@@ -66,6 +66,7 @@ func initMgo()  {
 
 func init() {
 	//加载配置文件
+	return
 	qu.ReadConfig(&sysconfig)
 	flag.StringVar(&start, "s", "", "是否启动") //增量
 	flag.Parse()
@@ -74,6 +75,14 @@ func init() {
 
 
 func main()  {
+	save_mgo = &MongodbSim{
+		MongodbAddr: "192.168.3.207:27092",
+		DbName:      "zhengkun",
+		Size:        10,
+	}
+	save_mgo.InitPool()
+	decodeJyUrl()
+	return
 
 	c := cron.New()
 	c.AddFunc("0 50 8 ? * *", func() { dealWithSiteData() })
@@ -85,8 +94,6 @@ func main()  {
 	}else {
 		log.Println("正常监听...")
 	}
-
-	decodeJyUrl()
 	time.Sleep(99999*time.Hour)
 }
 

+ 357 - 1
data_monitoring/listen_data/src/zkmethod.go

@@ -1,19 +1,46 @@
 package main
 
 import (
+	"bytes"
+	"encoding/json"
 	"fmt"
 	log "github.com/donnie4w/go-logger/logger"
+	"github.com/tealeg/xlsx"
+	"io/ioutil"
+	"net/http"
+	"os"
 	qu "qfw/util"
 	"qfw/util/elastic"
 	"qfw/util/redis"
+	"reflect"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+	"unicode"
+	"unicode/utf8"
 )
 
+
+// task is a one-slot channel used as a semaphore by postBaiDuYun, which sends
+// on it before a request and receives from it afterwards.
+var task chan struct{} = make(chan struct{}, 1)
+
+// rpre matches the fixed article-URL prefix. Dots are escaped so that they
+// match only a literal '.' instead of any character.
+var rpre *regexp.Regexp = regexp.MustCompile(`https://www\.jianyu360\.com/article/content/`)
+
+// rsuf matches a literal ".html" and everything that follows it.
+var rsuf *regexp.Regexp = regexp.MustCompile(`(\.html).*`)
+
 // decodeJyUrl decodes a hard-coded encoded article id with
 // qu.CommonDecodeArticle("content", ...) and logs the first element of the
 // result. It takes no input and returns nothing.
 func decodeJyUrl()  {
 
-	test := "ABCY1wJYzwOMyg4NHdxZ3IkCCQCIDFjcWhwPw4nLS4NYGpzcQFUCSs%3D"
+	// Disabled variant: strip the URL prefix/suffix with rpre and rsuf from a
+	// full article URL before decoding it.
+	//jyurl := ""
+	//jyurl = rpre.ReplaceAllString(jyurl, "")
+	//jyurl = rsuf.ReplaceAllString(jyurl, "")
+	//new_id := qu.CommonDecodeArticle("content", jyurl)[0]
+	//log.Debug(new_id)
+
+    
+	test := "ABCY1wJYjxYJys7RHhjZHUoCDI4QCJ0XFJ0KB4nKDodd3tzeD9UCjE%3D"
 	var Decode  = qu.CommonDecodeArticle("content", test)
 	log.Debug(Decode[0])
+
+	return
 }
 //加密
 func encodeJyUrl()  {
@@ -21,6 +48,335 @@ func encodeJyUrl()  {
 	var Encode  = fmt.Sprintf(Url, qu.CommonEncodeArticle("content", "60b9bf4a8a2adb30a5a25000"))
 	log.Debug(Encode)
 }
+// dealWithBaiduYunData reads documents from the "bidding_test" collection,
+// asks getBaiduYunBuyer for buyer names found in each document's detail text,
+// and writes a comparison sheet to "百度云统计.xlsx".
+//
+// Only documents whose HTML-stripped detail is longer than 50 runes and whose
+// stored buyer is non-empty are sent for extraction. Reading stops once more
+// than 200 documents have been seen.
+func dealWithBaiduYunData() {
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q, total, isOK, exist, un_exist := map[string]interface{}{}, 0, 0, 0, 0
+	arr := make([]map[string]string, 0)
+	it := sess.DB(save_mgo.DbName).C("bidding_test").Find(&q).Select(map[string]interface{}{
+		"detail": 1,
+		"buyer":  1,
+	}).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total, isOK, exist, un_exist, tmp["_id"])
+		}
+		detail := trimHtml(qu.ObjToString(tmp["detail"]))
+		buyer := qu.ObjToString(tmp["buyer"])
+		if utf8.RuneCountInString(detail) > 50 && buyer != "" {
+			isOK++
+			new_buyer := getBaiduYunBuyer(detail)
+			if new_buyer != "" {
+				exist++
+			} else {
+				un_exist++
+			}
+			arr = append(arr, map[string]string{
+				"source": buyer,
+				"buyer":  new_buyer,
+			})
+		}
+		// Sample limit: stop after the first couple of hundred documents.
+		if total > 200 {
+			break
+		}
+		// Fresh map per document so fields of the previous one do not leak.
+		tmp = make(map[string]interface{})
+	}
+
+	log.Debug("处理完毕over......", total, isOK, exist, un_exist)
+	log.Debug("准备写入xlsx...", len(arr))
+	os.Remove("百度云统计.xlsx")
+	f := xlsx.NewFile()
+	sheet, err := f.AddSheet("训练")
+	if err != nil {
+		// The error used to be discarded; a nil sheet would panic on AddRow.
+		log.Debug("保存xlsx失败:", err)
+		return
+	}
+	row := sheet.AddRow()
+	row.AddCell().Value = "原采购"
+	row.AddCell().Value = "结果"
+	row.AddCell().Value = "新采购等"
+	for _, v := range arr {
+		row := sheet.AddRow()
+		buyer := v["source"]
+		new_buyer := v["buyer"]
+		row.AddCell().SetString(buyer)
+
+		// The extracted value is a "~"-joined list; the row is correct when
+		// any entry equals the stored buyer.
+		isTrue := false
+		for _, name := range strings.Split(new_buyer, "~") {
+			if name == buyer {
+				isTrue = true
+				break
+			}
+		}
+		result := ""
+		switch {
+		case isTrue:
+			result = "正确"
+		case new_buyer != "":
+			result = "异常"
+		}
+		row.AddCell().Value = result
+		row.AddCell().SetString(new_buyer)
+	}
+	err = f.Save("百度云统计.xlsx")
+	if err != nil {
+		log.Debug("保存xlsx失败:", err)
+		return
+	}
+	log.Debug("保存xlsx成功:", err)
+}
+// getBaiduYunBuyer posts detail to the Baidu custom entity-extraction endpoint
+// through postBaiDuYun and returns every non-empty "span" value found in the
+// response's "results" array, joined with "~". It returns "" when no span was
+// found or when the response carries no "results" array (that case is logged).
+//
+// NOTE(review): the access token is embedded in the URL literal below; it
+// should come from configuration rather than source control.
+func getBaiduYunBuyer(detail string) string {
+	body := map[string]interface{}{"text": detail}
+	data := postBaiDuYun("https://aip.baidubce.com/rpc/2.0/ai_custom/v1/entity_xtr/allbuyer?access_token=24.595a79beb92df28ae44081d8c069e32c.2592000.1627033355.282335-24414386",
+		body, "application/json")
+	results, ok := data["results"].([]interface{})
+	if !ok {
+		log.Debug("异常:", reflect.TypeOf(data["results"]), data["results"])
+		return ""
+	}
+
+	spans := make([]string, 0, len(results))
+	for _, v := range results {
+		// JSON objects decode to map[string]interface{}; anything else is
+		// skipped instead of being dereferenced through a pointer that may
+		// be nil.
+		item, isMap := v.(map[string]interface{})
+		if !isMap {
+			continue
+		}
+		if span := qu.ObjToString(item["span"]); span != "" {
+			spans = append(spans, span)
+		}
+	}
+	return strings.Join(spans, "~")
+}
+// baiduHTTPClient is shared by every postBaiDuYun call so that connections
+// can be reused instead of building a new client per request.
+var baiduHTTPClient = &http.Client{Timeout: 15 * time.Second}
+
+// postBaiDuYun JSON-encodes data, POSTs it to url with the given content type
+// and returns the response body decoded as a JSON object.
+//
+// Calls are serialised through the task channel, so only one request is in
+// flight at a time. On any failure (encoding, transport, reading, decoding)
+// the error is logged and the map decoded so far — possibly empty, never nil —
+// is returned instead of panicking.
+func postBaiDuYun(url string, data interface{}, contentType string) map[string]interface{} {
+	task <- struct{}{}
+	defer func() {
+		<-task
+	}()
+
+	dict := make(map[string]interface{})
+	jsonStr, err := json.Marshal(data)
+	if err != nil {
+		log.Debug("postBaiDuYun marshal error:", err)
+		return dict
+	}
+	resp, err := baiduHTTPClient.Post(url, contentType, bytes.NewBuffer(jsonStr))
+	if err != nil {
+		log.Debug("postBaiDuYun request error:", err)
+		return dict
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		// Still try to decode the body: it may carry an error description.
+		log.Debug("postBaiDuYun unexpected status:", resp.Status)
+	}
+	result, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		log.Debug("postBaiDuYun read error:", err)
+		return dict
+	}
+	if err := json.Unmarshal(result, &dict); err != nil {
+		log.Debug("postBaiDuYun decode error:", err)
+	}
+	return dict
+}
+
+
+// dealWithFullData walks every document of "result_dis_0618", derives the
+// fields to repair with fieldValidValue, and applies them with $set to the
+// document of the same id in "result_20210108" when FindById returns a record
+// there whose length is greater than 2. Updates run on goroutines, at most 10
+// at a time; the function waits for them and then sleeps 30 seconds before
+// logging the totals.
+func dealWithFullData() {
+	log.Debug("......处理全量数据")
+
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+
+	var (
+		total, isOK int
+		wg          = &sync.WaitGroup{}
+		pool        = make(chan bool, 10) // bounds concurrent updates
+		query       = map[string]interface{}{}
+		projection  = map[string]interface{}{
+			"bidmode":      1,
+			"getdocmethod": 1,
+			"agencyfee":    1,
+			"agencyrate":   1,
+			"docamount":    1,
+		}
+	)
+
+	it := sess.DB(save_mgo.DbName).C("result_dis_0618").Find(&query).Select(projection).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%100000 == 0 {
+			log.Debug("cur index ", total, isOK, tmp["_id"])
+		}
+		valid, updates := fieldValidValue(tmp)
+		docID := BsonTOStringId(tmp["_id"])
+		// Everything needed has been read; swap in a fresh map for the next
+		// document before any early continue.
+		tmp = make(map[string]interface{})
+		if !valid {
+			continue
+		}
+		source := save_mgo.FindById("result_20210108", docID)
+		if source == nil || len(source) <= 2 {
+			continue
+		}
+
+		isOK++ // eligible for update
+		pool <- true
+		wg.Add(1)
+		go func(set map[string]interface{}, id string) {
+			defer func() {
+				<-pool
+				wg.Done()
+			}()
+			save_mgo.UpdateById("result_20210108", id, map[string]interface{}{
+				"$set": set,
+			})
+		}(updates, docID)
+	}
+
+	wg.Wait()
+	time.Sleep(30 * time.Second)
+	log.Debug("处理完毕over......", total, isOK)
+}
+// fieldValidValue builds the set of fields worth copying from data.
+//
+// bidmode 1 and 2 become bidway "纸质投标" and "电子投标"; a non-empty
+// getdocmethod is copied; agencyfee, agencyrate and docamount are copied when
+// their float value is greater than zero. The boolean is true when at least
+// one field was produced.
+func fieldValidValue(data map[string]interface{}) (bool, map[string]interface{}) {
+	updates := make(map[string]interface{}, 0)
+
+	switch qu.IntAll(data["bidmode"]) {
+	case 1:
+		updates["bidway"] = "纸质投标"
+	case 2:
+		updates["bidway"] = "电子投标"
+	}
+
+	if method := qu.ObjToString(data["getdocmethod"]); method != "" {
+		updates["getdocmethod"] = method
+	}
+
+	for _, key := range []string{"agencyfee", "agencyrate", "docamount"} {
+		if amount := qu.Float64All(data[key]); amount > float64(0) {
+			updates[key] = amount
+		}
+	}
+
+	return len(updates) > 0, updates
+}
+// exportEntityData exports entity-annotation training rows to "训练模型.xlsx".
+//
+// It reads every document of "bidding_buyer_test", strips HTML from detail,
+// truncates it to 500 runes, and keeps the documents whose detail is longer
+// than 50 runes and literally contains the non-empty buyer. For each kept
+// document it records the rune offsets of every buyer occurrence as
+// "[start,end],采购单位" and writes the text plus one label per column.
+func exportEntityData() {
+	log.Debug("......导出数据")
+	sess := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(sess)
+	q := map[string]interface{}{}
+	arr := make([]map[string]string, 0)
+	total := 0
+	it := sess.DB(save_mgo.DbName).C("bidding_buyer_test").Find(&q).Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total, tmp["_id"])
+		}
+
+		detail := trimHtml(qu.ObjToString(tmp["detail"]))
+		buyer := qu.ObjToString(tmp["buyer"])
+		// The buyer must not contain symbols.
+		// NOTE(review): as written both replacements map a character to
+		// itself; presumably one side was a full-width parenthesis — confirm.
+		buyer = strings.ReplaceAll(buyer,"(","(")
+		buyer = strings.ReplaceAll(buyer,")",")")
+
+		length := utf8.RuneCountInString(detail)
+		if length > 50 && buyer != "" {
+			if length > 500 {
+				detail = string([]rune(detail)[0:500])
+			}
+			if strings.Contains(detail, buyer) {
+				arr = append(arr, map[string]string{
+					"detail": detail,
+					"buyer":  buyer,
+				})
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("准备完毕......", len(arr))
+
+	maxTag := 0
+	indexArrStr := make([]map[string]string, 0, len(arr))
+	for _, v := range arr {
+		detail, buyer := v["detail"], v["buyer"]
+		// The buyer is data, not a pattern: quote it so metacharacters such
+		// as parentheses match literally and cannot make MustCompile panic.
+		reg := regexp.MustCompile(regexp.QuoteMeta(buyer))
+		indexArr := reg.FindAllStringIndex(detail, -1)
+		if len(indexArr) > maxTag {
+			maxTag = len(indexArr)
+		}
+		// Convert byte offsets such as [[3 30] [48 75]] into rune offsets,
+		// one "[start,end],采购单位" label per occurrence.
+		buyerLen := utf8.RuneCountInString(buyer)
+		labels := make([]string, 0, len(indexArr))
+		for _, index := range indexArr {
+			head := utf8.RuneCountInString(detail[0:index[0]])
+			labels = append(labels, fmt.Sprintf("[%d,%d],采购单位", head, head+buyerLen))
+		}
+		indexArrStr = append(indexArrStr, map[string]string{
+			"detail": detail,
+			"index":  strings.Join(labels, ":"),
+		})
+	}
+
+	os.Remove("训练模型.xlsx")
+	f := xlsx.NewFile()
+	sheet, err := f.AddSheet("extract")
+	if err != nil {
+		// The error used to be discarded; a nil sheet would panic on AddRow.
+		log.Debug("保存xlsx失败:", err)
+		return
+	}
+	row := sheet.AddRow()
+	row.AddCell().Value = "文本内容"
+	for i := 1; i <= maxTag; i++ {
+		row.AddCell().SetString(fmt.Sprintf("实体标注%d", i))
+	}
+	for _, tmp := range indexArrStr {
+		row = sheet.AddRow()
+		row.AddCell().SetString(fmt.Sprintf("%s入库量", tmp["detail"]))
+		for _, str := range strings.Split(tmp["index"], ":") {
+			row.AddCell().SetString(str)
+		}
+	}
+	err = f.Save("训练模型.xlsx")
+	if err != nil {
+		log.Debug("保存xlsx失败:", err)
+		return
+	}
+	log.Debug("保存xlsx成功:", err)
+}
+// Patterns used by trimHtml, compiled once at package initialisation instead
+// of on every call.
+var (
+	trimHtmlTagRe    = regexp.MustCompile("\\<[\\S\\s]+?\\>")
+	trimHtmlStyleRe  = regexp.MustCompile("\\<style[\\S\\s]+?\\</style\\>")
+	trimHtmlScriptRe = regexp.MustCompile("\\<script[\\S\\s]+?\\</script\\>")
+	trimHtmlSpaceRe  = regexp.MustCompile("\\s{2,}")
+)
+
+// trimHtml strips HTML markup from src and returns the remaining text.
+//
+// Tags are lower-cased first so that the style/script patterns match
+// regardless of the original case; <style> and <script> elements are removed
+// together with their content; every remaining tag is replaced by a newline;
+// runs of two or more whitespace characters collapse to a single newline; the
+// result is trimmed of leading and trailing whitespace.
+func trimHtml(src string) string {
+	src = trimHtmlTagRe.ReplaceAllStringFunc(src, strings.ToLower)
+	src = trimHtmlStyleRe.ReplaceAllString(src, "")
+	src = trimHtmlScriptRe.ReplaceAllString(src, "")
+	src = trimHtmlTagRe.ReplaceAllString(src, "\n")
+	src = trimHtmlSpaceRe.ReplaceAllString(src, "\n")
+	return strings.TrimSpace(src)
+}
+// escape returns s with every character from a fixed set of special symbols
+// prefixed by an escape sequence; Han characters, numbers and letters are
+// copied unchanged. If s contains any other character (a space, for
+// instance) the result is the empty string.
+//
+// NOTE(review): the escape prefix is os.PathSeparator followed by a
+// backslash, so it differs between platforms — confirm this is intended.
+func escape(s string) string {
+	const special = `\+-!():^[]"{}~*?|&/#@><“”、.`
+	prefix := string([]rune{os.PathSeparator, '\\'})
+
+	var out strings.Builder
+	for _, c := range s {
+		switch {
+		case unicode.Is(unicode.Han, c), unicode.IsNumber(c), unicode.IsLetter(c):
+			out.WriteRune(c)
+		case strings.ContainsRune(special, c):
+			out.WriteString(prefix)
+			out.WriteRune(c)
+		default:
+			return ""
+		}
+	}
+	return out.String()
+}
 
 func testMethod()  {
 	qu.Catch()

+ 31 - 0
data_monitoring/vps_server/src/main.go

@@ -28,6 +28,10 @@ func init()  {
 	port = sysconfig["port"].(string)
 	arr := sysconfig["vpsIDs"].([]interface{})
 	idsArr = qu.ObjArrToStringArr(arr)
+
+
+
+	updatelock.Lock()
 	dataTmp = make(map[string]map[string]interface{},0)
 	for _,v := range idsArr{
 		id := qu.ObjToString(v)
@@ -39,6 +43,7 @@ func init()  {
 			"isProMail":0,
 		}
 	}
+	updatelock.Unlock()
 
 	during = qu.Int64All(sysconfig["during"])
 	isErr = qu.Int64All(sysconfig["isErr"])
@@ -69,12 +74,38 @@ func main() {
 
 	spec :=fmt.Sprintf("30 */%d * * * ?",during)
 	//spec =fmt.Sprintf("*/%d * * * * ?",during)
+
+	spec_reset := "0 0 0 * * ?"
+
 	c := cron.New()
 	c.AddFunc(spec, func() { taskFinishing()})
+	c.AddFunc(spec_reset, func() { resetRecordData()})
 	c.Start()
+
+
+
 	time.Sleep(99999 * time.Hour)
 }
 
+// resetRecordData replaces dataTmp with a fresh map holding one zeroed status
+// record per id in idsArr. The whole rebuild runs while holding updatelock.
+func resetRecordData() {
+	updatelock.Lock()
+	defer updatelock.Unlock()
+
+	log.Println("重置数据...")
+	dataTmp = make(map[string]map[string]interface{}, 0)
+	for _, rawID := range idsArr {
+		dataTmp[qu.ObjToString(rawID)] = map[string]interface{}{
+			"isHeart":   0,
+			"isErrNum":  0,
+			"isProcess": 0,
+			"isVpsMail": 0,
+			"isProMail": 0,
+		}
+	}
+	log.Println("重置数据...", len(dataTmp))
+}
+
+
 func handler(w http.ResponseWriter, r *http.Request) {
 	updatelock.Lock()
 	r.ParseForm() //解析参数,默认是不会解析的

+ 4 - 1
src/jy/clear/tonumber.go

@@ -39,7 +39,7 @@ func init() {
 	regOperator, _ = regexp.Compile(`[*|+|)*)]`)
 	regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
 	regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
-	regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?[\s|元|万|亿]{0,3}`)
+	regStrJe = regexp.MustCompile(`([1-9]\d*|0)(\.\d_+)?[\s|元|万|亿]{0,3}`)
 	regStrChar := `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
 	moneyRegChar, _ = regexp.Compile(regStrChar)
 	contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
@@ -110,8 +110,11 @@ func ChiToFloat(data []interface{}, spidercode ...string) []interface{} {
 //金额转换
 func ObjToMoney(data []interface{}, spidercode ...string) []interface{} {
 	//isfindUnit := true
+
 	tmpstr := (data)[0]
 	totmpstr := util.ObjToString(tmpstr)
+	//totmpstr = strings.ReplaceAll(totmpstr,"_","")
+	//data[0] = totmpstr
 	if kxjsReg.MatchString(totmpstr) {
 		fromString, err := decimal.NewFromString(totmpstr)
 		if err == nil {

+ 18 - 5
src/jy/extract/extract.go

@@ -657,6 +657,8 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		for _, vc1 := range tmprules {
 			for _, vc := range vc1 {
 				tmp := ju.DeepCopy(doc).(map[string]interface{})
+				if vc.Field == "budget" {
+				}
 				//是否进入逻辑
 				if !ju.Logic(vc.LuaLogic, tmp) {
 					continue
@@ -717,6 +719,9 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		}
 		//函数清理
 		for key, val := range j.Result {
+			if key =="budget" {
+				log.Debug(111)
+			}
 			for i, v := range val {
 				if v.Field == "project_duration" {
 					arr := clear.ObjToMoney([]interface{}{v.Value, j.Content}, j.SpiderCode, j.IsClearnMoney)
@@ -1949,7 +1954,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					tmp[v.Field] = v.Value
 					break
 				}
-				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget" ) && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
+				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
 					tmp[v.Field] = v.Value
 					break
 				}
@@ -2340,10 +2345,18 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 			delete(tmp, "bidamount")
 		}*/
 	}
-	//投标方式
-	if bidmode, ok := tmp["bidmode"].(int); !ok || bidmode==0 {
-		delete(tmp, "bidmode")
-	}
+	//投标方式-
+	bidway := qu.IntAll(tmp["bidway"])
+	if bidway == 1 {
+		tmp["bidway"] = "纸质投标"
+	}else if bidway == 2 {
+		tmp["bidway"] = "电子投标"
+	}else {
+		delete(tmp, "bidway")
+	}
+	//if bidmode, ok := tmp["bidmode"].(int); !ok || bidmode==0 {
+	//	delete(tmp, "bidmode")
+	//}
 	return tmp
 }
 

+ 3 - 2
src/jy/pretreated/tablev2.go

@@ -163,7 +163,8 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table, isSite bool, codeSi
 	td.tdIsHb(tr, table, bsontable, isSite, codeSite)
 	bhead := false
 	if td.TR.RowPos == 0 { //第一行
-		if utf8.RuneCountInString(td.Val) < 15 && td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
+		no_head_b, _, _, _, _ :=CheckCommon(td.Val,"normalhead")
+		if  !no_head_b && utf8.RuneCountInString(td.Val) < 15 && td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
 			bhead = true
 		}
 	}
@@ -288,7 +289,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, isSite bool, codeSite, tag str
 	}
 }
 
-var isnohead *regexp.Regexp = regexp.MustCompile("(个项目|奥图码|优惠比例|下浮比例)")
+var isnohead *regexp.Regexp = regexp.MustCompile("(个项目|奥图码|优惠比例|下浮比例|印刷服务)")
 
 //对td单元格值判断是否是表头和根据td内容长度进行分块处理
 func (td *TD) tdIsHb(tr *TR, table *Table, bsontable, isSite bool, codeSite string) {

+ 0 - 3
src/main.go

@@ -80,9 +80,6 @@ func main() {
 		http.ListenAndServe("localhost:10000", nil)
 	}()
 
-
-
-
 	lock := make(chan bool)
 	<-lock
 

+ 1 - 0
src/res/tablev1.json

@@ -25,6 +25,7 @@
 		"行业",
 		"价格",
 		"注册资金",
+		"印刷服务",
 		"[\\d]+标段$__M",
 		"(\\W{2,10}(名称|参数[及]?要求))$__M"
 	],

+ 25 - 1
udpfilterdup/src/dataMethod.go

@@ -52,7 +52,10 @@ func dealWithSpecialWordNumber(info*Info,v*Info) int {
 }
 
 //关键词再次判断
-func againRepeat(v *Info, info *Info) bool {
+func againRepeat(v *Info, info *Info ,site bool) bool {
+	if isPublishtimeInterval(info.publishtime,v.publishtime) && site {
+		return true
+	}
 	if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
 		return true
 	}
@@ -228,6 +231,27 @@ func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
 	}
 }
 
+// isPublishtimeInterval reports whether two publish times count as different:
+// true when they format to different yyyyMMdd days, or to the same day with a
+// gap of at least 43200 (presumably seconds, i.e. 12 hours — confirm the
+// unit). It returns false when either value is zero.
+func isPublishtimeInterval(i1 int64, i2 int64) bool {
+	if i1 == 0 || i2 == 0 {
+		return false
+	}
+	first, second := i1, i2
+	firstDay := qutil.FormatDateByInt64(&first, qutil.Date_yyyyMMdd)
+	secondDay := qutil.FormatDateByInt64(&second, qutil.Date_yyyyMMdd)
+	if firstDay != secondDay {
+		return true
+	}
+	// Same day: different only when the gap reaches the threshold.
+	return math.Abs(float64(i1-i2)) >= 43200.0
+}
+
 //开标时间区间为一天
 func isTheSameDay(i1 int64 ,i2 int64) bool {
 	if i1==0||i2==0 {

+ 4 - 4
udpfilterdup/src/datamap.go

@@ -320,7 +320,7 @@ L:
 					//不同href
 					if info.href != "" && info.href != v.href {
 						if v.title==info.title{
-							if !againRepeat(v, info) {//进行同站点二次判断
+							if !againRepeat(v, info,true)   {//进行同站点二次判断
 								reason = "同站点-href不同-标题相同等"
 								b = true
 								source = v
@@ -330,7 +330,7 @@ L:
 								continue
 							}
 						}else {
-							if againRepeat(v, info) {
+							if againRepeat(v, info,true)  {
 								continue
 							}
 						}
@@ -340,7 +340,7 @@ L:
 				specialNum:= dealWithSpecialWordNumber(info,v)
 				//前置条件 - 标题相关,有且一个关键词
 				if specialNum==1 {
-					if againRepeat(v, info) {
+					if againRepeat(v, info,false) {
 						continue
 					}
 				}
@@ -359,7 +359,7 @@ L:
 						}
 						if letter1==letter2 {
 							reason = reason + "标题关键词相等关系"
-							if !againRepeat(v, info) {//进行二级金额判断
+							if !againRepeat(v, info,false) {//进行二级金额判断
 								b = true
 								source = v
 								reasons = reason

+ 8 - 0
udpfilterdup/src/main.go

@@ -55,6 +55,7 @@ var (
 	userName,passWord 	string				//mongo -用户密码
 
 )
+var udptask chan struct{} = make(chan struct{}, 1)
 
 
 
@@ -198,6 +199,12 @@ func mainT() {
 //upd接收
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 	fmt.Println("接受的段数据")
+
+	udptask <- struct{}{}
+	defer func() {
+		<-udptask
+	}()
+
 	switch act {
 	case mu.OP_TYPE_DATA: //上个节点的数据
 		//从表中开始处理
@@ -309,6 +316,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 							"repeat_reason": reason,
 							"repeat_id":     source.id,
 							"dataging":		 0,
+							"updatetime_repeat" :util.Int64All(time.Now().Unix()),
 						},
 					},
 				}