فهرست منبع

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

apple 5 سال پیش
والد
کامیت
6c261e22e8

+ 2 - 2
fullproject/src_v1/config.json

@@ -2,11 +2,11 @@
     "loadStart": 0,
 	"validdays":150,
     "statusdays": 7,
-	"mongodbServers": "192.168.3.207:27082",
+	"mongodbServers": "192.168.3.207:27092",
     "mongodbPoolSize": 10,
     "mongodbName": "extract_kf",
 	"hints":"publishtime_1",
-    "extractColl": "jh_info",
+    "extractColl": "december",
     "projectColl": "jh_project",
     "backupFlag": true,
     "backupColl": "jh_project1",

+ 0 - 1
fullproject/src_v1/init.go

@@ -296,7 +296,6 @@ type ProjectInfo struct {
 	score         int
 	comStr        string
 	resVal, pjVal int
-	IdStatusInfo  map[string]map[string]interface{}
 }
 
 type Site struct {

+ 150 - 105
fullproject/src_v1/project.go

@@ -430,16 +430,6 @@ var FIELDS = []string{
 	"package",
 }
 
-var bidtype = map[string]string{
-	"招标": "招标",
-	"询价": "询价",
-	"竞谈": "竞谈",
-	"单一": "单一",
-	"竞价": "竞价",
-	"变更": "变更",
-	"邀标": "邀标",
-}
-
 var bidstatus = map[string]string{
 	"预告": "预告",
 	"中标": "中标",
@@ -492,34 +482,28 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	}
 	//projecthref保存
 	if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
-		if jsonData != nil && jsonData["projecthref"] != "" {
+		if jsonData != nil && qu.ObjToString(jsonData["projecthref"]) != "" {
 			set["projecthref"] = jsonData["projecthref"]
 		}
 	}
 
 	//招标类型
+	bt := qu.ObjToString(tmp["toptype"])
+	set["bidtype"] = bt
+	bs, _ := tmp["subtype"].(string)
 	p.mapBidLock.Lock()
-	bt := bidtype[thisinfo.SubType]
-	p.mapBidLock.Unlock()
-	if bt == "" {
-		bt = "招标"
+	if bidstatus[bs] != "" {
+		set["bidstatus"] = thisinfo.SubType
+	} else if tmp["infoformat"] == 2 {
+		set["bidstatus"] = "拟建"
+	} else if tmp["subytpe"] == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	} else {
+		set["bidstatus"] = "其它"
 	}
-	set["bidtype"] = bt
-	set["bidstatus"] = thisinfo.SubType
+	p.mapBidLock.Unlock()
 
 	p1, pkg := p.NewCachePinfo(pId, thisinfo, bt)
-	//招标信息是中标或者成交,保存bidstatus、budget、bidamount
-	if thisinfo.SubType == "中标" || thisinfo.SubType == "成交" {
-		p1.IdStatusInfo = map[string]map[string]interface{}{
-			thisinfo.Id: {
-				"projectname": thisinfo.ProjectName,
-				"bidstatus": thisinfo.SubType,
-				"budget": thisinfo.Budget,
-				"bidamount": thisinfo.Bidamount,
-			},
-		}
-	}
-
 	if len(thisinfo.Subscopeclass) > 0 {
 		s_subscopeclass := strings.Join(thisinfo.Subscopeclass, ",")
 		set["s_subscopeclass"] = s_subscopeclass
@@ -646,29 +630,24 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 		}
 	}
 	//2--lasttime
-	if thisinfo.Publishtime > pInfo.LastTime {
-		pInfo.LastTime = thisinfo.Publishtime
-		set["lasttime"] = thisinfo.Publishtime
-		p.mapBidLock.Lock()
-		defer p.mapBidLock.Unlock()
-		bt := bidtype[thisinfo.SubType]
-		if bt != "" {
-			set["bidtype"] = bt
-		}
-		bs, _ := tmp["subtype"].(string)
-		if bidstatus[bs] != "" {
-			set["bidstatus"] = thisinfo.SubType
-			if bidstatus[bs] != "预告" && bidstatus[bs] != "合同" {
-				set["jgtime"] = tmp["publishtime"]
-			}
-		} else if tmp["infoformat"] == 2 {
-			set["bidstatus"] = "拟建"
-		} else if tmp["subytpe"] == "招标" {
-			set["bidstatus"] = thisinfo.TopType
-		} else {
-			set["bidstatus"] = "其它"
+	pInfo.LastTime = thisinfo.Publishtime
+	set["lasttime"] = thisinfo.Publishtime
+	set["bidtype"] = tmp["toptype"]
+	bs, _ := tmp["subtype"].(string)
+	p.mapBidLock.Lock()
+	if bidstatus[bs] != "" {
+		set["bidstatus"] = thisinfo.SubType
+		if bidstatus[bs] != "预告" && bidstatus[bs] != "合同" {
+			set["jgtime"] = tmp["publishtime"]
 		}
+	} else if tmp["infoformat"] == 2 {
+		set["bidstatus"] = "拟建"
+	} else if tmp["subytpe"] == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	} else {
+		set["bidstatus"] = "其它"
 	}
+	p.mapBidLock.Unlock()
 
 	//废标、流标   处理时间
 	if thisinfo.SubType == "流标" || thisinfo.SubType == "废标" {
@@ -849,7 +828,7 @@ func (p *ProjectTask) CompareStatus(project *ProjectInfo, info *Info) (bool, int
 			if (info.Publishtime - project.FirstTime) > p.statusTime {
 				return true, 0
 			} else {
-				return true, 0
+				return false, 0
 			}
 		} else if project.Bidstatus == "成交" && info.SubType == "中标" {
 			return true, 0
@@ -920,8 +899,6 @@ func packageEle(map1 map[string]interface{}, id string) map[string]interface{} {
 }
 
 func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
-	budget := 0
-	bidamount := 0
 	p1 := map[string]interface{}{}
 	if project != nil && project.Package != nil && len(project.Package) > 0 {
 		p1 = project.Package
@@ -932,15 +909,6 @@ func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
 				if v2["bidstatus"] == nil {
 					v2["bidstatus"] = info.SubType
 				}
-				if isCount(project, v2[""]) {
-					
-				}
-				if v2["budget"] != nil {
-					budget = budget + v2["budget"].(float64)
-				}
-				if v2["bidamount"] != nil {
-					bidamount = bidamount + v2["bidamount"].(float64)
-				}
 				addFlag := false
 				for k1, v3 := range p1 {
 					if v4, ok := v3.([]map[string]interface{}); ok {
@@ -965,72 +933,149 @@ func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
 			if p2["bidstatus"] == nil {
 				p2["bidstatus"] = info.SubType
 			}
-			if p2["budget"] != nil {
-				budget = p2["budget"].(float64)
-			}
-			if p2["bidamount"] != nil {
-				bidamount = p2["bidamount"].(float64)
-			}
 			p1[k] = []map[string]interface{}{p2}
 		}
 	}
-	info.Budget = budget
-	info.Bidamount = bidamount
 	return p1
 }
 
 // CountAmount updates project.Budget and project.Bidamount (budget and winning-bid amount) from info and project.Package.
 func CountAmount(project *ProjectInfo, info *Info) {
-	//if project!= nil && project.Package != nil && len(project.Package) > 0 {
-	//// not many cases are considered for now; handled simply
-	//}
-
-	if info.Budget > 0 {
-		// whether the first notice in the project is a package/section project
-		key := titleGetPc.FindStringSubmatch(project.ProjectName)
-		if len(key) > 0 {
-			// check whether this package/section's budget has already been counted in the project
-			if !isCount(project, info.ProjectName) {
-				project.Budget = project.Budget + info.Budget
+	if info.HasPackage { // info carries packages: rebuild the budget from project.Package
+		budget := 0.0
+		for _, v := range project.Package{
+			v1, _ := v.([]map[string]interface{})
+			for _, v2 := range v1{
+				b1 := qu.Float64All(v2["budget"])
+				if b1 > 0 {
+					budget = budget + b1
+					break // only the first positive budget of each package list is counted
+				}
+			}
+		}
+		project.Budget = budget // overwrites the previous total rather than adding to it
+	}else {
+		// the notice has no packages
+		k := KeyPackage.FindStringSubmatch(project.ProjectName)
+		if len(k) > 0 {
+			// project name matches KeyPackage: the notice is treated as a single package
+			if len(project.Package) > 0 {
+				// the project already has packages
+				flag := false
+				for _, v := range project.Package{
+					v1, _ := v.([]map[string]interface{})
+					if len(v1) > 0 && v1[0]["name"] == info.ProjectName {
+						flag = true
+					}
+				}
+				if !flag { // no package list whose first entry is named info.ProjectName
+					project.Budget = project.Budget + info.Budget
+				}
+			}else {
+				// the project has no packages
+				if info.Budget > 0 {
+					project.Budget = project.Budget + info.Budget
+				}
 			}
 		}else {
+			// the notice is not a single package: keep the larger budget
 			if project.Budget < info.Budget {
 				project.Budget = info.Budget
 			}
 		}
 	}
-	// winning-bid, deal and contract notices: handle the winning amount (bidamount)
 	if info.SubType == "中标" || info.SubType == "成交" || info.SubType == "合同" {
-		if info.Bidamount > 0 {
-			key := titleGetPc.FindStringSubmatch(project.ProjectName)
-			if len(key) > 0 {
-				if !isCount(project, info.ProjectName) {
-					project.Bidamount = project.Bidamount + info.Bidamount
+		if info.HasPackage { // info carries packages: rebuild the winning amount from project.Package
+			bidamount := 0.0
+			for _, v := range project.Package{
+				v1, _ := v.([]map[string]interface{})
+				for _, v2 := range v1{
+					b1 := qu.Float64All(v2["bidamount"])
+					if b1 > 0 {
+						bidamount = bidamount + b1
+						break // only the first positive bidamount of each package list is counted
+					}
+				}
+			}
+			project.Bidamount = bidamount // overwrites the previous total rather than adding to it
+		}else {
+			// the notice has no packages
+			k := KeyPackage.FindStringSubmatch(project.ProjectName)
+			if len(k) > 0 {
+				// project name matches KeyPackage: the notice is treated as a single package
+				if len(project.Package) > 0 {
+					// the project already has packages
+					flag := false
+					for _, v := range project.Package{
+						v1, _ := v.([]map[string]interface{})
+						if len(v1) > 0 { // NOTE(review): unlike the budget branch, v1[0]["name"] is not compared with info.ProjectName here — confirm this is intended
+							flag = true
+						}
+					}
+					if !flag {
+						project.Bidamount = project.Bidamount + info.Bidamount
+					}
+				}else {
+					// the project has no packages
+					if info.Bidamount > 0 {
+						project.Bidamount = project.Bidamount + info.Bidamount
+					}
 				}
 			}else {
-				if project.Bidamount < project.Bidamount {
+				// the notice is not a single package: keep the larger winning amount
+				if project.Bidamount < info.Bidamount {
 					project.Bidamount = info.Bidamount
 				}
 			}
 		}
 	}
 
-	// save the info into IdStatusInfo
-	project.IdStatusInfo[info.Id] = map[string]interface{}{
-		"projectname": info.ProjectName,
-		"bidstatus": info.SubType,
-		"budget": info.Budget,
-		"bidamount": info.Bidamount,
-	}
-}
 
-func isCount(project *ProjectInfo, infoName string) bool {
-	if project.IdStatusInfo != nil && len(project.IdStatusInfo) > 0 {
-		for _, v := range project.IdStatusInfo{
-			if v["projectname"] == infoName {
-				return true
-			}
-		}
-	}
-	return false
+	//if info.Budget > 0 {
+	//	// whether the first notice in the project is a package/section project
+	//	key := KeyPackage.FindStringSubmatch(project.ProjectName)
+	//	if len(key) > 0 {
+	//		// check whether this package/section's budget has already been counted in the project
+	//		if !isCount(project, info.ProjectName) {
+	//			project.Budget = project.Budget + info.Budget
+	//		}
+	//	}else {
+	//		if project.Budget < info.Budget {
+	//			project.Budget = info.Budget
+	//		}
+	//	}
+	//}
+	//// winning-bid, deal and contract notices: handle the winning amount (bidamount)
+	//if info.SubType == "中标" || info.SubType == "成交" || info.SubType == "合同" {
+	//	if info.Bidamount > 0 {
+	//		key := KeyPackage.FindStringSubmatch(project.ProjectName)
+	//		if len(key) > 0 {
+	//			if !isCount(project, info.ProjectName) {
+	//				project.Bidamount = project.Bidamount + info.Bidamount
+	//			}
+	//		}else {
+	//			if project.Bidamount > info.Bidamount {
+	//				project.Bidamount = info.Bidamount
+	//			}
+	//		}
+	//	}
+	//}
+	//
+	//// save the info into IdStatusInfo
+	//if project.IdStatusInfo != nil {
+	//	project.IdStatusInfo[info.Id] = map[string]interface{}{
+	//		"projectname": info.ProjectName,
+	//		"bidstatus": info.SubType,
+	//		"budget": info.Budget,
+	//		"bidamount": info.Bidamount,
+	//	}
+	//}else {
+	//	project.IdStatusInfo = map[string]map[string]interface{}{}
+	//	project.IdStatusInfo[info.Id] = map[string]interface{}{
+	//		"projectname": info.ProjectName,
+	//		"bidstatus": info.SubType,
+	//		"budget": info.Budget,
+	//		"bidamount": info.Bidamount,
+	//	}
+	//}
 }

+ 4 - 1
fullproject/src_v1/task.go

@@ -480,7 +480,7 @@ func (p *ProjectTask) CommonMerge(tmp map[string]interface{}, info *Info) {
 					p.AllIdsMapLock.Lock()
 					comparePro := p.AllIdsMap[pid].P
 					p.AllIdsMapLock.Unlock()
-					_, ex := CompareStatus(comparePro, info)
+					_, ex := p.CompareStatus(comparePro, info)
 					p.UpdateProject(tmp, info, comparePro, -1, "AAAAAAAAAA", ex)
 				} else {
 					id, p1 := p.NewProject(tmp, info)
@@ -515,6 +515,9 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	if len(thisinfo.Subscopeclass) == 0 {
 		thisinfo.Subscopeclass = []string{}
 	}
+	if thisinfo.SubType == "" {
+		thisinfo.SubType = util.ObjToString(tmp["bidstatus"])
+	}
 
 	if thisinfo.Publishtime == 0 {
 		thisinfo.Publishtime = thisinfo.Comeintime

+ 31 - 31
fullproject/src_v1/update.go

@@ -68,7 +68,7 @@ func (p *ProjectTask) mergeAndModify(pInfoId string, index int, info *Info, tmp
 				//更新其它的项目
 				pro := MongoTool.FindById(ProjectColl, mergePro.Id.Hex())
 				backupPro(pro, )
-				choose, ex := CompareStatus(mergePro, info)
+				choose, ex := p.CompareStatus(mergePro, info)
 				if !choose {
 					p.UpdateProject(tmp, info, mergePro, i, comStr, ex)
 				}else {
@@ -165,7 +165,7 @@ func (p *ProjectTask) updateMerge(index int, info *Info, pInfoId string, tmp map
 			ex := 0
 			resArr := []*ProjectInfo{}
 			for _, res := range resN{
-				choose, e := CompareStatus(resN[0], info)
+				choose, e := p.CompareStatus(resN[0], info)
 				if !choose {
 					ex = e
 					resArr = append(resArr, res)
@@ -267,28 +267,20 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 		}
 	}
 	//2--lasttime
-	if thisinfo.Publishtime > pInfo.LastTime {
-		pInfo.LastTime = thisinfo.Publishtime
-		set["lasttime"] = thisinfo.Publishtime
-		p.mapBidLock.Lock()
-		bt := bidtype[thisinfo.SubType]
-		p.mapBidLock.Unlock()
-		if bt != "" {
-			set["bidtype"] = bt
-		}
-		if thisinfo.SubType != "" {
-			set["bidstatus"] = thisinfo.SubType
-			if thisinfo.SubType != "预告" {
-				set["jgtime"] = thisinfo.Publishtime
-			}
-		}else if thisinfo.Infoformat == 2 {
-			set["bidstatus"] = "拟建"
-		}else if thisinfo.SubType == "招标" {
-			set["bidstatus"] = thisinfo.TopType
-		}else {
-			set["bidstatus"] = thisinfo.SubType
+	pInfo.LastTime = thisinfo.Publishtime
+	set["lasttime"] = thisinfo.Publishtime
+	set["bidtype"] = thisinfo.SubType
+	if thisinfo.SubType != "" {
+		set["bidstatus"] = thisinfo.SubType
+		if thisinfo.SubType != "预告" {
+			set["jgtime"] = thisinfo.Publishtime
 		}
-
+	}else if thisinfo.Infoformat == 2 {
+		set["bidstatus"] = "拟建"
+	}else if thisinfo.SubType == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	}else {
+		set["bidstatus"] = thisinfo.SubType
 	}
 
 	//3\4\5--省、市、县
@@ -353,14 +345,17 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 		pInfo.Bidopentime = thisinfo.Bidopentime
 		set["bidopentime"] = pInfo.Bidopentime
 	}
-	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
-		pInfo.Bidamount = thisinfo.Bidamount
-		set["bidamount"] = pInfo.Bidamount
-	}
 
-	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
-		pInfo.Budget = thisinfo.Budget
-		set["budget"] = pInfo.Budget
+	//废标、流标   处理时间
+	if thisinfo.SubType == "流标" || thisinfo.SubType == "废标" {
+		pInfo.FirstTime = thisinfo.Publishtime
+		pInfo.Bidopentime = int64(0)
+		pInfo.LastTime = thisinfo.Publishtime
+
+		set["firsttime"] = thisinfo.Publishtime
+		set["zbtime"] = int64(0)
+		set["publishtime"] = thisinfo.Publishtime
+		set["bidopentime"] = int64(0)
 	}
 
 	if len(thisinfo.Topscopeclass) > 0 {
@@ -399,12 +394,17 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 	}
 
 	if thisinfo.HasPackage {
-		pkg, _, _ := PackageFormat(thisinfo, pInfo)
+		pkg := PackageFormat(thisinfo, pInfo)
 		set["multipackage"] = 1
 		pInfo.Package = pkg
 	}else {
 		set["multipackage"] = 0
 	}
+	//处理多包后,计算预算金额、中标金额
+	CountAmount(pInfo, thisinfo)
+	set["budget"] = pInfo.Budget
+	set["bidamount"] = pInfo.Bidamount
+
 
 	set["mpn"] = pInfo.MPN
 	set["mpc"] = pInfo.MPC

+ 3 - 3
src/config.json

@@ -2,19 +2,19 @@
     "port": "9090",
     "mgodb": "192.168.3.207:27092",
     "dbsize": 10,
-    "dbname": "extract_kf",
+    "dbname": "extract_dev32",
     "redis": "buyer=192.168.3.207:1679,winner=192.168.3.207:1679,agency=192.168.3.207:1679",
     "elasticsearch": "http://192.168.3.11:9800",
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "qualityaudit": false,
     "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,
-    "udptaskid": "5cdd3025698414032c8322b1",
+    "udptaskid": "5e103206234ddc34b406c5d1",
     "udpport": "1484",
     "nextNode": [
         {

+ 9 - 9
src/jy/extract/extract.go

@@ -26,13 +26,13 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut     = ju.NewCut()                          // extracts the body text and cleans it
-	ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
-	TaskList      map[string]*ExtractTask          // task list
-	ClearTaskList map[string]*ClearTask            // cleanup task list
-	saveLimit     = 100                            // batch size for saving extraction logs
-	PageSize      = 5000                           // query page size
-	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
+	cut           = ju.NewCut()                          // extracts the body text and cleans it
+	ExtLogs       map[*TaskInfo][]map[string]interface{} // extraction logs
+	TaskList      map[string]*ExtractTask                // task list
+	ClearTaskList map[string]*ClearTask                  // cleanup task list
+	saveLimit     = 100                                  // batch size for saving extraction logs
+	PageSize      = 5000                                 // query page size
+	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
 
@@ -267,7 +267,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	tmpdocument, err := goquery.NewDocumentFromReader(strings.NewReader(tmpDeatil))
 	if err == nil {
 		conlen := utf8.RuneCountInString(strings.Trim(tmpdocument.Text(), " "))
-		if conlen < 50 {
+		if conlen < 200 {
 			if isextFile {
 				detail += qu.ObjToString(doc["detailfile"])
 				doc["detail"] = detail
@@ -1871,7 +1871,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 1 - 0
src/jy/pretreated/colonkv.go

@@ -744,6 +744,7 @@ func HasOrderContactType(text string) []string {
 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
 func GetKVAll(content, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) *JobKv {
 	content = formatText(content, "kv")
+	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite,codeSite)
 	//	for _, kvs := range m1Kvs {
 	//		qutil.Debug(kvs.Key, kvs.Value)
 	//	}

+ 5 - 0
src/main.go

@@ -9,6 +9,8 @@ import (
 	_ "jy/front"
 	. "jy/router"
 	"jy/util"
+	"net/http"
+	_ "net/http/pprof"
 	qu "qfw/util"
 	//"qfw/util/elastic"
 	"qfw/util/redis"
@@ -42,6 +44,9 @@ func main() {
 	go extract.Export()
 	go Router.Run(":" + qu.ObjToString(util.Config["port"]))
 	go log.Debug("启动..", qu.ObjToString(util.Config["port"]))
+	go func() {
+		http.ListenAndServe("localhost:10000", nil)
+	}()
 	lock := make(chan bool)
 	<-lock
 }

+ 10 - 26
udpfilterdup/src/config.json

@@ -3,39 +3,23 @@
     "dupdays": 5,
     "mongodb": {
         "addr": "192.168.3.207:27092",
-        "pool": 15,
-        "db": "zhaolongyue",
-        "extract": "kedaxunfei_zhengfa_gnq",
-        "extract_copy": "a_testbidding",
-        "bidding": "bidding_126"
+        "pool": 5,
+        "db": "data_Xinxihua",
+        "extract": "20200103_fupin_data",
+        "site": {
+            "dbname": "zhaolongyue",
+            "coll": "site"
+        }
     },
     "jkmail": {
-        "to": "renzheng@topnet.net.cn",
+        "to": "zhangjinkun@topnet.net.cn",
         "api": "http://10.171.112.160:19281/_send/_mail"
     },
-    "nextNode": [
-        {
-            "addr": "127.0.0.11",
-            "port": 1482,
-            "stype": "project",
-            "memo": "合并项目"
-        },
-        {
-            "addr": "127.0.0.1",
-            "port": 1483,
-            "stype": "bidding",
-            "memo": "创建招标数据索引"
-        }
-    ],
-    "isMerger":false,
+    "nextNode": [],
+    "isMerger": false,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包)",
     "specialtitle_2": "项目([0-9a-zA-Z一二三四五六七八九十零123456789])",
-
-
     "beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
-
-
-
 }
 

+ 1 - 11
udpfilterdup/src/datamap.go

@@ -41,8 +41,6 @@ type Info struct {
 
 var datelimit = float64(432000) //五天
 var sitelock sync.Mutex         //锁
-var reason = ""
-
 
 //判重数据
 type datamap struct {
@@ -228,8 +226,7 @@ func NewInfo(tmp map[string]interface{}) *Info {
 }
 
 //判重方法
-func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
-	reason = ""
+func (d *datamap) check(info *Info) (b bool, source *Info, reason string) {
 	keys := []string{}
 	//不同时间段
 	d.lock.Lock()
@@ -280,7 +277,6 @@ L:
 							reason = "href相同"
 							b = true
 							source = v
-							reasons = reason
 							break L
 						}
 						if info.href != "" && info.href != v.href {
@@ -304,7 +300,6 @@ L:
 							reason = "标题关键词且包含关系"
 							b = true
 							source = v
-							reasons = reason
 							break L
 						}
 					}
@@ -315,7 +310,6 @@ L:
 						if quickHeavyMethodTwo(v, info, reason) {
 							b = true
 							source = v
-							reasons = reason
 							break
 						}
 					} else {
@@ -325,7 +319,6 @@ L:
 							if quickHeavyMethodTwo(v, info, reason) {
 								b = true
 								source = v
-								reasons = reason
 								break
 							}
 						} else {
@@ -333,7 +326,6 @@ L:
 							if quickHeavyMethodOne(v, info, reason) {
 								b = true
 								source = v
-								reasons = reason
 								break
 							}
 						}
@@ -368,8 +360,6 @@ L:
 }
 
 func (h *historymap) checkHistory(info *Info) (b bool, source *Info, reasons string) {
-	reason = ""
-
 	h.lock.Lock()
 	defer h.lock.Unlock()
 	keys := []string{}

+ 46 - 19
udpfilterdup/src/main.go

@@ -11,6 +11,7 @@ import (
 	"log"
 	mu "mfw/util"
 	"net"
+	"os"
 	"qfw/util"
 	"qfw/util/mongodb"
 	"regexp"
@@ -23,15 +24,13 @@ var (
 	mconf     map[string]interface{} //mongodb配置信息
 	mgo       *mongodb.MongodbSim    //mongodb操作对象
 	//siteMgo      *mongodb.MongodbSim
-	extract      string
-	extract_copy string
-	bidding      string
-	udpclient    mu.UdpClient             //udp对象
-	nextNode     []map[string]interface{} //下节点数组
-	dupdays      = 5                      //初始化判重范围
-	DM           *datamap                 //
-	HM           *historymap              //判重数据
-	lastid       = ""
+	extract   string
+	udpclient mu.UdpClient             //udp对象
+	nextNode  []map[string]interface{} //下节点数组
+	dupdays   = 5                      //初始化判重范围
+	DM        *datamap                 //
+	HM        *historymap              //判重数据
+	lastid    = ""
 	/*
 		5da3f2c5a5cb26b9b79847fc
 	*/
@@ -42,10 +41,15 @@ var (
 
 	isMerger bool                              //是否合并
 	SiteMap  map[string]map[string]interface{} //站点map
+
+	idtype, sid, eid string //测试人员判重使用
 )
 
 func init() {
 	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
+	flag.StringVar(&sid, "sid", "", "开始id")
+	flag.StringVar(&eid, "eid", "", "结束id")
+	flag.StringVar(&idtype, "idtype", "", "id类型,默认ObjectId:0,String:1")
 	flag.Parse()
 	//172.17.145.163:27080
 	util.ReadConfig(&Sysconfig)
@@ -57,7 +61,6 @@ func init() {
 		Size:        util.IntAllDef(mconf["pool"], 10),
 	}
 	extract = mconf["extract"].(string)
-	extract_copy = mconf["extract_copy"].(string)
 	mgo.InitPool()
 
 	//测试可以临时注释
@@ -69,13 +72,13 @@ func init() {
 	FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
 	isMerger = Sysconfig["isMerger"].(bool)
 
-	//配置站点Map
+	//站点配置
+	site := mconf["site"].(map[string]interface{})
 	SiteMap = make(map[string]map[string]interface{}, 0)
 	start := int(time.Now().Unix())
-	//站点配置
 	sess_site := mgo.GetMgoConn()
 	defer sess_site.Close()
-	res_site := sess_site.DB("zhaolongyue").C("site").Find(nil).Sort("_id").Iter()
+	res_site := sess_site.DB(site["dbname"].(string)).C(site["coll"].(string)).Find(nil).Sort("_id").Iter()
 	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
 		data_map := map[string]interface{}{
 			"area":     util.ObjToString(site_dict["area"]),
@@ -100,6 +103,21 @@ func main() {
 	time.Sleep(99999 * time.Hour)
 }
 
+// mainT is an entry point for the test team: it calls task once with the id range taken from sid and eid.
+func mainT() {
+	// presumably an example sid,eid pair: 568551000000000000000000,5e0f65000000000000000000
+	mapinfo := map[string]interface{}{}
+	if sid == "" || eid == "" { // both range bounds are required
+		log.Println("sid,eid参数不能为空")
+		os.Exit(0)
+	}
+	mapinfo["gtid"] = sid // lower bound of the id range handed to task
+	mapinfo["lteid"] = eid // upper bound of the id range handed to task
+	mapinfo["stop"] = "true" // NOTE(review): presumably tells task to stop after this run — confirm in task
+	task([]byte{}, mapinfo)
+	time.Sleep(5 * time.Second) // NOTE(review): presumably leaves time for background work to finish — confirm
+}
+
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 
 	fmt.Println("接受的段数据")
@@ -148,13 +166,22 @@ func task(data []byte, mapInfo map[string]interface{}) {
 	//区间id
 	sess := mgo.GetMgoConn()
 	defer mgo.DestoryMongoConn(sess)
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gt":  util.StringTOBsonId(mapInfo["gtid"].(string)),
-			"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
-		},
+	var q map[string]interface{}
+	if idtype == "1" {
+		q = map[string]interface{}{
+			"_id": map[string]interface{}{
+				"$gt":  mapInfo["gtid"].(string),
+				"$lte": mapInfo["lteid"].(string),
+			},
+		}
+	} else {
+		q = map[string]interface{}{
+			"_id": map[string]interface{}{
+				"$gt":  util.StringTOBsonId(mapInfo["gtid"].(string)),
+				"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
+			},
+		}
 	}
-
 	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
 	updateExtract := [][]map[string]interface{}{}
 	pool := make(chan bool, 16)