Răsfoiți Sursa

中标单位联系人、电话优化

maxiaoshan 5 ani în urmă
părinte
comite
2648c1e722

+ 2 - 2
fullproject/src_v1/config.json

@@ -2,11 +2,11 @@
     "loadStart": 0,
 	"validdays":150,
     "statusdays": 7,
-	"mongodbServers": "192.168.3.207:27082",
+	"mongodbServers": "192.168.3.207:27092",
     "mongodbPoolSize": 10,
     "mongodbName": "extract_kf",
 	"hints":"publishtime_1",
-    "extractColl": "jh_info",
+    "extractColl": "december",
     "projectColl": "jh_project",
     "backupFlag": true,
     "backupColl": "jh_project1",

+ 0 - 1
fullproject/src_v1/init.go

@@ -296,7 +296,6 @@ type ProjectInfo struct {
 	score         int
 	comStr        string
 	resVal, pjVal int
-	IdStatusInfo  map[string]map[string]interface{}
 }
 
 type Site struct {

+ 150 - 105
fullproject/src_v1/project.go

@@ -430,16 +430,6 @@ var FIELDS = []string{
 	"package",
 }
 
-var bidtype = map[string]string{
-	"招标": "招标",
-	"询价": "询价",
-	"竞谈": "竞谈",
-	"单一": "单一",
-	"竞价": "竞价",
-	"变更": "变更",
-	"邀标": "邀标",
-}
-
 var bidstatus = map[string]string{
 	"预告": "预告",
 	"中标": "中标",
@@ -492,34 +482,28 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	}
 	//projecthref保存
 	if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
-		if jsonData != nil && jsonData["projecthref"] != "" {
+		if jsonData != nil && qu.ObjToString(jsonData["projecthref"]) != "" {
 			set["projecthref"] = jsonData["projecthref"]
 		}
 	}
 
 	//招标类型
+	bt := qu.ObjToString(tmp["toptype"])
+	set["bidtype"] = bt
+	bs, _ := tmp["subtype"].(string)
 	p.mapBidLock.Lock()
-	bt := bidtype[thisinfo.SubType]
-	p.mapBidLock.Unlock()
-	if bt == "" {
-		bt = "招标"
+	if bidstatus[bs] != "" {
+		set["bidstatus"] = thisinfo.SubType
+	} else if tmp["infoformat"] == 2 {
+		set["bidstatus"] = "拟建"
+	} else if tmp["subytpe"] == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	} else {
+		set["bidstatus"] = "其它"
 	}
-	set["bidtype"] = bt
-	set["bidstatus"] = thisinfo.SubType
+	p.mapBidLock.Unlock()
 
 	p1, pkg := p.NewCachePinfo(pId, thisinfo, bt)
-	//招标信息是中标或者成交,保存bidstatus、budget、bidamount
-	if thisinfo.SubType == "中标" || thisinfo.SubType == "成交" {
-		p1.IdStatusInfo = map[string]map[string]interface{}{
-			thisinfo.Id: {
-				"projectname": thisinfo.ProjectName,
-				"bidstatus": thisinfo.SubType,
-				"budget": thisinfo.Budget,
-				"bidamount": thisinfo.Bidamount,
-			},
-		}
-	}
-
 	if len(thisinfo.Subscopeclass) > 0 {
 		s_subscopeclass := strings.Join(thisinfo.Subscopeclass, ",")
 		set["s_subscopeclass"] = s_subscopeclass
@@ -646,29 +630,24 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 		}
 	}
 	//2--lasttime
-	if thisinfo.Publishtime > pInfo.LastTime {
-		pInfo.LastTime = thisinfo.Publishtime
-		set["lasttime"] = thisinfo.Publishtime
-		p.mapBidLock.Lock()
-		defer p.mapBidLock.Unlock()
-		bt := bidtype[thisinfo.SubType]
-		if bt != "" {
-			set["bidtype"] = bt
-		}
-		bs, _ := tmp["subtype"].(string)
-		if bidstatus[bs] != "" {
-			set["bidstatus"] = thisinfo.SubType
-			if bidstatus[bs] != "预告" && bidstatus[bs] != "合同" {
-				set["jgtime"] = tmp["publishtime"]
-			}
-		} else if tmp["infoformat"] == 2 {
-			set["bidstatus"] = "拟建"
-		} else if tmp["subytpe"] == "招标" {
-			set["bidstatus"] = thisinfo.TopType
-		} else {
-			set["bidstatus"] = "其它"
+	pInfo.LastTime = thisinfo.Publishtime
+	set["lasttime"] = thisinfo.Publishtime
+	set["bidtype"] = tmp["toptype"]
+	bs, _ := tmp["subtype"].(string)
+	p.mapBidLock.Lock()
+	if bidstatus[bs] != "" {
+		set["bidstatus"] = thisinfo.SubType
+		if bidstatus[bs] != "预告" && bidstatus[bs] != "合同" {
+			set["jgtime"] = tmp["publishtime"]
 		}
+	} else if tmp["infoformat"] == 2 {
+		set["bidstatus"] = "拟建"
+	} else if tmp["subytpe"] == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	} else {
+		set["bidstatus"] = "其它"
 	}
+	p.mapBidLock.Unlock()
 
 	//废标、流标   处理时间
 	if thisinfo.SubType == "流标" || thisinfo.SubType == "废标" {
@@ -849,7 +828,7 @@ func (p *ProjectTask) CompareStatus(project *ProjectInfo, info *Info) (bool, int
 			if (info.Publishtime - project.FirstTime) > p.statusTime {
 				return true, 0
 			} else {
-				return true, 0
+				return false, 0
 			}
 		} else if project.Bidstatus == "成交" && info.SubType == "中标" {
 			return true, 0
@@ -920,8 +899,6 @@ func packageEle(map1 map[string]interface{}, id string) map[string]interface{} {
 }
 
 func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
-	budget := 0
-	bidamount := 0
 	p1 := map[string]interface{}{}
 	if project != nil && project.Package != nil && len(project.Package) > 0 {
 		p1 = project.Package
@@ -932,15 +909,6 @@ func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
 				if v2["bidstatus"] == nil {
 					v2["bidstatus"] = info.SubType
 				}
-				if isCount(project, v2[""]) {
-					
-				}
-				if v2["budget"] != nil {
-					budget = budget + v2["budget"].(float64)
-				}
-				if v2["bidamount"] != nil {
-					bidamount = bidamount + v2["bidamount"].(float64)
-				}
 				addFlag := false
 				for k1, v3 := range p1 {
 					if v4, ok := v3.([]map[string]interface{}); ok {
@@ -965,72 +933,149 @@ func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
 			if p2["bidstatus"] == nil {
 				p2["bidstatus"] = info.SubType
 			}
-			if p2["budget"] != nil {
-				budget = p2["budget"].(float64)
-			}
-			if p2["bidamount"] != nil {
-				bidamount = p2["bidamount"].(float64)
-			}
 			p1[k] = []map[string]interface{}{p2}
 		}
 	}
-	info.Budget = budget
-	info.Bidamount = bidamount
 	return p1
 }
 
 //计算预算(budget)、中标金额(bidamount)
 func CountAmount(project *ProjectInfo, info *Info) {
-	//if project!= nil && project.Package != nil && len(project.Package) > 0 {
-	////暂时未考虑太多情况,简单处理
-	//}
-
-	if info.Budget > 0 {
-		//项目中第一条招标信息是否是包/段项目
-		key := titleGetPc.FindStringSubmatch(project.ProjectName)
-		if len(key) > 0 {
-			//判断项目中是否已经计算过该包/段的预算
-			if !isCount(project, info.ProjectName) {
-				project.Budget = project.Budget + info.Budget
+	if info.HasPackage {
+		budget := 0.0
+		for _, v := range project.Package{
+			v1, _ := v.([]map[string]interface{})
+			for _, v2 := range v1{
+				b1 := qu.Float64All(v2["budget"])
+				if b1 > 0 {
+					budget = budget + b1
+					break
+				}
+			}
+		}
+		project.Budget = budget
+	}else {
+		//招标没有多包
+		k := KeyPackage.FindStringSubmatch(project.ProjectName)
+		if len(k) > 0 {
+			//招标是单包
+			if len(project.Package) > 0 {
+				//项目有多包
+				flag := false
+				for _, v := range project.Package{
+					v1, _ := v.([]map[string]interface{})
+					if len(v1) > 0 && v1[0]["name"] == info.ProjectName {
+						flag = true
+					}
+				}
+				if !flag {
+					project.Budget = project.Budget + info.Budget
+				}
+			}else {
+				//项目没有多包
+				if info.Budget > 0 {
+					project.Budget = project.Budget + info.Budget
+				}
 			}
 		}else {
+			//招标不是单包
 			if project.Budget < info.Budget {
 				project.Budget = info.Budget
 			}
 		}
 	}
-	//中标、成交、合同     处理中标金额(bidamount)
 	if info.SubType == "中标" || info.SubType == "成交" || info.SubType == "合同" {
-		if info.Bidamount > 0 {
-			key := titleGetPc.FindStringSubmatch(project.ProjectName)
-			if len(key) > 0 {
-				if !isCount(project, info.ProjectName) {
-					project.Bidamount = project.Bidamount + info.Bidamount
+		if info.HasPackage {
+			bidamount := 0.0
+			for _, v := range project.Package{
+				v1, _ := v.([]map[string]interface{})
+				for _, v2 := range v1{
+					b1 := qu.Float64All(v2["bidamount"])
+					if b1 > 0 {
+						bidamount = bidamount + b1
+						break
+					}
+				}
+			}
+			project.Bidamount = bidamount
+		}else {
+			//招标没有多包
+			k := KeyPackage.FindStringSubmatch(project.ProjectName)
+			if len(k) > 0 {
+				//招标是单包
+				if len(project.Package) > 0 {
+					//项目有多包
+					flag := false
+					for _, v := range project.Package{
+						v1, _ := v.([]map[string]interface{})
+						if len(v1) > 0 {
+							flag = true
+						}
+					}
+					if !flag {
+						project.Bidamount = project.Bidamount + info.Bidamount
+					}
+				}else {
+					//项目没有多包
+					if info.Bidamount > 0 {
+						project.Bidamount = project.Bidamount + info.Bidamount
+					}
 				}
 			}else {
-				if project.Bidamount < project.Bidamount {
+				//招标不是单包
+				if project.Bidamount < info.Bidamount {
 					project.Bidamount = info.Bidamount
 				}
 			}
 		}
 	}
 
-	//保存信息到IdStatusInfo
-	project.IdStatusInfo[info.Id] = map[string]interface{}{
-		"projectname": info.ProjectName,
-		"bidstatus": info.SubType,
-		"budget": info.Budget,
-		"bidamount": info.Bidamount,
-	}
-}
 
-func isCount(project *ProjectInfo, infoName string) bool {
-	if project.IdStatusInfo != nil && len(project.IdStatusInfo) > 0 {
-		for _, v := range project.IdStatusInfo{
-			if v["projectname"] == infoName {
-				return true
-			}
-		}
-	}
-	return false
+	//if info.Budget > 0 {
+	//	//项目中第一条招标信息是否是包/段项目
+	//	key := KeyPackage.FindStringSubmatch(project.ProjectName)
+	//	if len(key) > 0 {
+	//		//判断项目中是否已经计算过该包/段的预算
+	//		if !isCount(project, info.ProjectName) {
+	//			project.Budget = project.Budget + info.Budget
+	//		}
+	//	}else {
+	//		if project.Budget < info.Budget {
+	//			project.Budget = info.Budget
+	//		}
+	//	}
+	//}
+	////中标、成交、合同     处理中标金额(bidamount)
+	//if info.SubType == "中标" || info.SubType == "成交" || info.SubType == "合同" {
+	//	if info.Bidamount > 0 {
+	//		key := KeyPackage.FindStringSubmatch(project.ProjectName)
+	//		if len(key) > 0 {
+	//			if !isCount(project, info.ProjectName) {
+	//				project.Bidamount = project.Bidamount + info.Bidamount
+	//			}
+	//		}else {
+	//			if project.Bidamount > info.Bidamount {
+	//				project.Bidamount = info.Bidamount
+	//			}
+	//		}
+	//	}
+	//}
+	//
+	////保存信息到IdStatusInfo
+	//if project.IdStatusInfo != nil {
+	//	project.IdStatusInfo[info.Id] = map[string]interface{}{
+	//		"projectname": info.ProjectName,
+	//		"bidstatus": info.SubType,
+	//		"budget": info.Budget,
+	//		"bidamount": info.Bidamount,
+	//	}
+	//}else {
+	//	project.IdStatusInfo = map[string]map[string]interface{}{}
+	//	project.IdStatusInfo[info.Id] = map[string]interface{}{
+	//		"projectname": info.ProjectName,
+	//		"bidstatus": info.SubType,
+	//		"budget": info.Budget,
+	//		"bidamount": info.Bidamount,
+	//	}
+	//}
 }

+ 4 - 1
fullproject/src_v1/task.go

@@ -480,7 +480,7 @@ func (p *ProjectTask) CommonMerge(tmp map[string]interface{}, info *Info) {
 					p.AllIdsMapLock.Lock()
 					comparePro := p.AllIdsMap[pid].P
 					p.AllIdsMapLock.Unlock()
-					_, ex := CompareStatus(comparePro, info)
+					_, ex := p.CompareStatus(comparePro, info)
 					p.UpdateProject(tmp, info, comparePro, -1, "AAAAAAAAAA", ex)
 				} else {
 					id, p1 := p.NewProject(tmp, info)
@@ -515,6 +515,9 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	if len(thisinfo.Subscopeclass) == 0 {
 		thisinfo.Subscopeclass = []string{}
 	}
+	if thisinfo.SubType == "" {
+		thisinfo.SubType = util.ObjToString(tmp["bidstatus"])
+	}
 
 	if thisinfo.Publishtime == 0 {
 		thisinfo.Publishtime = thisinfo.Comeintime

+ 31 - 31
fullproject/src_v1/update.go

@@ -68,7 +68,7 @@ func (p *ProjectTask) mergeAndModify(pInfoId string, index int, info *Info, tmp
 				//更新其它的项目
 				pro := MongoTool.FindById(ProjectColl, mergePro.Id.Hex())
 				backupPro(pro, )
-				choose, ex := CompareStatus(mergePro, info)
+				choose, ex := p.CompareStatus(mergePro, info)
 				if !choose {
 					p.UpdateProject(tmp, info, mergePro, i, comStr, ex)
 				}else {
@@ -165,7 +165,7 @@ func (p *ProjectTask) updateMerge(index int, info *Info, pInfoId string, tmp map
 			ex := 0
 			resArr := []*ProjectInfo{}
 			for _, res := range resN{
-				choose, e := CompareStatus(resN[0], info)
+				choose, e := p.CompareStatus(resN[0], info)
 				if !choose {
 					ex = e
 					resArr = append(resArr, res)
@@ -267,28 +267,20 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 		}
 	}
 	//2--lasttime
-	if thisinfo.Publishtime > pInfo.LastTime {
-		pInfo.LastTime = thisinfo.Publishtime
-		set["lasttime"] = thisinfo.Publishtime
-		p.mapBidLock.Lock()
-		bt := bidtype[thisinfo.SubType]
-		p.mapBidLock.Unlock()
-		if bt != "" {
-			set["bidtype"] = bt
-		}
-		if thisinfo.SubType != "" {
-			set["bidstatus"] = thisinfo.SubType
-			if thisinfo.SubType != "预告" {
-				set["jgtime"] = thisinfo.Publishtime
-			}
-		}else if thisinfo.Infoformat == 2 {
-			set["bidstatus"] = "拟建"
-		}else if thisinfo.SubType == "招标" {
-			set["bidstatus"] = thisinfo.TopType
-		}else {
-			set["bidstatus"] = thisinfo.SubType
+	pInfo.LastTime = thisinfo.Publishtime
+	set["lasttime"] = thisinfo.Publishtime
+	set["bidtype"] = thisinfo.SubType
+	if thisinfo.SubType != "" {
+		set["bidstatus"] = thisinfo.SubType
+		if thisinfo.SubType != "预告" {
+			set["jgtime"] = thisinfo.Publishtime
 		}
-
+	}else if thisinfo.Infoformat == 2 {
+		set["bidstatus"] = "拟建"
+	}else if thisinfo.SubType == "招标" {
+		set["bidstatus"] = thisinfo.TopType
+	}else {
+		set["bidstatus"] = thisinfo.SubType
 	}
 
 	//3\4\5--省、市、县
@@ -353,14 +345,17 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 		pInfo.Bidopentime = thisinfo.Bidopentime
 		set["bidopentime"] = pInfo.Bidopentime
 	}
-	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
-		pInfo.Bidamount = thisinfo.Bidamount
-		set["bidamount"] = pInfo.Bidamount
-	}
 
-	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
-		pInfo.Budget = thisinfo.Budget
-		set["budget"] = pInfo.Budget
+	//废标、流标   处理时间
+	if thisinfo.SubType == "流标" || thisinfo.SubType == "废标" {
+		pInfo.FirstTime = thisinfo.Publishtime
+		pInfo.Bidopentime = int64(0)
+		pInfo.LastTime = thisinfo.Publishtime
+
+		set["firsttime"] = thisinfo.Publishtime
+		set["zbtime"] = int64(0)
+		set["publishtime"] = thisinfo.Publishtime
+		set["bidopentime"] = int64(0)
 	}
 
 	if len(thisinfo.Topscopeclass) > 0 {
@@ -399,12 +394,17 @@ func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[st
 	}
 
 	if thisinfo.HasPackage {
-		pkg, _, _ := PackageFormat(thisinfo, pInfo)
+		pkg := PackageFormat(thisinfo, pInfo)
 		set["multipackage"] = 1
 		pInfo.Package = pkg
 	}else {
 		set["multipackage"] = 0
 	}
+	//处理多包后,计算预算金额、中标金额
+	CountAmount(pInfo, thisinfo)
+	set["budget"] = pInfo.Budget
+	set["bidamount"] = pInfo.Bidamount
+
 
 	set["mpn"] = pInfo.MPN
 	set["mpc"] = pInfo.MPC

+ 6 - 4
src/config.json

@@ -3,18 +3,20 @@
     "mgodb": "192.168.3.207:27092",
     "dbsize": 10,
     "dbname": "extract_kf",
-    "redis": "buyer=192.168.3.207:1679,winner=192.168.3.207:1679,agency=192.168.3.207:1679",
-    "elasticsearch": "http://192.168.3.11:9800",
+    "redis": "buyer=127.0.0.1:6379,winner=127.0.0.1:6379,agency=127.0.0.1:6379",
+    "elasticsearch": "http://127.0.0.1:9200",
+    "elasticsearch_index": "extract_kf",
+    "elasticsearch_type": "enterprise_qyxy",
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
     "qualityaudit": false,
     "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,
-    "udptaskid": "5cdd3025698414032c8322b1",
+    "udptaskid": "5e103206234ddc34b406c5d1",
     "udpport": "1484",
     "nextNode": [
         {

BIN
src/gopkg.in/olivere.zip


+ 216 - 0
src/jy/admin/audit/qiyeku.go

@@ -0,0 +1,216 @@
+package audit
+
+import (
+	"context"
+	"encoding/json"
+	"github.com/gin-gonic/gin"
+	"gopkg.in/mgo.v2/bson"
+	"gopkg.in/olivere/elastic.v5"
+	. "jy/admin"
+	"jy/clear"
+	. "jy/mongodbutil"
+	"jy/util"
+	"log"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Enterprise library (企业库) admin endpoints.
+//
+// init registers the handlers on the Admin router. Writes go to the
+// "enterprise_qyxy" MongoDB collection through Mgo and are then mirrored to
+// Elasticsearch (util.ElasticClientIndex / util.ElasticClientType).
+// Elasticsearch errors are only logged; they never change the HTTP response.
+func init() {
+	// Page.
+	Admin.GET("/audit/qiyeku_info", func(c *gin.Context) {
+		c.HTML(200, "qiyekuinfo.html", gin.H{})
+	})
+	// Save: update the record when _id is a valid ObjectId hex string,
+	// otherwise insert a new one. Responds with rep=400 when company_name is
+	// blank, else rep=200 plus the record id.
+	Admin.POST("/audit/qiyeku_info/save", func(c *gin.Context) {
+		_id := c.PostForm("_id")
+		company_name := c.PostForm("company_name")
+		if strings.TrimSpace(company_name) == "" {
+			c.JSON(200, gin.H{"rep": 400})
+			return
+		}
+		// One field map feeds both the MongoDB write and the Elasticsearch
+		// mirror, so the two copies cannot drift apart.
+		e := map[string]interface{}{
+			"company_name":    company_name,
+			"alias":           c.PostForm("alias"),
+			"province":        c.PostForm("province"),
+			"city":            c.PostForm("city"),
+			"district":        c.PostForm("district"),
+			"capital":         clear.ObjToMoney([]interface{}{c.PostForm("capital"), ""})[0],
+			"company_address": c.PostForm("company_address"),
+		}
+		var sid string
+		if strings.TrimSpace(_id) != "" && bson.IsObjectIdHex(_id) {
+			sid = _id
+			// Mirror to Elasticsearch only when MongoDB accepted the update.
+			if Mgo.Update("enterprise_qyxy", bson.M{"_id": bson.ObjectIdHex(_id)}, bson.M{"$set": bson.M(e)}, false, false) {
+				_, err := util.ElasticClient.Update().Index(util.ElasticClientIndex).Type(util.ElasticClientType).
+					Id(sid).Doc(e).Refresh("true").Do(context.TODO())
+				if err != nil {
+					log.Println("update qyk err:", err)
+				}
+			}
+		} else {
+			sid = Mgo.Save("enterprise_qyxy", e)
+			// The id travels as the Elasticsearch document id, not in the body.
+			// NOTE(review): presumably Mgo.Save adds "_id" to the map — confirm.
+			delete(e, "_id")
+			if sid == "" {
+				// NOTE(review): assumes Mgo.Save reports failure as an empty id — confirm.
+				// Indexing without an id lets Elasticsearch generate one, which
+				// would leave a document with no MongoDB counterpart.
+				log.Println("save qyk err: empty id from Mgo.Save, skip elasticsearch index")
+			} else if _, err := util.ElasticClient.Index().Index(util.ElasticClientIndex).Type(util.ElasticClientType).
+				Id(sid).BodyJson(e).Refresh("true").Do(context.TODO()); err != nil {
+				log.Println("save qyk err:", err)
+			}
+		}
+		c.JSON(200, gin.H{"rep": 200, "saveid": sid})
+	})
+	// List query: phrase-match company_name in Elasticsearch with paging taken
+	// from the "start" and "length" form values (length defaults to 10).
+	Admin.POST("/audit/query_qyk/list", func(c *gin.Context) {
+		search, _ := c.GetPostForm("search[value]")
+		startstr, _ := c.GetPostForm("start")
+		limitstr, _ := c.GetPostForm("length")
+		start, _ := strconv.Atoi(startstr)
+		limit, _ := strconv.Atoi(limitstr)
+		if limit < 1 {
+			limit = 10
+		}
+		// An empty search term returns an empty table without querying.
+		if search == "" {
+			c.JSON(200, gin.H{"data": []map[string]interface{}{}, "recordsFiltered": 0, "recordsTotal": 0})
+			return
+		}
+		res, err := util.ElasticClient.Search(util.ElasticClientIndex).
+			Type(util.ElasticClientType).
+			Query(elastic.NewMatchPhraseQuery("company_name", search)).
+			Size(limit).
+			From(start).
+			Do(context.TODO())
+		if err != nil {
+			log.Println(err)
+			c.JSON(500, gin.H{"data": []map[string]interface{}{}, "recordsFiltered": 0, "recordsTotal": 0})
+			return
+		}
+		// Keep the slice non-nil so it is encoded as [] rather than null.
+		tmps := make([]map[string]interface{}, 0, len(res.Hits.Hits))
+		for _, v := range res.Hits.Hits {
+			// A hit without a source body cannot be decoded; skip it instead
+			// of dereferencing a nil pointer.
+			if v.Source == nil {
+				continue
+			}
+			tmp := make(map[string]interface{})
+			if err := json.Unmarshal(*v.Source, &tmp); err != nil {
+				log.Println(err)
+				continue
+			}
+			tmp["_id"] = v.Id
+			tmps = append(tmps, tmp)
+		}
+		c.JSON(200, gin.H{"data": tmps, "recordsFiltered": res.Hits.TotalHits, "recordsTotal": res.Hits.TotalHits})
+	})
+	// Delete: remove the record from MongoDB and from Elasticsearch. The
+	// Elasticsearch delete is attempted whatever the MongoDB result was.
+	Admin.POST("/audit/qiyeku_info/deleteQyk", func(c *gin.Context) {
+		_id := c.PostForm("_id")
+		if !bson.IsObjectIdHex(_id) {
+			c.JSON(200, gin.H{"rep": 400})
+			return
+		}
+		delisok := Mgo.Del("enterprise_qyxy", bson.M{"_id": bson.ObjectIdHex(_id)})
+		_, err := util.ElasticClient.Delete().Index(util.ElasticClientIndex).Type(util.ElasticClientType).Id(_id).Refresh("true").Do(context.TODO())
+		if err != nil {
+			log.Println("delete qyk err:", err)
+		}
+		c.JSON(200, gin.H{"rep": 200, "data": delisok})
+	})
+	// Query by id: return one field (q_field) of one record.
+	Admin.POST("/audit/query_qyk/ById", func(c *gin.Context) {
+		_id := c.PostForm("_id")
+		q_field := c.PostForm("q_field")
+		if !bson.IsObjectIdHex(_id) || strings.TrimSpace(q_field) == "" {
+			c.JSON(200, gin.H{"rep": 400})
+			return
+		}
+		// NOTE(review): q_field comes straight from the request and is used
+		// as a projection key — consider restricting it to known field names.
+		data, _ := Mgo.FindById("enterprise_qyxy", _id, bson.M{q_field: 1})
+		c.JSON(200, gin.H{"rep": 200, "data": data})
+	})
+	// Update industries: replace the record's "industry" array.
+	Admin.POST("/audit/query_qyk/UpdateIndustrys", func(c *gin.Context) {
+		_id := c.PostForm("_id")
+		industrys := c.PostFormArray("industry")
+		if !bson.IsObjectIdHex(_id) {
+			c.JSON(200, gin.H{"rep": 400})
+			return
+		}
+		b := Mgo.Update("enterprise_qyxy", bson.M{"_id": bson.ObjectIdHex(_id)}, bson.M{"$set": bson.M{"industry": industrys}}, false, false)
+		if b {
+			_, err := util.ElasticClient.Update().Index(util.ElasticClientIndex).Type(util.ElasticClientType).Id(_id).Doc(map[string]interface{}{
+				"industry": industrys,
+			}).Refresh("true").Do(context.TODO())
+			if err != nil {
+				log.Println("update yqk industry err :", err)
+			}
+		}
+		c.JSON(200, gin.H{"rep": 200, "data": b})
+	})
+	// Update contacts: replace the record's "contact" array. The four form
+	// arrays are parallel, so they must all have the same length.
+	Admin.POST("/audit/query_qyk/UpdateTels", func(c *gin.Context) {
+		_id := c.PostForm("_id")
+		contact_persons := c.PostFormArray("contact_persons")
+		contact_types := c.PostFormArray("contact_types")
+		phones := c.PostFormArray("phones")
+		topscopeclasss := c.PostFormArray("topscopeclasss")
+		if !(bson.IsObjectIdHex(_id) && len(contact_persons) == len(contact_types) && len(phones) == len(topscopeclasss) && len(phones) == len(contact_persons)) {
+			c.JSON(200, gin.H{"rep": 400})
+			return
+		}
+		// One timestamp for the whole batch.
+		now := time.Now().Unix()
+		contacts := make([]map[string]interface{}, 0, len(contact_persons))
+		for i, person := range contact_persons {
+			contacts = append(contacts, map[string]interface{}{
+				"contact_person": person,
+				"contact_type":   contact_types[i],
+				"phone":          phones[i],
+				"topscopeclass":  topscopeclasss[i],
+				"updatetime":     now,
+			})
+		}
+		b := Mgo.Update("enterprise_qyxy", bson.M{"_id": bson.ObjectIdHex(_id)}, bson.M{"$set": bson.M{"contact": contacts}}, false, false)
+		if b {
+			_, err := util.ElasticClient.Update().Index(util.ElasticClientIndex).Type(util.ElasticClientType).Id(_id).Doc(map[string]interface{}{
+				"contact": contacts,
+			}).Refresh("true").Do(context.TODO())
+			if err != nil {
+				log.Println("update yqk contact err :", err)
+			}
+		}
+		c.JSON(200, gin.H{"rep": 200, "data": b})
+	})
+}

+ 15 - 3
src/jy/extract/extract.go

@@ -32,7 +32,7 @@ var (
 	ClearTaskList map[string]*ClearTask                  //清理任务列表
 	saveLimit     = 100                                  //抽取日志批量保存
 	PageSize      = 5000                                 //查询分页
-	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
+	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
 
@@ -47,12 +47,16 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitSite()
 	ext.InitRulePres()
 	ext.InitRuleBacks(false)
+	ext.InitRuleBacks(true)
 	ext.InitRuleCore(false)
+	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
 	ext.InitTag(false)
+	ext.InitTag(true)
 	ext.InitClearFn(false)
+	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		ext.InitCityInfo()
@@ -125,12 +129,16 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitSite()
 	ext.InitRulePres()
 	ext.InitRuleBacks(false)
+	ext.InitRuleBacks(true)
 	ext.InitRuleCore(false)
+	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
 	ext.InitTag(false)
+	ext.InitTag(true)
 	ext.InitClearFn(false)
+	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		//ext.InitCityDFA()
@@ -259,7 +267,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	tmpdocument, err := goquery.NewDocumentFromReader(strings.NewReader(tmpDeatil))
 	if err == nil {
 		conlen := utf8.RuneCountInString(strings.Trim(tmpdocument.Text(), " "))
-		if conlen < 50 {
+		if conlen < 200 {
 			if isextFile {
 				detail += qu.ObjToString(doc["detailfile"])
 				doc["detail"] = detail
@@ -283,7 +291,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	}
 	toMap := qu.ObjToMap(doc["jsondata"])
 	//log.Debug("toMap", toMap)
-	if toMap != nil && len(*toMap) > 0 {
+	if (*toMap) != nil {
 		if (*toMap)["extweight"] == nil {
 			(*toMap)["extweight"] = ju.Config["jsondata_extweight"]
 		}
@@ -294,6 +302,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		CategorySecond: subtype,
 		Content:        qu.ObjToString(doc["detail"]),
 		SpiderCode:     qu.ObjToString(doc["spidercode"]),
+		Site:           qu.ObjToString(doc["site"]),
 		//Domain:     qu.ObjToString(doc["domain"]),
 		//Href:       qu.ObjToString(doc["href"]),
 		Title:     qu.ObjToString(doc["title"]),
@@ -311,6 +320,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 			Category:   toptype,
 			Content:    qu.ObjToString(doc["detailfile"]),
 			SpiderCode: qu.ObjToString(doc["spidercode"]),
+			Site:       qu.ObjToString(doc["site"]),
 			Title:      qu.ObjToString(doc["title"]),
 			Data:       &doc,
 			City:       qu.ObjToString(doc["city"]),
@@ -1550,6 +1560,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		auxinfo := auxInfo(j)
 		//从排序结果中取值
 		tmp := map[string]interface{}{} //抽取值
+		tmp["spidercode"] = j.SpiderCode
+		tmp["site"] = j.Site
 		tmp["jsondata"] = j.Jsondata
 		tmp["fieldall"] = auxinfo
 		for _, val := range result {

+ 74 - 4
src/jy/extract/score_jsondata.go

@@ -39,10 +39,80 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				}
 				continue
 			}
-
-			extFields := make([]*util.ExtField, 0)
-			extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: 0.1})
-			j.Result[v] = extFields
+			vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
+			if vv == ""|| strings.Contains(vv,"详见公告"){
+				continue
+			}
+			lockscore.Lock()
+			scoreRule := SoreConfig[v]
+			lockscore.Unlock()
+			tmpExtField := &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: vv, Score: 0.1}
+			//1.长度打分
+			valueLen := utf8.RuneCountInString(fmt.Sprint(tmpExtField.Value))
+			if valueLen < 1 {
+				tmpExtField.Score = -5
+				continue
+			}
+			if valueLen > 100 {
+				tmpExtField.Score = -99
+			}
+			if lengths, ok := scoreRule["length"].([]interface{}); ok {
+				for _, tmp := range lengths {
+					if length, ok := tmp.(map[string]interface{}); ok {
+						if ranges, ok := length["range"].([]interface{}); ok {
+							gt := util2.IntAll(ranges[0])
+							lte := util2.IntAll(ranges[1])
+							if lte < 0 { //∞
+								lte = 999999
+							}
+							score := util2.Float64All(ranges[2])
+							if valueLen > gt && valueLen <= lte {
+								tmpExtField.Score += score
+								break
+							}
+						}
+					}
+				}
+			}
+			//2.负面词打分
+			if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
+				for _, position := range positions {
+					if p, ok := position.(map[string]interface{}); ok {
+						util2.Try(func() {
+							if p["regexp"] != nil {
+								reg := p["regexp"].(*regexp.Regexp)
+								if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
+									tmpExtField.Score += util2.Float64All(p["score"])
+								}
+							}
+						}, func(err interface{}) {
+							log.Println(err)
+						})
+					}
+				}
+			}
+			//3.正面词打分
+			if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
+				for _, position := range positions {
+					if p, ok := position.(map[string]interface{}); ok {
+						util2.Try(func() {
+							if p["regexp"] != nil {
+								reg := p["regexp"].(*regexp.Regexp)
+								if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
+									tmpExtField.Score += util2.Float64All(p["score"])
+								}
+							}
+						}, func(err interface{}) {
+							log.Println(err)
+						})
+					}
+				}
+			}
+			if tmpExtField.Score > 0{
+				extFields := make([]*util.ExtField, 0)
+				extFields = append(extFields,tmpExtField )
+				j.Result[v] = extFields
+			}
 			//AddExtLog("extract", j.SourceMid, nil, (*j.Jsondata)[v], &RegLuaInfo{  "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
 			//AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], (*j.Jsondata)[v], &RegLuaInfo{  "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
 			continue

+ 1 - 0
src/jy/pretreated/analykv.go

@@ -349,6 +349,7 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 					bf = true
 					break
 				}
+				//if !filter_zbdw_ky.MatchString(k) && filter_zbdw_ky.MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["中标单位"]) {
 				if from == 1 && !ContactType["中标单位"].MatchString(k) && ContactType["中标单位"].MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["中标单位"]) {
 					matchMap["中标单位"][k] = true
 					k = "中标单位" + k

+ 4 - 2
src/jy/pretreated/analytable.go

@@ -2117,8 +2117,10 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 					continue
 				}
 				if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
-					tn.SortKV.RemoveKey(k1)
-					tn.assemblePackage(k1, val, index[0], isSite, codeSite)
+					if tn.SortKV.Map[k1] != nil{
+						tn.SortKV.RemoveKey(k1)
+						tn.assemblePackage(k1, val, index[0], isSite, codeSite)
+					} 
 					//log.Println("remove", k1, val)
 				}
 			}

+ 3 - 1
src/jy/pretreated/colonkv.go

@@ -758,7 +758,7 @@ func HasOrderContactType(text string) []string {
 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
 func GetKVAll(content, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) *JobKv {
 	content = formatText(content, "kv")
-	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from, isSite, codeSite)
+	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite,codeSite)
 	//	for _, kvs := range m1Kvs {
 	//		qutil.Debug(kvs.Key, kvs.Value)
 	//	}
@@ -882,6 +882,8 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string, isSite bool, codeSi
 						nextval += GetMoneyUnit(k, nextval)
 					}
 					kvTags[tk.Value] = append(kvTags[tk.Value], &Tag{Key: k, Value: nextval, Weight: tk.Weight})
+				}else if tk.Value == "项目名称" && nextval != ""{
+					kvTags[tk.Value] = append(kvTags[tk.Value], &Tag{Key: k, Value: nextval, Weight: tk.Weight})
 				}
 			}
 		} else {

+ 17 - 0
src/jy/util/elastic_client.go

@@ -0,0 +1,17 @@
+package util
+
+import (
+	"gopkg.in/olivere/elastic.v5"
+)
+
+var ElasticClient *elastic.Client // Shared Elasticsearch client; nil until the program's startup code assigns it.
+var ElasticClientIndex,ElasticClientType string // Index name and document type used together with ElasticClient.
+
+//func init() {
+//	if eClient, err := elastic.NewClient(elastic.SetURL(util.ObjToString(Config["elasticsearch"])));err != nil{
+//		log.Println(Config["elasticsearch"])
+//		log.Fatalln("ElasticClient err:",err)
+//	}else {
+//		ElasticClient = eClient
+//	}
+//}

+ 15 - 2
src/main.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	log "github.com/donnie4w/go-logger/logger"
 	_ "jy/admin"
 	_ "jy/admin/audit"
 	_ "jy/admin/distribution"
@@ -9,11 +10,12 @@ import (
 	_ "jy/front"
 	. "jy/router"
 	"jy/util"
+	"net/http"
+	_ "net/http/pprof"
 	qu "qfw/util"
 	//"qfw/util/elastic"
+	"gopkg.in/olivere/elastic.v5"
 	"qfw/util/redis"
-
-	log "github.com/donnie4w/go-logger/logger"
 )
 
 func init() {
@@ -34,6 +36,14 @@ func init() {
 	//初始化elastic连接
 	//"winner=172.17.145.179:2710,buyer=172.17.145.179:2711"
 	//elastic.InitElasticSize(qu.ObjToString(util.Config["elasticsearch"]), qu.IntAllDef(util.Config["elasticPoolSize"], 30))
+	if eClient, err := elastic.NewClient(elastic.SetURL(qu.ObjToString(util.Config["elasticsearch"]))); err != nil {
+		log.Debug(util.Config["elasticsearch"])
+		log.Fatal("ElasticClient err:", err)
+	} else {
+		util.ElasticClient = eClient
+		util.ElasticClientIndex = qu.ObjToString(util.Config["elasticsearch_index"])
+		util.ElasticClientType = qu.ObjToString(util.Config["elasticsearch_type"])
+	}
 }
 
 func main() {
@@ -42,6 +52,9 @@ func main() {
 	go extract.Export()
 	go Router.Run(":" + qu.ObjToString(util.Config["port"]))
 	go log.Debug("启动..", qu.ObjToString(util.Config["port"]))
+	// Expose the net/http/pprof handlers (registered by the blank import) on a
+	// loopback-only port. ListenAndServe only returns on failure, e.g. when the
+	// port is already taken, so report that instead of dropping it silently.
+	go func() {
+		if err := http.ListenAndServe("localhost:10000", nil); err != nil {
+			log.Error("pprof listener stopped:", err)
+		}
+	}()
 	lock := make(chan bool)
 	<-lock
 }

+ 5 - 1
src/main_blocktest.go

@@ -79,11 +79,15 @@ func com(doc map[string]interface{}) {
 	e.InitSite()
 	e.InitRulePres()
 	e.InitRuleBacks(false)
+	e.InitRuleBacks(true)
 	e.InitRuleCore(false)
+	e.InitRuleCore(true)
 	e.InitBlockRule()
 	e.InitPkgCore()
 	e.InitTag(false)
+	e.InitTag(true)
 	e.InitClearFn(false)
+	e.InitClearFn(true)
 	if e.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		e.InitCityDFA()
@@ -247,7 +251,7 @@ func GetDetail(doc map[string]interface{}) (detail string) {
 	return detail
 }
 func winnerorder() {
-	text := `评审专家名单:
+	text := `评审专家名单:1
 吴殿波、韩屹、孙胜进、郑丹、李海波
  
 中标标的名称、规格型号、数量、单价、服务要求:

+ 20 - 2
src/res/fieldscore.json

@@ -75,7 +75,7 @@
             },
             {
                 "describe": "包含词",
-                "regstr": "(万元|本项目)",
+                "regstr": "(万元|本项目|详见公告)",
                 "score": -10
             },
             {
@@ -250,7 +250,13 @@
                 "score": 3
             }
         ],
-        "negativewords": [],
+        "negativewords": [
+            {
+                "describe": "不在展示",
+                "regstr": "(详见公告)",
+                "score": -10
+            }
+        ],
         "length": [
             {
                 "describe": "[gt,lte,score]",
@@ -297,6 +303,10 @@
                 "describe": "是数字",
                 "regstr": "^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$",
                 "score": -10
+            },  {
+                "describe": "包含负分",
+                "regstr": "(详见公告)",
+                "score": -10
             }
         ],
         "length": [
@@ -318,6 +328,10 @@
                 "describe": "出现中文汉字",
                 "regstr": "[\\u4e00-\\u9fa5]",
                 "score": -10
+            },  {
+                "describe": "包含负分",
+                "regstr": "(详见公告)",
+                "score": -10
             }
         ],
         "length": [
@@ -366,6 +380,10 @@
                 "describe": "是数字",
                 "regstr": "^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$",
                 "score": -10
+            },  {
+                "describe": "包含负分",
+                "regstr": "(详见公告)",
+                "score": -10
             }
         ],
         "length": [

+ 486 - 0
src/web/templates/admin/qiyekuinfo.html

@@ -0,0 +1,486 @@
+{{template "inc"}}
+<!-- Main Header -->
+{{template "header"}}
+<!-- Left side column. 权限菜单 -->
+{{template "memu"}}
+<link href="/res/other/css/other.css" rel="stylesheet" type="text/css">
+<style>
+    #dataTable_filter div {
+        padding: 0px 10px;
+    }
+
+    .operate a {
+        padding: 5px 10px;
+    }
+</style>
+<div class="content-wrapper">
+    <section class="content-header" style="padding: 49px 15px 0 15px;">
+        <h1>
+            <small><a class="btn btn-primary opraaa" opraaa="newqyk">新增企业库信息</a></small>
+        </h1>
+        <ol class="breadcrumb">
+            <li><a href="#"><i class="fa fa-dashboard"></i> 企业库增删改查</a></li>
+        </ol>
+    </section>
+    <!-- Main content -->
+    <section class="content">
+        <div class="row">
+            <div class="col-xs-12">
+                <div class="box">
+                    <div class="box-body">
+                        <table id="dataTable" class="table table-bordered table-hover">
+                            <thead>
+                            <tr>
+                                <th>企业名称</th>
+                                <th>别名</th>
+                                <th>省份</th>
+                                <th>城市</th>
+                                <th>区县</th>
+                                <th>注册资本</th>
+                                <th>企业地址</th>
+                                <th>操作</th>
+                            </tr>
+                            </thead>
+                        </table>
+                    </div>
+                    <!-- /.box-body -->
+                </div>
+                <!-- /.box -->
+            </div>
+        </div>
+    </section>
+
+</div>
+<div class="modal fade" id="info_data">
+    <div class="modal-dialog">
+        <form id="update-dataform" class="form-horizontal" role="form">
+            <div class="modal-content">
+                <div class="modal-header">
+                    <button type="button" class="close" data-dismiss="modal" aria-label="Close">
+                        <span aria-hidden="true">&times;</span></button>
+                    <h4 class="modal-title" id="info_title"></h4>
+                </div>
+                <div class="modal-body" id="infos">
+                </div>
+            </div>
+            <!-- /.modal-content -->
+        </form>
+    </div>
+    <!-- /.modal-dialog -->
+</div>
+{{template "dialog"}}
+{{template "footer"}}
+<script>
+    menuActive("qiyeku_info")
+    var ttable = {}
+    $(function () {
+        ttable = $('#dataTable').DataTable({
+            "paging": true,
+            "lengthChange": false,
+            "searching": true,
+            "ordering": false,
+            "info": true,
+            "autoWidth": false,
+            "serverSide": true,
+            "ajax": {
+                "url": "/admin/audit/query_qyk/list",
+                "type": "post",
+            },
+            "language": {
+                "url": "/res/dist/js/dataTables.chinese.lang"
+            },
+            "columns": [
+                {
+                    "data": "company_name", "width": "18%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "alias", "width": "11%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "province", "width": "4%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "city", "width": "4%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "district", "width": "4%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "capital", "width": "6%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "company_address", "width": "17%", render: function (val, a, row) {
+                        if (val == null) {
+                            return ""
+                        }
+                        return val
+                    }
+                },
+                {
+                    "data": "_id", "width": "24%", render: function (val, a, row) {
+                        var valueStr = JSON.stringify(row);
+                        return "<div class='operate'>" +
+                            "<a class='btn btn-primary' onclick='editdata(" + valueStr + ")'>编辑</a>&nbsp;" +
+                            "<a class='btn btn-default' onclick='showHylxs(\"" + val + "\")'>详细行业类型</a>&nbsp;" +
+                            "<a class='btn btn-sm btn-warning' onclick='showTels(\"" + val + "\")'>详细联系方式</a>&nbsp;" +
+                            "<a class='btn btn-sm btn-danger' onclick='deleteQyk(\"" + val + "\")'>删除</a>&nbsp;" +
+                            "</div>"
+                    }
+                }
+            ]
+        });
+        /*新增企业库*/
+        ttable.on('init.dt', function () {
+            $(".opraaa").click(function () {
+                var n = $(this).attr("opraaa");
+                var _tit = "", htmlObj = {}, obj, tag = [];
+                switch (n) {
+                    case "newqyk":
+                        tag = [
+                            {label: "企业名称:", s_label: "s_company_name", must: true},
+                            {label: "别名:", s_label: "s_alias"},
+                            {label: "省份:", s_label: "s_province"},
+                            {label: "城市:", s_label: "s_city"},
+                            {label: "区县:", s_label: "s_district"},
+                            {label: "注册资本:", s_label: "s_capital", placeholder: "示例:45678.98元;456.78万元;1234.25;"},
+                            {label: "企业地址:", s_label: "s_company_address"},
+                        ];
+                        //新增企业库按钮
+                        htmlObj = {
+                            title: "新增企业库信息",
+                            tag: tag,
+                            bts: [
+                                {
+                                    label: "Save", class: "btn-danger",
+                                    fun: function () {
+                                        var company_name = $("#s_company_name").val();
+                                        var alias = $("#s_alias").val();
+                                        var province = $("#s_province").val();
+                                        var city = $("#s_city").val();
+                                        var district = $("#s_district").val();
+                                        var capital = $("#s_capital").val();
+                                        var company_address = $("#s_company_address").val();
+                                        if (company_name === "") {
+                                            alert("红色标签的表单不能为空!");
+                                            return
+                                        }
+                                        //新增企业库api
+                                        $.post("/admin/audit/qiyeku_info/save", {
+                                            "company_name": company_name,
+                                            "alias": alias,
+                                            "province": province,
+                                            "city": city,
+                                            "district": district,
+                                            "capital": capital,
+                                            "company_address": company_address,
+                                        }, function (data) {
+                                            if (data && data.rep) {
+                                                $("#myModal").modal("hide");
+                                                alert("保存成功");
+                                                setTimeout(function () {
+                                                    ttable.search(company_name).draw();
+                                                    //    history.go(0) //刷新本页
+                                                }, 1500) //停1.5秒
+
+                                            } else {
+                                                alert("内部错误");
+                                            }
+                                        }, 'json')
+                                    }
+                                }
+                            ]
+                        };
+                        OpenDialog(htmlObj, obj);
+                        break;
+                }
+            });
+        })
+    });
+
+    //详细行业类型
+    function showHylxs(_id) {
+        $("#infos").html("");
+        $("#info_title").html("详细行业类型");
+        $.post("/admin/audit/query_qyk/ById", {
+            "_id": _id,
+            "q_field": "industry",
+        }, function (data) {
+            if (data && data.rep) {
+                // console.log(data);
+                var tmp = "<div class='row'>";
+                for (var i in data.data.industry) {
+                    tmp = tmp +
+                        "<div class=\"col-lg-10\">\n" +
+                        "<div class=\"input-group\">\n" +
+                        "   <input type='text' class='form-control' name='industry'  placeholder=\"请填写行业类型\" value='" + data.data.industry[i] + "'>" +
+                        "   <span class=\"input-group-btn\">" +
+                        '       <button class="btn btn-info " type="button" data-toggle="tooltip" title="删除" id="delCenterIpGrp"><span class="glyphicon glyphicon-minus"></span></button>' +
+                        "   </span> " +
+                        "  </div>" +
+                        "</div>";
+                    // console.log(i, data.data.industry[i]);
+                }
+                tmp = tmp +
+                    "<div class=\"col-lg-10\">\n" +
+                    "<button class='btn btn-info col-md-3' type='button'  id='addCenterIpGrpBtn' onclick='addCenterIpGrp(this)' >新增行业类型</button> " +
+                    "<button type='button' class='btn btn-sm btn-default col-md-offset-5 col-md-2'  data-dismiss='modal' aria-label='Close'>取消</button>" +
+                    "<button type='button' class='btn btn-sm btn-danger col-md-2' onclick='updateIndustrys(\"" + _id + "\")'>保存</button>" +
+                    "</div>" +
+                    "</div>";
+                $("#infos").html(tmp);
+
+
+            } else {
+                alert("内部错误");
+            }
+        }, 'json');
+        //展示详细行业类型
+        $("#info_data").modal("show");
+    }
+
+    //添加行业详情
+    function addCenterIpGrp(obj) {
+        html = '<div class="input-group centerIp">' +
+            '<input type="text" name="industry" class="form-control" id="ipInput" placeholder="请填写行业类型">' +
+            '<span class="input-group-btn">' +
+            '<button class="btn btn-info" type="button" data-toggle="tooltip" title="删除" id="delCenterIpGrp"><span class="glyphicon glyphicon-minus"></span></button>' +
+            '</span>' +
+            '</div>'
+        obj.insertAdjacentHTML('beforebegin', html);
+    }
+
+    //更新行业详情
+    function updateIndustrys(_id) {
+        if (_id === "") {
+            alert("参数id不存在");
+            return
+        }
+        var params = $("#update-dataform").serialize();
+        // console.log(_id, params);
+        $.ajax({
+            type: "POST",
+            url: "/admin/audit/query_qyk/UpdateIndustrys",
+            data: params + "&_id=" + _id,
+            success: function (msg) {
+                $("#info_data").modal("hide");
+                if (msg.data) {
+                    alert("success");
+                } else {
+                    alert("更新失败");
+                }
+            }
+        });
+    }
+
+    $(document).on('click', '#delCenterIpGrp', function () {
+        var el = this.parentNode.parentNode;
+        if (confirm('您确定要删除选中的命令?')) {
+            el.parentNode.removeChild(el);
+        }
+    });
+
+    //详细联系方式
+    function showTels(_id) {
+        $("#infos").html("");
+        $("#info_title").html("详细联系方式");
+        $.post("/admin/audit/query_qyk/ById", {
+            "_id": _id,
+            "q_field": "contact",
+        }, function (data) {
+            if (data && data.rep) {
+                // console.log(data);
+                // var tmp = "<div class='row'>";
+                var tmp = "";
+                for (var i in data.data.contact) {
+                    tmp += "<div class='row'>" +
+                        "<div class=\"col-lg-10\">" +
+                        '<h1><div class="input-group centerIp">' +
+                        '<input type="text" name="contact_persons" class="form-control" id="ipInput" placeholder="请填写联系人" value=' + data.data.contact[i].contact_person + '>' +
+                        '<select class="form-control" name="contact_types" value=' + data.data.contact[i].contact_type + '><option value ="法定代表人">法定代表人</option><option value ="项目联系人">项目联系人</option></select>' +
+                        '<input type="text" name="phones" class="form-control" id="ipInput" placeholder="请填写联系电话" value=' + data.data.contact[i].phone + '>' +
+                        '<select class="form-control" name="topscopeclasss" value=' + data.data.contact[i].topscopeclass + '><option value ="企业公示">企业公示</option><option value ="剑鱼标讯">剑鱼标讯</option></select>' +
+                        '<span class="input-group-btn">' +
+                        '<button class="btn btn-info" type="button" data-toggle="tooltip" title="删除" id="delCenterIpGrp"><span class="glyphicon glyphicon-minus"></span></button>' +
+                        '</span>' +
+                        '</div><h1>' +
+                        '</div></div>';
+                    // console.log(i, data.data.contact[i]);
+                }
+                tmp = tmp +
+                    "<div class='row'>" +
+                    "<div class=\"col-lg-10\">" +
+                    "<button class='btn btn-info col-md-3' type='button'  id='addCenterIpGrpBtn' onclick='addCenterIpGrpTels(this)' >新增联系方式</button> " +
+                    "<button type='button' class='btn btn-sm btn-default col-md-offset-5 col-md-2'  data-dismiss='modal' aria-label='Close'>取消</button>" +
+                    "<button type='button' class='btn btn-sm btn-danger col-md-2' onclick='updateTels(\"" + _id + "\")'>保存</button>" +
+                    "</div>" +
+                    "</div>";
+                $("#infos").html(tmp);
+            } else {
+                alert("内部错误");
+            }
+        }, 'json');
+        $("#info_data").modal("show");
+    }
+
+    //添加联系方式
+    function addCenterIpGrpTels(obj) {
+        html = '<h1><div class="input-group centerIp">' +
+            '<input type="text" name="contact_persons" class="form-control" id="ipInput" placeholder="请填写联系人">' +
+            '<select class="form-control" name="contact_types"><option value ="法定代表人">法定代表人</option><option value ="项目联系人">项目联系人</option></select>' +
+            '<input type="text" name="phones" class="form-control" id="ipInput" placeholder="请填写联系电话">' +
+            '<select class="form-control" name="topscopeclasss"><option value ="企业公示">企业公示</option><option value ="剑鱼标讯">剑鱼标讯</option></select>' +
+            '<span class="input-group-btn">' +
+            '<button class="btn btn-info" type="button" data-toggle="tooltip" title="删除" id="delCenterIpGrp"><span class="glyphicon glyphicon-minus"></span></button>' +
+            '</span>' +
+            '</div><h1>'
+        obj.insertAdjacentHTML('beforebegin', html);
+    }
+
+    //更新联系方式
+    function updateTels(_id) {
+        if (_id === "") {
+            alert("参数id不存在");
+            return
+        }
+        var params = $("#update-dataform").serialize();
+        console.log(params)
+        $.ajax({
+            type: "POST",
+            url: "/admin/audit/query_qyk/UpdateTels",
+            data: params + "&_id=" + _id,
+            // contentType: "application/json; charset=utf-8",
+            dataType: "json",
+            traditional: true,
+            success: function (msg) {
+                $("#info_data").modal("hide");
+                if (msg.data) {
+                    alert("success");
+                } else {
+                    alert("更新失败");
+                }
+            }
+        });
+    }
+
+    //删除
+    function deleteQyk(_id) {
+        if (_id === "") {
+            alert("参数id为空");
+            return
+        }
+        if (confirm("您确认要提交此操作?")) {
+            $.post("/admin/audit/qiyeku_info/deleteQyk", {
+                "_id": _id,
+            }, function (data) {
+                if (data && data.rep) {
+                    setTimeout(function () {
+                        alert("删除成功");
+                        ttable.search("").draw();
+                        //    history.go(0) //刷新本页
+                    }, 800) //停1.5秒
+
+                } else {
+                    alert("内部错误");
+                }
+            }, 'json');
+            ttable.draw();
+        }
+    }
+
+    function editdata(_obj) {
+        $("#infos").html("");
+        $("#info_title").html("编辑企业库信息");
+        com = "<div class=\"row form-group\">" +
+            "<label for=\"lastname\" class=\"col-sm-2 control-label\" style=\"color: red;\">企业名称:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_company_name\" must=\"true\" value='" + _obj.company_name + "'></div>" +
+            "</div><div class=\"row form-group\">" +
+            "<label for=\"lastname\" class=\"col-sm-2 control-label\">别名:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_alias\" value='" + _obj.alias + "'></div>" +
+            "</div><div class=\"row form-group\"><label for=\"lastname\" class=\"col-sm-2 control-label\">省份:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_province\" value='" + _obj.province + "'></div>" +
+            "</div><div class=\"row form-group\"><label for=\"lastname\" class=\"col-sm-2 control-label\">城市:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_city\" value='" + _obj.city + "'></div>" +
+            "</div><div class=\"row form-group\"><label for=\"lastname\" class=\"col-sm-2 control-label\">区县:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_district\" value='" + _obj.district + "'></div>" +
+            "</div><div class=\"row form-group\"><label for=\"lastname\" class=\"col-sm-2 control-label\">注册资本:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_capital\" value='" + _obj.capital + "'  placeholder=\"示例:45678.98元;456.78万元;1234.25;\"></div>" +
+            "</div><div class=\"row form-group\"><label for=\"lastname\" class=\"col-sm-2 control-label\">企业地址:</label>" +
+            "<div class=\"col-sm-10\"><input type=\"text\" class=\"form-control\" id=\"s_company_address\" value='" + _obj.company_address + "'></div>" +
+            "</div></div>" +
+            "<div class=\"row form-group\">" +
+            "<button type='button' class='btn btn-sm btn-default col-md-offset-5 col-md-2'  data-dismiss='modal' aria-label='Close'>取消</button>" +
+            "<button type='button' class='btn btn-sm btn-danger col-md-2' onclick='updatedate(\"" + _obj._id + "\")'>更新</button></div>";
+        $("#infos").html(com);
+        $("#info_data").modal("show");
+    }
+
+    function updatedate(_id) {
+        if (_id === "") {
+            alert("id不存在!");
+            return
+        }
+        var company_name = $("#s_company_name").val();
+        var alias = $("#s_alias").val();
+        var province = $("#s_province").val();
+        var city = $("#s_city").val();
+        var district = $("#s_district").val();
+        var capital = $("#s_capital").val();
+        var company_address = $("#s_company_address").val();
+        if (company_name === "") {
+            alert("红色标签的表单不能为空!");
+            return
+        }
+        //新增企业库api
+        $.post("/admin/audit/qiyeku_info/save", {
+            "_id": _id,
+            "company_name": company_name,
+            "alias": alias,
+            "province": province,
+            "city": city,
+            "district": district,
+            "capital": capital,
+            "company_address": company_address,
+        }, function (data) {
+            if (data && data.rep) {
+                alert("更新成功");
+                $("#info_data").modal("hide");
+                setTimeout(function () {
+                    ttable.search(company_name).draw();
+                    //    history.go(0) //刷新本页
+                }, 800) //停1.5秒
+            } else {
+                alert("内部错误");
+            }
+        }, 'json')
+    }
+</script>

+ 5 - 1
udp_ocr_conter/crondata/main6.go

@@ -46,8 +46,12 @@ func main() {
 			if sid.IsZero() {
 				sid = tmp["_id"]
 			} else {
-				if !eid.IsZero(){
+				if !eid.IsZero() && eid != sid{
 					sid = eid
+				}else {
+					log.Println(sid,eid,"为空或者id一致")
+					timer.Reset(time.Minute)
+					continue
 				}
 			}
 			result2 := client.Database("qfw").Collection("bidding").FindOne(ctx, primitive.M{}, options.FindOne().SetSort(primitive.M{"_id": -1}).SetProjection(primitive.M{"_id": 1}))

+ 11 - 27
udpfilterdup/src/config.json

@@ -2,40 +2,24 @@
     "udpport": ":1485",
     "dupdays": 5,
     "mongodb": {
-        "addr": "192.168.3.207:27082",
-        "pool": 15,
-        "db": "zhaolongyue",
-        "extract": "aliyun_0102",
-        "extract_copy": "a_testbidding",
-        "bidding": "bidding_126"
+        "addr": "192.168.3.207:27092",
+        "pool": 5,
+        "db": "extract_kf",
+        "extract": "ceshi_info",
+        "site": {
+            "dbname": "zhaolongyue",
+            "coll": "site"
+        }
     },
     "jkmail": {
-        "to": "renzheng@topnet.net.cn",
+        "to": "zhangjinkun@topnet.net.cn",
         "api": "http://10.171.112.160:19281/_send/_mail"
     },
-    "nextNode": [
-        {
-            "addr": "127.0.0.11",
-            "port": 1482,
-            "stype": "project",
-            "memo": "合并项目"
-        },
-        {
-            "addr": "127.0.0.1",
-            "port": 1483,
-            "stype": "bidding",
-            "memo": "创建招标数据索引"
-        }
-    ],
-    "isMerger":false,
+    "nextNode": [],
+    "isMerger": false,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包)",
     "specialtitle_2": "项目([0-9a-zA-Z一二三四五六七八九十零123456789])",
-
-
     "beifen": "[((]?[0-9一二三四五六七八九十零123456789再][))]?[子分]?[次批标包]|重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研"
-
-
-
 }
 

Fișier diff suprimat deoarece este prea mare
+ 296 - 339
udpfilterdup/src/datamap.go


Fișier diff suprimat deoarece este prea mare
+ 307 - 292
udpfilterdup/src/main.go


+ 688 - 0
udpprojectset/src/heavy_test.go

@@ -0,0 +1,688 @@
+package main
+
+import (
+	"fmt"
+	"github.com/tealeg/xlsx"
+	"log"
+	"qfw/util"
+	"qfw/util/mongodb"
+	"testing"
+)
+
+var (
+	mgo          *mongodb.MongodbSim    // MongoDB operation object, assigned by the tests in this file before use
+	//mgo_copy          *mongodb.MongodbSim    // MongoDB operation object (second connection, currently disabled)
+)
+
+
+// Test_crawlerExtractitCompare compares the fields produced by two extraction
+// runs, grouped by crawler code (spidercode), and writes the result to xlsx files.
+//
+// It reads three collections of the hard-coded "extract_kf" database:
+//   - zheng_test_1:    used only to map a document _id to its spidercode
+//   - zheng_test1_jd1: extraction result V1
+//   - zheng_test1_jd2: extraction result V2
+//
+// For every crawler listed in crawlerArr it writes "zk_<spidercode>.xlsx" with
+// one sheet per compared field that has at least one differing value, and it
+// finally writes "摘要.xlsx" holding the same/different counters per crawler
+// and field. Documents of V1 and V2 are paired by position; both collections
+// are read sorted by _id, so this assumes they contain the same documents.
+func Test_crawlerExtractitCompare(t *testing.T) {
+
+	mgo = &mongodb.MongodbSim{
+		MongodbAddr: "192.168.3.207:27092",
+		DbName:      "extract_kf",
+		Size:        util.IntAllDef(15, 10),
+	}
+	mgo.InitPool()
+
+	// Pass 1: build the _id -> spidercode lookup table.
+	sess := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess)
+	it := sess.DB("extract_kf").C("zheng_test_1").Find(nil).Sort("_id").Iter()
+	n := 0
+	crawlerMap := make(map[string]string)
+	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+		if n%10000 == 0 {
+			log.Println("当前n:", n)
+		}
+
+		//if n>2000 {
+		//	break
+		//}
+		crawlerMap[util.BsonIdToSId(tmp["_id"])] = util.ObjToString(tmp["spidercode"])
+	}
+	// A failed cursor would otherwise look like a short but valid result set.
+	if err := it.Close(); err != nil {
+		t.Fatalf("reading zheng_test_1: %v", err)
+	}
+
+	// Pass 2: load extraction result V1, grouped by spidercode.
+	sess_1 := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess_1)
+	it_1 := sess_1.DB("extract_kf").C("zheng_test1_jd1").Find(nil).Sort("_id").Iter()
+	n1 := 0
+	crawlerMap_1 := make(map[string][]map[string]interface{})
+
+	for tmp := make(map[string]interface{}); it_1.Next(&tmp); n1++ {
+		if n1%10000 == 0 {
+			log.Println("当前n1:", n1)
+		}
+
+		//if n1>2000 {
+		//	break
+		//}
+
+		// Snapshot of the fields of interest for this document.
+		dic := map[string]interface{}{
+			"_id":         util.BsonIdToSId(tmp["_id"]),
+			"href":        util.ObjToString(tmp["href"]),
+			"title":       util.ObjToString(tmp["title"]),
+			"buyer":       util.ObjToString(tmp["buyer"]),
+			"agency":      util.ObjToString(tmp["agency"]),
+			"winner":      util.ObjToString(tmp["winner"]),
+			"budget":      util.ObjToString(tmp["budget"]),
+			"bidamount":   util.ObjToString(tmp["bidamount"]),
+			"projectname": util.ObjToString(tmp["projectname"]),
+			"projectcode": util.ObjToString(tmp["projectcode"]),
+			"publishtime": util.ObjToString(tmp["publishtime"]),
+			"bidopentime": util.ObjToString(tmp["bidopentime"]),
+			"agencyaddr":  util.ObjToString(tmp["agencyaddr"]),
+		}
+		value := crawlerMap[util.BsonIdToSId(tmp["_id"])]
+		// append allocates the slice on first use, no explicit make needed.
+		crawlerMap_1[value] = append(crawlerMap_1[value], dic)
+	}
+	if err := it_1.Close(); err != nil {
+		t.Fatalf("reading zheng_test1_jd1: %v", err)
+	}
+
+	// Pass 3: load extraction result V2, grouped by spidercode.
+	sess_2 := mgo.GetMgoConn()
+	defer mgo.DestoryMongoConn(sess_2)
+	it_2 := sess_2.DB("extract_kf").C("zheng_test1_jd2").Find(nil).Sort("_id").Iter()
+	n2 := 0
+	crawlerMap_2 := make(map[string][]map[string]interface{})
+	for tmp := make(map[string]interface{}); it_2.Next(&tmp); n2++ {
+		if n2%10000 == 0 {
+			log.Println("当前n2:", n2)
+		}
+
+		//if n2>1000 {
+		//	break
+		//}
+
+		// Snapshot of the fields of interest for this document.
+		dic := map[string]interface{}{
+			"_id":         util.BsonIdToSId(tmp["_id"]),
+			"href":        util.ObjToString(tmp["href"]),
+			"buyer":       util.ObjToString(tmp["buyer"]),
+			"agency":      util.ObjToString(tmp["agency"]),
+			"winner":      util.ObjToString(tmp["winner"]),
+			"budget":      util.ObjToString(tmp["budget"]),
+			"bidamount":   util.ObjToString(tmp["bidamount"]),
+			"projectname": util.ObjToString(tmp["projectname"]),
+			"projectcode": util.ObjToString(tmp["projectcode"]),
+		}
+		value := crawlerMap[util.BsonIdToSId(tmp["_id"])]
+		crawlerMap_2[value] = append(crawlerMap_2[value], dic)
+	}
+	if err := it_2.Close(); err != nil {
+		t.Fatalf("reading zheng_test1_jd2: %v", err)
+	}
+
+	log.Println("爬虫类个数分别为:", len(crawlerMap_1), len(crawlerMap_2))
+
+	// Without the same set of crawlers on both sides there is nothing
+	// meaningful to compare; report that instead of passing silently.
+	if len(crawlerMap_1) != len(crawlerMap_2) || len(crawlerMap_1) == 0 {
+		t.Skipf("nothing to compare: %d crawler groups in V1, %d in V2", len(crawlerMap_1), len(crawlerMap_2))
+	}
+
+	// Fields compared between V1 and V2; the index in this slice is also the
+	// index into the "same"/"diff" counters below.
+	var list = []string{
+		"buyer",
+		"agency",
+		"winner",
+		"budget",
+		"bidamount",
+		"projectname",
+		"projectcode",
+	}
+
+	// Crawlers (spidercode values) included in the report.
+	var crawlerArr = []string{
+		"a_zgzfcgw_zfcghtgg_new",
+		"gd_gdszfcgw_dscght",
+		"a_zgzfcgw_bid_tender_new",
+		"a_ztxygjzbtbzxyxgs_zbxx",
+		"sd_zgsdzfcgw_xxgk_sxhtgk",
+	}
+
+	// Counters per crawler: "same"/"diff" hold one counter per entry of list,
+	// "total" holds the number of V1 documents in its single element.
+	AnaNumMap := make(map[string]map[string][]int, len(crawlerArr))
+	for _, code := range crawlerArr {
+		AnaNumMap[code] = map[string][]int{
+			"same":  make([]int, len(list)),
+			"diff":  make([]int, len(list)),
+			"total": []int{0},
+		}
+	}
+
+	fmt.Println(len(AnaNumMap))
+	// Compare the two result sets crawler by crawler.
+	for _, v := range crawlerArr {
+		if crawlerMap_1[v] == nil || crawlerMap_2[v] == nil {
+			continue
+		}
+		arr_1 := crawlerMap_1[v]
+		arr_2 := crawlerMap_2[v]
+
+		log.Println("数据总量:", len(arr_1))
+		AnaNumMap[v]["total"][0] = len(arr_1)
+
+		// Documents are paired by position, so only the overlapping prefix can
+		// be compared; indexing arr_2 up to len(arr_1) panicked when V2 was shorter.
+		count := len(arr_1)
+		if len(arr_2) != len(arr_1) {
+			log.Printf("crawler %s: V1 has %d documents, V2 has %d; comparing the common prefix only", v, len(arr_1), len(arr_2))
+			if len(arr_2) < count {
+				count = len(arr_2)
+			}
+		}
+
+		f := xlsx.NewFile()
+		// One sheet per field, created lazily on the first difference.
+		for i, field := range list {
+			var sheet *xlsx.Sheet
+			row := 0
+			for j := 0; j < count; j++ {
+				string_1 := fmt.Sprint(arr_1[j][field])
+				string_2 := fmt.Sprint(arr_2[j][field])
+				if string_1 == string_2 {
+					AnaNumMap[v]["same"][i]++
+					continue
+				}
+				if sheet == nil {
+					var err error
+					sheet, err = f.AddSheet(field)
+					if err != nil {
+						t.Fatalf("adding sheet %q for crawler %s: %v", field, v, err)
+					}
+					// Header row.
+					sheet.Cell(row, 0).Value = "_id"
+					sheet.Cell(row, 1).Value = "href"
+					sheet.Cell(row, 2).Value = fmt.Sprint(field) + "_V1"
+					sheet.Cell(row, 3).Value = fmt.Sprint(field) + "_V2"
+					row++
+				}
+				sheet.Cell(row, 0).Value = util.BsonIdToSId(arr_1[j]["_id"])
+				sheet.Cell(row, 1).Value = util.ObjToString(arr_1[j]["href"])
+				sheet.Cell(row, 2).Value = string_1
+				sheet.Cell(row, 3).Value = string_2
+				row++
+				AnaNumMap[v]["diff"][i]++
+			}
+		}
+
+		// NOTE(review): a workbook without sheets is presumably rejected by
+		// Save (confirm against the vendored xlsx version); a crawler with no
+		// differences therefore produces no detail file.
+		if len(f.Sheet) == 0 {
+			log.Printf("crawler %s: no differences, detail file not written", v)
+			continue
+		}
+
+		path := "zk_" + v + ".xlsx"
+		if err := f.Save(path); err != nil {
+			t.Fatalf("保存xlsx失败: %v", err)
+		}
+	}
+
+	// Summary workbook: one row per crawler, one column per compared field.
+	f := xlsx.NewFile()
+	sheet, err := f.AddSheet("摘要")
+	if err != nil {
+		t.Fatalf("adding summary sheet: %v", err)
+	}
+	for i := 0; i < len(list); i++ {
+		sheet.Cell(1, i+3).Value = list[i]
+	}
+
+	for i := 0; i < len(crawlerArr); i++ {
+		sheet.Cell(i+2, 0).Value = crawlerArr[i]
+		total := fmt.Sprint(AnaNumMap[crawlerArr[i]]["total"][0])
+		sheet.Cell(i+2, 1).Value = total
+		same := AnaNumMap[crawlerArr[i]]["same"]
+		diff := AnaNumMap[crawlerArr[i]]["diff"]
+		for j := 0; j < len(same); j++ {
+			sheet.Cell(i+2, j+3).Value = fmt.Sprint(same[j]) + "~" + fmt.Sprint(diff[j])
+		}
+
+	}
+	sheet.Cell(1, 0).Value = "爬虫代码"
+	sheet.Cell(1, 1).Value = "数据总量"
+	sheet.Cell(1, 2).Value = "相同字段对比"
+	sheet.Cell(2, 2).Value = "相同数量~不同数量"
+
+	fmt.Println(AnaNumMap)
+
+	path := "摘要" + ".xlsx"
+	if err := f.Save(path); err != nil {
+		t.Errorf("保存xlsx失败: %v", err)
+	}
+
+}
+
+
+
+//对比判重区别
+//func Test_heavy(t *testing.T) {
+
+	//mapinfo := map[string]interface{}{
+	//	"gtid":  "586b6d7061a0721f15b8f264",
+	//	"lteid": "5e0b2b780cf41612e0639460",
+	//}
+	//task([]byte{}, mapinfo)
+
+	//extract,extract_copy:="a_testbidding_new","a_testbidding"
+	//
+	//sess := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sess)
+	//res_copy := sess.DB("extract_kf").C(extract_copy).Find(nil).Iter()
+	//
+	//m1 :=map[string]int{} //老版本
+	//m2 :=map[string]int{} //新版本
+	//
+	//i:=0
+	//j:=0
+	//for v1 := make(map[string]interface{}); res_copy.Next(&v1); i++{
+	//	if i%2000==0 {
+	//		log.Println("当前i:",i)
+	//	}
+	//	m1[(v1["_id"].(bson.ObjectId).Hex())]= util.IntAll(v1["repeat"])
+	//}
+	//
+	//sesss := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sesss)
+	//res := sesss.DB("extract_kf").C(extract).Find(nil).Iter()
+	//
+	//
+	//for v2 := make(map[string]interface{}); res.Next(&v2); j++{
+	//	if j%2000==0 {
+	//		log.Println("当前j:",j)
+	//	}
+	//	m2[(v2["_id"].(bson.ObjectId).Hex())]= util.IntAll(v2["repeat"])
+	//}
+	//
+	//fmt.Println(len(m1),len(m2))
+	//n1:=0
+	//n2:=0
+	//n3:=0
+	//n4:=0
+	//n5:=0
+	//n6:=0
+	//
+	//var arr1 []string
+	//var arr2 []string
+	//for k,v:=range m1{
+	//
+	//	if m2[k]==1&&v==0{//0:1
+	//		n1++
+	//		arr2 = append(arr2,fmt.Sprintf("目标_id:%s",k))
+	//	}
+	//	if m2[k]==0&&v==1{ //1:0
+	//		n2++
+	//		arr1 = append(arr1,fmt.Sprintf("目标_id:%s",k))
+	//	}
+	//	if m2[k]==0&&v==0{ //0:0
+	//		n3++
+	//	}
+	//	if m2[k]==1&&v==1{//1:1
+	//		n4++
+	//	}
+	//	if m2[k]==-1&&v==0{ //0:-1
+	//		n5++
+	//	}
+	//	if m2[k]==-1&&v==1{//1:-1
+	//		n6++
+	//	}
+	//
+	//}
+	////打印 1:0情况    ;
+	//mm:=0
+	//for _,v:=range arr1 {
+	//	mm++
+	//	if mm%200==0 {
+	//		log.Println(v)
+	//	}
+	//}
+	//
+	//log.Println("分割线---------------")
+	//log.Println("分割线---------------")
+	//
+	//
+	////打印 0:1情况
+	//nn:=0
+	//for _,v:=range arr2 {
+	//	nn++
+	//	if nn%200==0 {
+	//		log.Println(v)
+	//	}
+	//}
+	//
+	//log.Println("V1 0:1---",n1)
+	//log.Println("V1 1:0---",n2)
+	//log.Println("V1 0:0---",n3)
+	//log.Println("V1 1:1---",n4)
+	//log.Println("V1 0:-1---",n5)
+	//log.Println("V1 1:-1---",n6)
+//}
+
+//糅合数据
+//func Test_specifiedField(t *testing.T) {
+
+	//mgo = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27081",
+	//	DbName:      "qfw",
+	//	Size:        util.IntAllDef(15, 10),
+	//}
+	//mgo.InitPool()
+	//
+	//mgo_copy = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27092",
+	//	DbName:      "extract_kf",
+	//	Size:        util.IntAllDef(15, 10),
+	//}
+	//mgo_copy.InitPool()
+	//
+	//
+	////固定死的需要分析的字段
+	//field_map := map[string]string{
+	//	"title":"1",
+	//	"area":"1",
+	//	"city":"1",
+	//	"subtype":"1",
+	//	"buyer":"1",
+	//	"agency":"1",
+	//	"winner":"1",
+	//	"budget":"1",
+	//	"bidamount":"1",
+	//	"projectname":"1",
+	//	"projectcode":"1",
+	//	"publishtime":"1",
+	//	"comeintime":"1",
+	//	"bidopentime":"1",
+	//	"agencyaddr":"1",
+	//	"site":"1",
+	//	"href":"1",
+	//}
+	//
+	//
+	//sess := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sess)
+	//
+	//sess_1 :=mgo_copy.GetMgoConn()
+	//defer mgo_copy.DestoryMongoConn(sess_1)
+	//
+	//sess_2 :=mgo_copy.GetMgoConn()
+	//defer mgo_copy.DestoryMongoConn(sess_2)
+	//
+	//
+	//it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
+	//it_1 :=sess_1.DB("extract_kf").C("zheng_test_1")
+	//it_2 :=sess_2.DB("extract_kf").C("zheng_test_2")
+	//n:=0
+	//for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+	//	if n%10000==0 {
+	//		log.Println("当前n:",n)
+	//	}
+	//	if n>1000000 { //约半月数据
+	//		break
+	//	}
+	//	if tmp["spidercode"]=="a_zgzfcgw_zfcghtgg_new"|| tmp["spidercode"]=="gd_gdszfcgw_dscght"||
+	//		tmp["spidercode"]=="a_zgzfcgw_bid_tender_new"||tmp["spidercode"]=="a_ztxygjzbtbzxyxgs_zbxx"||
+	//		tmp["spidercode"]=="sd_zgsdzfcgw_xxgk_sxhtgk"{
+	//		jsonData := util.ObjToMap(tmp["jsondata"])
+	//		if jsonData!=nil {
+	//			for k,v :=range *jsonData  {
+	//				if fmt.Sprint(v) !=""{
+	//					if field_map[k]=="1" {
+	//						it_1.Insert(tmp)
+	//						it_2.Insert(tmp)
+	//						break
+	//					}
+	//				}
+	//			}
+	//		}
+	//	}
+	//}
+	//log.Println("总计",n,"条数据")
+
+//}
+
+
+//统计字段
+//func Test_field(t *testing.T) {
+
+	//mgo = &mongodb.MongodbSim{
+	//	MongodbAddr: "192.168.3.207:27081",
+	//	DbName:      "qfw",
+	//	Size:        util.IntAllDef(15, 10),
+	//}
+	//mgo.InitPool()
+	//
+	////调试 - 导出数据
+	////1:已抽取字段为准,统计对应爬虫字段存在个数,出个结果表格统计(前100名)
+	////2:人工抽查数据质量,用于jsondata权重评估
+	//
+	////取 固有字段 1-为存在
+	////now := int64(time.Now().Unix())
+	////date_time := int64(86400*2)
+	//
+	////field_map := make(map[string]string,0)
+	////sess_field := mgo.GetMgoConn()
+	////defer sess_field.Close()
+	////res_field := sess_field.DB("extract_kf").C("fields").Find(nil).Sort("_id").Iter()
+	////for dict := make(map[string]interface{}); res_field.Next(&dict); {
+	////	field_map[dict["s_field"].(string)] = "1"
+	////}
+	//
+	////固定死的需要分析的字段
+	//field_map := map[string]string{
+	//	"title":"1",
+	//	"area":"1",
+	//	"city":"1",
+	//	"subtype":"1",
+	//	"buyer":"1",
+	//	"agency":"1",
+	//	"winner":"1",
+	//	"budget":"1",
+	//	"bidamount":"1",
+	//	"projectname":"1",
+	//	"projectcode":"1",
+	//	"publishtime":"1",
+	//	"comeintime":"1",
+	//	"bidopentime":"1",
+	//	"agencyaddr":"1",
+	//	"site":"1",
+	//	"href":"1",
+	//}
+	//
+	///*	ObjectId("5da3f2c5a5cb26b9b79847fc") 0
+	//	ObjectId("5da3fd6da5cb26b9b7a8683c") 5000
+	//	ObjectId("5da40bdaa5cb26b9b7bea472") 10000
+	//	ObjectId("5da44deaa5cb26b9b75efb38") 50000
+	//	ObjectId("5da53440a5cb26b9b7d3f9aa") 100000
+	//	ObjectId("5db2735ba5cb26b9b7c99c6f") 761414
+	//*/
+	//
+	///*
+	//qfw-bidding
+	//
+	//ObjectId("5e0d4cdd0cf41612e063fc65")  -1
+	//ObjectId("5df8bfe4e9d1f601e4e87431") 一百万
+	//ObjectId("5dea080ce9d1f601e45cb838") 二百万
+	//
+	//5df834dd // 半月         大约100万条
+	//
+	//*/
+	//sess := mgo.GetMgoConn()
+	//defer mgo.DestoryMongoConn(sess)
+	////q := map[string]interface{}{
+	////	"_id": map[string]interface{}{
+	////		"$gt":  util.StringTOBsonId("5dea080ce9d1f601e45cb838"),
+	////		"$lte": util.StringTOBsonId("5e0d4cdd0cf41612e063fc65"),
+	////	},
+	////}
+	//it := sess.DB(mgo.DbName).C("bidding").Find(nil).Sort("-_id").Iter()
+	//
+	////爬虫组
+	//crawlerMap,n := make(map[string]map[string]interface{},0),0
+	//
+	//for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
+	//	if n%10000==0 {
+	//		log.Println("当前n:",n)
+	//	}
+	//
+	//	if n>3000000 {
+	//		break
+	//	}
+	//
+	//	if tmp["spidercode"]!="" {
+	//		//判断是否有此类别分组
+	//		dict := make(map[string]interface{},0)
+	//		if crawlerMap[tmp["spidercode"].(string)]!= nil {
+	//			dict = crawlerMap[tmp["spidercode"].(string)]
+	//		}
+	//		jsonData := util.ObjToMap(tmp["jsondata"])
+	//
+	//		if jsonData!=nil {
+	//			for k,v :=range *jsonData  {
+	//				if fmt.Sprint(v) ==""{
+	//					//无效数据
+	//				}else {
+	//					if field_map[k]=="1" {
+	//						arr := dict[k]
+	//						if arr==nil {
+	//							dict[k] = make([]string,0)
+	//							dict[k] = append(dict[k].([]string),fmt.Sprint(v))
+	//						}else {
+	//							dict[k] = append(dict[k].([]string),fmt.Sprint(v))
+	//						}
+	//					}
+	//				}
+	//			}
+	//		}
+	//		if dict!=nil {
+	//			crawlerMap[tmp["spidercode"].(string)] = dict
+	//		}
+	//	}
+	//}
+	//
+	//log.Println("总计",n,"条数据")
+	//log.Println("爬虫类别个数:",len(crawlerMap))
+	//
+	//
+	////计算每个爬虫分类的总数-并添加
+	//
+	////ObjectId("5e0d4cdd0cf41612e063fc65")
+	//arr :=make([]map[string]interface{},0)
+	//for k,v :=range crawlerMap  {
+	//	total :=0
+	//	for _,v1 :=range v {
+	//		total =total + len(v1.([]string))
+	//	}
+	//	v["total"]= total
+	//	v["key"] = k
+	//	arr = append(arr,v)
+	//}
+	//
+	//
+	////爬虫类别下-有效字段总数排列 前100
+	//start := time.Now().Unix()
+	//quickSort(0,len(arr)-1,&arr)
+	//end :=time.Now().Unix()
+	//fmt.Println("耗时:",end-start,"秒")
+	//
+	//f :=xlsx.NewFile()
+	//sheet, _ := f.AddSheet("排序")
+	//
+	////第一行先写标题
+	//row1 := sheet.AddRow()
+	//row1.AddCell().Value = "排名"
+	//row1.AddCell().Value = "爬虫类"
+	//row1.AddCell().Value = "字段有效数"
+	//
+	//mapLock := &sync.Mutex{}
+	//limit :=0
+	//for _,v :=range arr  {
+	//	limit++
+	//	row := sheet.AddRow()
+	//	row.AddCell().SetInt(limit)
+	//	row.AddCell().SetString(v["key"].(string))
+	//	row.AddCell().SetInt(v["total"].(int))
+	//
+	//	if limit <=20 {
+	//		mapLock.Lock()
+	//		sheetName := "排名"+util.ObjToString(limit)+":"+util.ObjToString(v["key"])
+	//		sheet_detail, err := f.AddSheet(sheetName)
+	//		if err==nil {
+	//			row_num,col_num :=0,0
+	//			for k1,v1 := range v {
+	//				if a,ok :=v1.([]string);ok {
+	//					for k2, v2 := range a {
+	//						if k2==0 {
+	//							sheet_detail.Cell(row_num, col_num).Value = util.ObjToString(k1)
+	//							row_num++
+	//							sheet_detail.Cell(row_num, col_num).Value = v2
+	//						}else {
+	//							if row_num>2000 {
+	//								continue
+	//							}
+	//							sheet_detail.Cell(row_num, col_num).Value = v2
+	//						}
+	//						row_num++
+	//					}
+	//					row_num = 0
+	//					col_num++
+	//				}
+	//			}
+	//		}
+	//
+	//		mapLock.Unlock()
+	//	}
+	//
+	//
+	//
+	//	if limit >99{
+	//		break
+	//	}
+	//}
+	//
+	//
+	//err := f.Save("zheng.xlsx")
+	//if err != nil {
+	//	log.Println("保存xlsx失败:", err)
+	//	return
+	//}
+	//log.Println("xlsx保存成功")
+//}
+
+
+// quickSort sorts (*array)[left..right] (both bounds inclusive) in place, in
+// descending order of each element's "total" entry as converted by util.IntAll.
+//
+// The call is a no-op when array is nil, when the bounds fall outside the
+// slice, or when the range holds fewer than two elements. Without this guard
+// the usual call quickSort(0, len(arr)-1, &arr) on an empty slice evaluated
+// (*array)[(0-1)/2], i.e. element 0, and panicked.
+func quickSort(left int, right int, array *[]map[string]interface{}) {
+	if array == nil || left < 0 || right >= len(*array) || left >= right {
+		return
+	}
+
+	// items shares its backing array with *array, so swaps are visible to the caller.
+	items := *array
+	total := func(i int) int { return util.IntAll(items[i]["total"]) }
+
+	l := left
+	r := right
+
+	pivot := total((left + right) / 2) // value at the middle position
+	// Partition: values greater than pivot end up on the left, smaller on the right.
+	for l < r {
+		// Advance l to the first value that is not greater than pivot.
+		for total(l) > pivot {
+			l++
+		}
+		// Move r back to the first value that is not smaller than pivot.
+		for total(r) < pivot {
+			r--
+		}
+		// The cursors met or crossed: this partition step is done.
+		if l >= r {
+			break
+		}
+
+		items[l], items[r] = items[r], items[l]
+		// After the swap, step past values equal to pivot so that runs of
+		// equal values cannot stall the loop.
+		if total(l) == pivot {
+			r--
+		}
+		if total(r) == pivot {
+			l++
+		}
+
+	}
+	// Keep the two sub-ranges disjoint when the cursors stopped on the same index.
+	if l == r {
+		l++
+		r--
+	}
+	// Recurse into the left part.
+	if left < r {
+		quickSort(left, r, array)
+	}
+	// Recurse into the right part.
+	if right > l {
+		quickSort(l, right, array)
+	}
+
+}

BIN
udpprojectset/src/zheng.xlsx


+ 4 - 0
udps/main.go

@@ -25,7 +25,11 @@ func main() {
 
 
 	/*
+	ObjectId("5da3f2c5a5cb26b9b79847fc")
+	ObjectId("5db2735ba5cb26b9b7c99c6f")
 
+	5da3f2c5a5cb26b9b79847fc
+	5db2735ba5cb26b9b7c99c6f
 	*/
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff