Selaa lähdekoodia

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

fengweiqiang 5 vuotta sitten
vanhempi
commit
64771f010c

+ 12 - 12
fullproject/src_v1/config.json

@@ -1,22 +1,22 @@
 {
-    "loadStart":-1,
+    "loadStart": 0,
 	"validdays":150,
+    "statusdays": 7,
 	"mongodbServers": "192.168.3.207:27082",
     "mongodbPoolSize": 10,
-    "mongodbName": "cesuo",
-	"hints":"_id_1_publishtime_1",
-    "extractColl": "key1_biddingall",
-    "projectColl": "projectset_zjk",
+    "mongodbName": "extract_kf",
+	"hints":"publishtime_1",
+    "extractColl": "jh_info",
+    "projectColl": "jh_project",
+    "backupFlag": true,
+    "backupColl": "jh_project1",
+    "siteColl": "site",
+
     "jkmail": {
-        "to": "zhangjinkun@topnet.net.cn",
+        "to": "wangjianghan@topnet.net.cn",
         "api": "http://10.171.112.160:19281/_send/_mail"
     },
-    "udpport": ":1482",
+    "udpport": ":1182",
     "nextNode": [
-        {
-            "addr": "172.17.145.163",
-            "port": 1483,
-            "memo": "创建项目索引new"
-        }
     ]
 }

+ 17 - 5
fullproject/src_v1/init.go

@@ -5,14 +5,11 @@ import (
 	"math"
 	mu "mfw/util"
 	"qfw/util"
-	//"qfw/util/mongodb"
 	"regexp"
 	"sort"
 	"strings"
 	"sync"
 
-	//"gopkg.in/mgo.v2/bson"
-	//"go.mongodb.org/mongo-driver/bson"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 )
 
@@ -23,7 +20,7 @@ const (
 var (
 	Sysconfig                map[string]interface{} //读取配置文件
 	MongoTool                *MongodbSim            //mongodb连接
-	ExtractColl, ProjectColl string                 //抽取表、项目
+	ExtractColl, ProjectColl, BackupColl, SiteColl string	//抽取表、项目表、项目快照表、站点
 	//NextNode                 []interface{}
 )
 
@@ -65,6 +62,8 @@ func init() {
 
 	ExtractColl = Sysconfig["extractColl"].(string)
 	ProjectColl = Sysconfig["projectColl"].(string)
+	BackupColl = Sysconfig["backupColl"].(string)
+	SiteColl = Sysconfig["siteColl"].(string)
 	//NextNode = Sysconfig["nextNode"].([]interface{})
 	udpport, _ := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
@@ -201,7 +200,6 @@ type KeyMap struct {
 type ID struct {
 	Id   string
 	Lock sync.Mutex
-	pos  int
 	P    *ProjectInfo
 }
 type Key struct {
@@ -225,6 +223,7 @@ type Info struct {
 	Id          string                 `json:"_id"`
 	Href        string                 `json:"href"` //源地址
 	Publishtime int64                  `json:"publishtime"`
+	Comeintime  int64				   `json:"comeintime"`
 	Title       string                 `json:"title"`
 	TopType     string                 `json:"toptype"`
 	SubType     string                 `json:"subtype"`
@@ -237,6 +236,7 @@ type Info struct {
 	Area        string                 `json:"area"`
 	City        string                 `json:"city"`
 	District    string                 `json:"district"`
+	Infoformat  int					   `json:"infoformat"`
 	HasPackage  bool                   // `json:"haspackage"`
 	Package     map[string]interface{} `json:"package"`
 	//PNum          string                 `json:"pnum"`
@@ -282,6 +282,8 @@ type ProjectInfo struct {
 	Area          string             `json:"area"`                    //地区
 	City          string             `json:"city"`                    //地市
 	District      string             `json:"district"`                //区县
+	Bidstatus     string			 `json:"bidstatus"`				  //
+	Bidtype		  string			 `json:"bidtype"`				  //
 	//HasPackage    bool                   `json:"haspackage"`              //是否有分包
 	Package     map[string]interface{} `json:"package,omitempty"`     //分包的对比对象
 	Buyerclass  string                 `json:"buyerclass"`            //采购单位分类
@@ -294,6 +296,16 @@ type ProjectInfo struct {
 	score         int
 	comStr        string
 	resVal, pjVal int
+	IdStatusInfo  map[string]map[string]interface{}
+}
+
+// Site is one record of the site collection; its fields are filled through
+// the JSON tags below.
+type Site struct {
+	Id			string				`json:"_id"`
+	Site		string				`json:"site"`			// site name
+	Area 		string				`json:"area"`			// province
+	City		string				`json:"city"`			// city
+	District	string				`json:"district"`		// district / county
+	Domain		string				`json:"domain"`			// address (domain) of the site
 }
 
 //二分字符串查找

+ 48 - 4
fullproject/src_v1/load_data.go

@@ -9,7 +9,7 @@ import (
 //初始加载数据,默认加载最近6个月的数据
 
 func (p *ProjectTask) loadData(starttime int64) {
-	log.Println("load start..", starttime)
+	log.Println("load project start..", starttime)
 	p.findLock.Lock()
 	defer p.findLock.Unlock()
 	p.AllIdsMapLock.Lock()
@@ -23,7 +23,6 @@ func (p *ProjectTask) loadData(starttime int64) {
 		"list": 0,
 	}).Iter()
 	n := 0
-	//	tmp := &ProjectInfo{}
 	pool := make(chan *ProjectInfo, 100)
 	over := make(chan bool)
 	go func() {
@@ -86,7 +85,7 @@ func (p *ProjectTask) loadData(starttime int64) {
 			go func(res map[string]interface{}) {
 				bys, _ := json.Marshal(result)
 				var tmp *ProjectInfo
-				json.Unmarshal(bys, &tmp)
+				_ = json.Unmarshal(bys, &tmp)
 				pool <- tmp
 			}(result)
 		} else {
@@ -95,5 +94,50 @@ func (p *ProjectTask) loadData(starttime int64) {
 	}
 	time.Sleep(2 * time.Second)
 	over <- true
-	log.Println("load over..", n)
+	log.Println("load project over..", n)
+}
+
+func (p *ProjectTask) loadSite() {
+	log.Println("load site start..")
+	p.findLock.Lock()
+	defer p.findLock.Unlock()
+	p.mapSiteLock.Lock()
+	defer p.mapSiteLock.Unlock()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	q := map[string]interface{}{}
+	it := sess.DB(MongoTool.DbName).C(SiteColl).Find(&q).Iter()
+	n := 0
+	pool := make(chan *Site, 100)
+	over := make(chan bool)
+	go func() {
+		for {
+			select {
+			case tmp := <-pool:
+				n++
+				if tmp != nil {
+					p.mapSite[tmp.Site] = tmp
+				}
+			case <-over:
+				return
+			}
+		}
+	}()
+	for {
+		result := make(map[string]interface{})
+		if it.Next(&result) {
+			go func(res map[string]interface{}) {
+				bys, _ := json.Marshal(result)
+				var tmp *Site
+				_ = json.Unmarshal(bys, &tmp)
+				pool <- tmp
+			}(result)
+		} else {
+			break
+		}
+	}
+	time.Sleep(2 * time.Second)
+	over <- true
+	log.Println("load site over..", n)
+
 }

+ 12 - 3
fullproject/src_v1/main.go

@@ -68,7 +68,8 @@ func main() {
 			P_QL.loadData(loadStart)
 		}
 	}
-	go checkMapJob()
+	P_QL.loadSite()
+	//go checkMapJob()
 	time.Sleep(99999 * time.Hour)
 }
 
@@ -80,7 +81,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		err := json.Unmarshal(data, &mapInfo)
 		log.Println("err:", err, "mapInfo:", mapInfo)
 		if err != nil {
-			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+			_ = udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
 		} else if mapInfo != nil {
 			key, _ := mapInfo["key"].(string)
 			if key == "" {
@@ -110,7 +111,15 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					P_QL.pici = time.Now().Unix()
 					P_QL.taskZl(mapInfo)
 				}()
-
+			case "updateInfo":		//招标字段变更
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+					P_QL.currentType = tasktype
+					P_QL.pici = time.Now().Unix()
+					P_QL.taskUpdateInfo(mapInfo)
+				}()
 			case "history": //历史数据合并,暂时不写
 				go func() {
 					defer func() {

+ 10 - 0
fullproject/src_v1/mgotool.go

@@ -247,3 +247,13 @@ func (m *MongodbSim) Delete(c, id string) int64 {
 	}
 	return r.DeletedCount
 }
+
+// FindById looks up the document whose _id equals id (converted with
+// StringTOBsonId) in collection c and returns it as a map.
+// The decode error is discarded, so a missing document yields an empty,
+// non-nil map.
+func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	doc := map[string]interface{}{}
+	filter := map[string]interface{}{"_id": StringTOBsonId(id)}
+	_ = m.C.Database(m.DbName).Collection(c).FindOne(m.Ctx, filter).Decode(&doc)
+	return doc
+}

+ 410 - 180
fullproject/src_v1/project.go

@@ -124,7 +124,16 @@ func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{})
 		//问题出地LastTime!!!!!
 		diffTime := int64(math.Abs(float64(info.Publishtime - compareProject.LastTime)))
 		if diffTime <= p.validTime {
-			//"A 相等 	B 被包含 	C 不相等	 	D不存在  E被包含
+
+			//代理机构完全不相同,直接新建项目
+			if CheckContain(compareProject.Agency, info.Agency) == 3 {
+				continue
+			}
+			//地区(省、市、区)不同,直接新建项目
+			if ComparePlace(compareProject, info) {
+				continue
+			}
+
 			info.PNBH = 0
 			info.PCBH = 0
 			info.PTCBH = 0
@@ -142,72 +151,7 @@ func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{})
 				compareProject.pjVal = pjVal
 				compareProject.resVal = resVal
 				//log.Println(compareProject.comStr)
-				eqV := 0
-				switch resVal {
-				case 3:
-					if pjVal == 3 && comStr[3:] != "CCCDCCC" {
-						eqV = 1
-					} else if compareBuyer < "C" {
-						if pjVal > 1 {
-							eqV = 1
-						} else { //if (compareCity[1:1] != "C" || compareTime != "D") && score2 > 0
-							eqV = 2
-						}
-					} else if compareBuyer == "D" {
-						if pjVal > 1 && (compareCity[1:1] != "C" || score2 > 0) {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
-							eqV = 3
-						}
-					} else {
-						if pjVal == 3 && (score2 > 0 || compareCity[1:1] != "C") {
-							eqV = 2
-						} else if pjVal == 2 && compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
-							eqV = 3
-						} else if compareCity == "AA" && compareTime == "A" && score2 > 0 {
-							eqV = 3
-						}
-					}
-				case 2:
-					if compareBuyer < "C" {
-						if pjVal > 1 {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" || score2 > 0 {
-							eqV = 3
-						}
-					} else if compareBuyer == "D" {
-						if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
-							eqV = 3
-						}
-
-					} else {
-						if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
-							eqV = 3
-						}
-					}
-				case 1:
-					if compareBuyer < "C" {
-						if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
-							eqV = 3
-						}
-					} else if compareBuyer == "D" {
-						if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
-							eqV = 2
-						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
-							eqV = 3
-						}
-					} else {
-						if pjVal > 1 && compareTime == "A" && score2 > 0 && (compareBudget == "A" || compareBidmount == "A") && compareCity[1:1] != "C" {
-							eqV = 3
-						}
-					}
-				}
+				eqV := compareResult(resVal, pjVal, score2, comStr, compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount)
 				if eqV == 1 {
 					comRes1 = append(comRes1, compareProject)
 				} else if eqV == 2 {
@@ -215,14 +159,11 @@ func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{})
 				} else if eqV == 3 {
 					comRes3 = append(comRes3, compareProject)
 				}
-				//				else if resVal == 3 || pjVal > 1 {
-				//					log.Println("===", resVal, pjVal, comStr, info.ProjectCode, compareProject.ProjectCode,
-				//						info.ProjectName, compareProject.ProjectName, info.Buyer, compareProject.Buyer, info.Id, compareProject.Id.Hex())
-				//				}
 			}
 		}
 	}
 	//--------------------------------对比完成-----------------------
+
 	//更新数组、更新项目
 	for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
 		if len(resN) > 0 {
@@ -232,47 +173,60 @@ func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{})
 				})
 			}
 
-			bFindProject = true
-			findPid = resN[0].Id.Hex()
-			for k2, bv := range []int{bpn, bpc, bptc, bpb} {
-				if bv > -1 {
-					pids[bv].Arr = append(pids[bv].Arr, findPid)
-					if k2 == 0 {
-						if resN[0].ProjectName == "" {
-							resN[0].ProjectName = info.ProjectName
-						} else {
-							if resN[0].MPN == nil {
-								resN[0].MPN = []string{info.ProjectName}
+			ex := 0
+			resArr := []*ProjectInfo{}
+			for _, res := range resN {
+				choose, e := p.CompareStatus(resN[0], info)
+				if !choose {
+					ex = e
+					resArr = append(resArr, res)
+				}
+			}
+			if len(resArr) > 0 {
+				bFindProject = true
+				findPid = resArr[0].Id.Hex()
+				for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+					if bv > -1 {
+						pids[bv].Arr = append(pids[bv].Arr, findPid)
+						if k2 == 0 {
+							if resArr[0].ProjectName == "" {
+								resArr[0].ProjectName = info.ProjectName
 							} else {
-								resN[0].MPN = append(resN[0].MPN, info.ProjectName)
+								if resArr[0].MPN == nil {
+									resArr[0].MPN = []string{info.ProjectName}
+								} else {
+									resArr[0].MPN = append(resArr[0].MPN, info.ProjectName)
+								}
 							}
-						}
 
-					} else if k2 < 3 {
-						if resN[0].ProjectCode == "" {
-							resN[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
-						} else {
-							if resN[0].MPC == nil {
-								resN[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+						} else if k2 < 3 {
+							if resArr[0].ProjectCode == "" {
+								resArr[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
 							} else {
-								resN[0].MPC = append(resN[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+								if resArr[0].MPC == nil {
+									resArr[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+								} else {
+									resArr[0].MPC = append(resArr[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+								}
 							}
-						}
 
-					} else {
-						if resN[0].Buyer == "" {
-							resN[0].Buyer = info.Buyer
+						} else {
+							if resArr[0].Buyer == "" {
+								resArr[0].Buyer = info.Buyer
+							}
 						}
 					}
 				}
+				p.UpdateProject(tmp, info, resArr[0], kv+1, resArr[0].comStr, ex)
+			} else {
+				bFindProject = false
+				findPid = ""
 			}
-			p.UpdateProject(tmp, info, resN[0], kv+1, resN[0].comStr)
 			break
 		}
 	}
 
 	if !bFindProject {
-		//没有找到
 		id, p1 := p.NewProject(tmp, info)
 		p.AllIdsMapLock.Lock()
 		p.AllIdsMap[id] = &ID{Id: id, P: p1}
@@ -281,7 +235,6 @@ func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{})
 			m.Arr = append(m.Arr, id)
 		}
 	}
-
 }
 
 func (p *ProjectTask) compareBCTABB(info *Info, cp *ProjectInfo, diffTime int64, score int) (compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount string, score2 int) {
@@ -384,6 +337,76 @@ func (p *ProjectTask) compareBCTABB(info *Info, cp *ProjectInfo, diffTime int64,
 	return
 }
 
+// compareResult maps the field-by-field comparison of a notice against a
+// candidate project to a match tier: 1, 2 or 3 when a rule matches, 0 when
+// none does. The outer switch is on resVal (3, 2, 1); within each case the
+// rules branch on compareBuyer, then on pjVal, score2 and the other
+// comparison codes.
+//
+// The compare* arguments are single-letter codes, except compareCity which is
+// compared against the two-letter value "AA" — presumably A = equal,
+// B = contained, C = different, D = missing; confirm against the code that
+// produces them.
+//
+// NOTE(review): compareCity[1:1] is an empty slice expression, so every
+// `compareCity[1:1] != "C"` test below is always true (and panics if
+// compareCity is empty). It looks like compareCity[1:2] was intended — confirm
+// before changing, since it alters which projects merge.
+// NOTE(review): comStr[3:] panics when comStr is shorter than 3 bytes.
+func compareResult(resVal, pjVal, score2 int, comStr, compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount string) int {
+	eqV := 0
+	switch resVal {
+	case 3:
+		if pjVal == 3 && comStr[3:] != "CCCDCCC" {
+			eqV = 1
+		} else if compareBuyer < "C" {
+			if pjVal > 1 {
+				eqV = 1
+			} else { //if (compareCity[1:1] != "C" || compareTime != "D") && score2 > 0
+				eqV = 2
+			}
+		} else if compareBuyer == "D" {
+			if pjVal > 1 && (compareCity[1:1] != "C" || score2 > 0) {
+				eqV = 2
+			} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+				eqV = 3
+			}
+		} else {
+			if pjVal == 3 && (score2 > 0 || compareCity[1:1] != "C") {
+				eqV = 2
+			} else if pjVal == 2 && compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+				eqV = 3
+			} else if compareCity == "AA" && compareTime == "A" && score2 > 0 {
+				eqV = 3
+			}
+		}
+	case 2:
+		if compareBuyer < "C" {
+			if pjVal > 1 {
+				eqV = 2
+				// NOTE(review): && binds tighter than ||, so this reads
+				// (city && time) || score2 > 0 — confirm that is intended.
+			} else if compareCity[1:1] != "C" && compareTime == "A" || score2 > 0 {
+				eqV = 3
+			}
+		} else if compareBuyer == "D" {
+			if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
+				eqV = 2
+			} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+				eqV = 3
+			}
+
+		} else {
+			if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
+				eqV = 2
+			} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
+				eqV = 3
+			}
+		}
+	case 1:
+		if compareBuyer < "C" {
+			if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
+				eqV = 2
+			} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
+				eqV = 3
+			}
+		} else if compareBuyer == "D" {
+			if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
+				eqV = 2
+			} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 0) && (compareBudget == "A" || compareBidmount == "A") {
+				eqV = 3
+			}
+		} else {
+			if pjVal > 1 && compareTime == "A" && score2 > 0 && (compareBudget == "A" || compareBidmount == "A") && compareCity[1:1] != "C" {
+				eqV = 3
+			}
+		}
+	}
+	return eqV
+}
+
 var FIELDS = []string{
 	"area",
 	"city",
@@ -409,33 +432,27 @@ var FIELDS = []string{
 
 var bidtype = map[string]string{
 	"招标": "招标",
-	"邀标": "邀标",
 	"询价": "询价",
 	"竞谈": "竞谈",
 	"单一": "单一",
 	"竞价": "竞价",
+	"变更": "变更",
+	"邀标": "邀标",
 }
 
-//var bidstatus = map[string]string{
-//	"中标": "中标",
-//	"成交": "成交",
-//	"废标": "废标",
-//	"流标": "流标",
-//	"合同": "合同",
-//}
 var bidstatus = map[string]string{
 	"预告": "预告",
 	"中标": "中标",
 	"成交": "成交",
 	"废标": "废标",
 	"流标": "流标",
+	"合同": "合同",
 }
 
 //招标时间zbtime、中标时间jgtime、项目状态bidstatus、招标类型bidtype、最后发布时间lasttime、首次发布时间firsttime
 
 func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (string, *ProjectInfo) {
 	pId := primitive.NewObjectID() //NewObjectId()
-	p1 := p.NewCachePinfo(pId, thisinfo)
 	set := map[string]interface{}{}
 	set["_id"] = pId
 	for _, f := range FIELDS {
@@ -454,7 +471,7 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 		set["bidopentime"] = bidopentime
 	}
 	if thisinfo.ProjectName != "" {
-		set["s_projectname"] = tmp["projectname"] //兼容老版本
+		set["s_projectname"] = tmp["projectname"]
 	}
 	now := time.Now().Unix()
 	set["createtime"] = now
@@ -469,19 +486,40 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	} else if thisinfo.TopType == "结果" {
 		set["jgtime"] = tmp["publishtime"]
 	}
+	//异常标记
+	if thisinfo.TopType != "招标" && thisinfo.TopType != "拟建" && thisinfo.TopType != "预告" {
+		set["exception"] = 1
+	}
+	//projecthref保存
+	if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
+		if jsonData != nil && jsonData["projecthref"] != "" {
+			set["projecthref"] = jsonData["projecthref"]
+		}
+	}
+
 	//招标类型
+	p.mapBidLock.Lock()
 	bt := bidtype[thisinfo.SubType]
+	p.mapBidLock.Unlock()
 	if bt == "" {
 		bt = "招标"
 	}
 	set["bidtype"] = bt
-	bs, _ := tmp["bidstatus"].(string)
-	if bidstatus[bs] != "" {
-		set["bidstatus"] = bs
-	}
-	if set["bidstatus"] == nil && thisinfo.TopType == "结果" {
-		set["bidstatus"] = thisinfo.SubType
+	set["bidstatus"] = thisinfo.SubType
+
+	p1, pkg := p.NewCachePinfo(pId, thisinfo, bt)
+	//招标信息是中标或者成交,保存bidstatus、budget、bidamount
+	if thisinfo.SubType == "中标" || thisinfo.SubType == "成交" {
+		p1.IdStatusInfo = map[string]map[string]interface{}{
+			thisinfo.Id: {
+				"projectname": thisinfo.ProjectName,
+				"bidstatus": thisinfo.SubType,
+				"budget": thisinfo.Budget,
+				"bidamount": thisinfo.Bidamount,
+			},
+		}
 	}
+
 	if len(thisinfo.Subscopeclass) > 0 {
 		s_subscopeclass := strings.Join(thisinfo.Subscopeclass, ",")
 		set["s_subscopeclass"] = s_subscopeclass
@@ -492,6 +530,7 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	}
 	if thisinfo.HasPackage {
 		set["multipackage"] = 1
+		set["package"] = pkg
 	} else {
 		set["multipackage"] = 0
 	}
@@ -501,10 +540,10 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	}
 	//p.savePool <- set
 	p.updatePool <- []map[string]interface{}{
-		map[string]interface{}{
+		{
 			"_id": pId,
 		},
-		map[string]interface{}{
+		{
 			"$set": set,
 		},
 	}
@@ -534,6 +573,7 @@ var INFOFIELDS = []string{
 	"buyertel",
 	"area",
 	"city",
+	"list",
 }
 
 //项目中list的信息
@@ -550,7 +590,11 @@ func (p *ProjectTask) PushListInfo(tmp map[string]interface{}, infoid string) bs
 }
 
 //生成存放在内存中的对象
-func (p *ProjectTask) NewCachePinfo(id primitive.ObjectID, thisinfo *Info) ProjectInfo {
+func (p *ProjectTask) NewCachePinfo(id primitive.ObjectID, thisinfo *Info, bidtype string) (ProjectInfo, map[string]interface{}) {
+	pkg := map[string]interface{}{}
+	if thisinfo.HasPackage {
+		pkg = PackageFormat(thisinfo, nil)
+	}
 	p1 := ProjectInfo{
 		Id:            id,
 		Ids:           []string{thisinfo.Id},
@@ -571,18 +615,20 @@ func (p *ProjectTask) NewCachePinfo(id primitive.ObjectID, thisinfo *Info) Proje
 		FirstTime:     thisinfo.Publishtime,
 		LastTime:      thisinfo.Publishtime,
 		Budget:        thisinfo.Budget,
-		Package:       thisinfo.Package,
+		Package:       pkg,
 		Bidamount:     thisinfo.Bidamount,
+		Bidstatus:     thisinfo.SubType,
+		Bidtype:       bidtype,
 	}
 	if thisinfo.LenPTC > 5 {
 		p1.MPC = append(p1.MPC, thisinfo.PTC)
 	}
-	return p1
+	return p1, pkg
 }
 
 //更新项目
-func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string) {
-	if p.currentType != "ql" {
+func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string, ex int) {
+	if p.currentType != "ql" && p.currentType != "updateInfo" {
 		if BinarySearch(pInfo.Ids, thisinfo.Id) > -1 {
 			log.Println("repeat", thisinfo.Id)
 			return
@@ -603,33 +649,45 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 	if thisinfo.Publishtime > pInfo.LastTime {
 		pInfo.LastTime = thisinfo.Publishtime
 		set["lasttime"] = thisinfo.Publishtime
+		p.mapBidLock.Lock()
+		defer p.mapBidLock.Unlock()
 		bt := bidtype[thisinfo.SubType]
 		if bt != "" {
 			set["bidtype"] = bt
 		}
-		//if thisinfo.TopType == "结果" {
-		//	set["bidstatus"] = thisinfo.SubType
-		//	set["jgtime"] = tmp["publishtime"]
-		//}
-		bs, _ := tmp["bidstatus"].(string)
+		bs, _ := tmp["subtype"].(string)
 		if bidstatus[bs] != "" {
 			set["bidstatus"] = thisinfo.SubType
-			if bidstatus[bs] != "预告" {
+			if bidstatus[bs] != "预告" && bidstatus[bs] != "合同" {
 				set["jgtime"] = tmp["publishtime"]
 			}
-		}else if tmp["infoformat"] == 2 {
+		} else if tmp["infoformat"] == 2 {
 			set["bidstatus"] = "拟建"
-		}else if tmp["bidstatus"] == "招标" {
+		} else if tmp["subytpe"] == "招标" {
 			set["bidstatus"] = thisinfo.TopType
-		}else {
-			set["bidstatus"] = thisinfo.SubType
+		} else {
+			set["bidstatus"] = "其它"
 		}
+	}
 
+	//废标、流标   处理时间
+	if thisinfo.SubType == "流标" || thisinfo.SubType == "废标" {
+		pInfo.FirstTime = thisinfo.Publishtime
+		pInfo.Bidopentime = int64(0)
+		pInfo.LastTime = thisinfo.Publishtime
+
+		set["firsttime"] = thisinfo.Publishtime
+		set["zbtime"] = int64(0)
+		set["publishtime"] = thisinfo.Publishtime
+		set["bidopentime"] = int64(0)
 	}
 
+	//异常标记
+	if ex > 0 {
+		set["exception"] = ex
+	}
 	//3\4\5--省、市、县
 	if thisinfo.Area != "全国" {
-		//xt := true
 		if pInfo.Area == "全国" {
 			pInfo.Area = thisinfo.Area
 			set["area"] = thisinfo.Area
@@ -646,10 +704,6 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 			pInfo.District = thisinfo.District
 			set["district"] = thisinfo.District
 		}
-		//省市县有不相同的
-		//		if !xt {
-		//			log.Println(pInfo.Area, pInfo.City, thisinfo.Area, thisinfo.District)
-		//		}
 	}
 	//6--项目名称
 	if (thisinfo.ProjectName != "" && pInfo.ProjectName == "") || (len([]rune(pInfo.ProjectName)) < 6 && thisinfo.LenPN > 6) {
@@ -673,11 +727,6 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 	if pInfo.Buyer == "" {
 		set["buyerclass"] = ""
 	}
-	//if thisinfo.Buyerclass != "" && pInfo.Buyerclass == "" {
-	//	pInfo.Buyerclass = thisinfo.Buyerclass
-	//	set["buyerclass"] = pInfo.Buyerclass
-	//}
-
 
 	//8--代理机构
 	if (pInfo.Agency == "" && thisinfo.Agency != "") || (len([]rune(pInfo.Agency)) < 5 && len([]rune(thisinfo.Agency)) > 5) {
@@ -699,15 +748,6 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 		pInfo.Bidopentime = thisinfo.Bidopentime
 		set["bidopentime"] = pInfo.Bidopentime
 	}
-	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
-		pInfo.Bidamount = thisinfo.Bidamount
-		set["bidamount"] = pInfo.Bidamount
-	}
-
-	if thisinfo.Budget > 0 && pInfo.Budget < 1 { //多包的会有问题,没有进行合计。
-		pInfo.Budget = thisinfo.Budget
-		set["budget"] = pInfo.Budget
-	}
 
 	if len(thisinfo.Topscopeclass) > 0 {
 		sort.Strings(pInfo.Topscopeclass)
@@ -745,42 +785,23 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 	}
 
 	if thisinfo.HasPackage { //多包处理
-		p2, _ := tmp["package"].(map[string]interface{})
-		if p2 != nil {
-			if pInfo.Package != nil {
-				for pk2, pv2 := range p2 {
-					if pInfo.Package[pk2] != nil { //合并
-						item1, _ := pInfo.Package[pk2].(map[string]interface{})
-						item2, _ := pv2.(map[string]interface{})
-						if item1 != nil && item2 != nil { //原始项
-							for ik1, iv1 := range item2 {
-								if item1[ik1] == nil {
-									item1[ik1] = iv1
-								}
-							}
-						}
-						pInfo.Package[pk2] = item1
-					} else {
-						pInfo.Package[pk2] = pv2
-					}
-				}
-			} else {
-				pInfo.Package = p2
-			}
-		}
+		set["multipackage"] = 1
+		pkg := PackageFormat(thisinfo, pInfo)
+		pInfo.Package = pkg
 		set["package"] = pInfo.Package
+	} else {
+		set["multipackage"] = 0
 	}
+	//处理多包后,计算预算金额、中标金额
+	CountAmount(pInfo, thisinfo)
+	set["budget"] = pInfo.Budget
+	set["bidamount"] = pInfo.Bidamount
+	//
 
 	set["mpn"] = pInfo.MPN
 	set["mpc"] = pInfo.MPC
 	set["pici"] = p.pici
 
-	if thisinfo.HasPackage {
-		set["multipackage"] = 1
-	} else {
-		set["multipackage"] = 0
-	}
-
 	update := map[string]interface{}{}
 	if len(set) > 0 {
 		update["$set"] = set
@@ -796,7 +817,7 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 	}
 	if len(update) > 0 {
 		updateInfo := []map[string]interface{}{
-			map[string]interface{}{
+			{
 				"_id": pInfo.Id,
 			},
 			update,
@@ -804,3 +825,212 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 		p.updatePool <- updateInfo
 	}
 }
+
+// CompareStatus decides, from the bid status of a candidate project and the
+// type of an incoming notice, whether the notice must start a new project
+// instead of being merged into the candidate.
+//
+// It returns (newProject, exception):
+//   - newProject is true when the notice must NOT be merged into project.
+//   - exception is the anomaly mark to store on the project when merging:
+//     2 — a result notice is merged into a project whose status is
+//     "流标"/"废标"; 3 — a result notice is merged into a project whose status
+//     is "合同"/"其它"; 0 — no anomaly.
+//
+// Anomaly 1 (first notice of a project is not 招标/拟建/预告) is only raised when
+// a project is created and is therefore not produced here.
+func (p *ProjectTask) CompareStatus(project *ProjectInfo, info *Info) (bool, int) {
+	// The project is still in its tendering phase (no result recorded yet).
+	projectOpen := project.Bidstatus == "拟建" || project.Bidstatus == "预告" || project.Bidstatus == "招标"
+
+	switch info.TopType {
+	case "拟建", "预告", "招标":
+		// A tendering notice only belongs to a project that has no result yet.
+		return !projectOpen, 0
+	case "结果":
+		switch {
+		case projectOpen:
+			return false, 0
+		case project.Bidstatus == info.SubType:
+			// Same status: merge only when the notice was published within
+			// statusTime of the project's first notice. Previously both
+			// branches returned true, which made statusTime ineffective.
+			return info.Publishtime-project.FirstTime > p.statusTime, 0
+		case project.Bidstatus == "成交" && info.SubType == "中标":
+			return true, 0
+		case project.Bidstatus == "流标" || project.Bidstatus == "废标":
+			return false, 2
+		case project.Bidstatus == "合同" || project.Bidstatus == "其它":
+			return false, 3
+		}
+	}
+	return false, 0
+}
+
+// ComparePlace compares the region (province, city, district) of a notice
+// with that of a candidate project. It returns true when both sides carry a
+// region and the regions differ, i.e. the notice must not be merged and a new
+// project is needed; it returns false when they agree or nothing can be
+// concluded.
+func ComparePlace(project *ProjectInfo, info *Info) bool {
+	// A missing or nationwide ("全国") province on either side proves nothing.
+	for _, area := range []string{info.Area, project.Area} {
+		if area == "" || area == "全国" {
+			return false
+		}
+	}
+	switch {
+	case info.Area != project.Area:
+		return true
+	case info.City == "":
+		// Same province and the notice has no city: treated as compatible.
+		return false
+	case info.City != project.City:
+		return true
+	default:
+		// Same province and city: only a non-empty, different district conflicts.
+		return info.District != "" && info.District != project.District
+	}
+}
+
+// PackageEle lists the keys of a package (lot) entry that packageEle copies
+// from a notice's package into the project's package record.
+var PackageEle = []string{
+	"origin",
+	"name",
+	"text",
+	"budget",
+	"winner",
+	"bidamount",
+	"bidamounttype",
+	"currency",
+	"bidstatus",
+}
+
+// packageEle builds the stored form of one package entry: it copies the
+// non-nil values of the keys listed in PackageEle from map1 and tags the
+// result with "infoid" = id. A nil map1 yields an entry holding only "infoid".
+func packageEle(map1 map[string]interface{}, id string) map[string]interface{} {
+	p2 := make(map[string]interface{}, len(PackageEle)+1)
+	for _, k := range PackageEle {
+		if v := map1[k]; v != nil {
+			p2[k] = v
+		}
+	}
+	// "infoid" is not one of the copied keys, so it is set once here instead
+	// of being re-checked on every loop iteration.
+	p2["infoid"] = id
+	return p2
+}
+
+func PackageFormat(info *Info, project *ProjectInfo) map[string]interface{} {
+	budget := 0
+	bidamount := 0
+	p1 := map[string]interface{}{}
+	if project != nil && project.Package != nil && len(project.Package) > 0 {
+		p1 = project.Package
+		for k, v := range info.Package {
+			if v1, ok := v.(map[string]interface{}); ok {
+				v2 := map[string]interface{}{}
+				v2 = packageEle(v1, info.Id)
+				if v2["bidstatus"] == nil {
+					v2["bidstatus"] = info.SubType
+				}
+				if isCount(project, v2[""]) {
+					
+				}
+				if v2["budget"] != nil {
+					budget = budget + v2["budget"].(float64)
+				}
+				if v2["bidamount"] != nil {
+					bidamount = bidamount + v2["bidamount"].(float64)
+				}
+				addFlag := false
+				for k1, v3 := range p1 {
+					if v4, ok := v3.([]map[string]interface{}); ok {
+						if qu.ObjToString(v4[0]["origin"]) == qu.ObjToString(v2["origin"]) && qu.ObjToString(v4[0]["name"]) == qu.ObjToString(v2["name"]) {
+							v4 = append(v4, v2)
+							p1[k1] = v4
+							addFlag = true
+							break
+						}
+					}
+				}
+				if !addFlag {
+					p1[k] = []map[string]interface{}{v2}
+				}
+			}
+		}
+	} else {
+		for k, v := range info.Package {
+			v1, _ := v.(map[string]interface{})
+			p2 := map[string]interface{}{}
+			p2 = packageEle(v1, info.Id)
+			if p2["bidstatus"] == nil {
+				p2["bidstatus"] = info.SubType
+			}
+			if p2["budget"] != nil {
+				budget = p2["budget"].(float64)
+			}
+			if p2["bidamount"] != nil {
+				bidamount = p2["bidamount"].(float64)
+			}
+			p1[k] = []map[string]interface{}{p2}
+		}
+	}
+	info.Budget = budget
+	info.Bidamount = bidamount
+	return p1
+}
+
+// CountAmount updates the project's budget and winning-bid amount from a
+// notice and records the notice in project.IdStatusInfo.
+//
+// When the project name matches titleGetPc (the project is a package/lot
+// project), the notice's amount is added to the project total unless a notice
+// with the same project name was already recorded (isCount). Otherwise the
+// project keeps the larger of its current amount and the notice's amount.
+// The bid amount is only considered for notices whose subtype is
+// "中标", "成交" or "合同".
+//
+// Only this simple handling is implemented for now; multi-package projects
+// are not treated specially.
+func CountAmount(project *ProjectInfo, info *Info) {
+	// Both amounts use the same two facts; compute them once, before this
+	// notice is recorded below.
+	isLot := len(titleGetPc.FindStringSubmatch(project.ProjectName)) > 0
+	counted := isLot && isCount(project, info.ProjectName)
+
+	if info.Budget > 0 {
+		if isLot {
+			if !counted {
+				project.Budget = project.Budget + info.Budget
+			}
+		} else if project.Budget < info.Budget {
+			project.Budget = info.Budget
+		}
+	}
+
+	switch info.SubType {
+	case "中标", "成交", "合同":
+		if info.Bidamount > 0 {
+			if isLot {
+				if !counted {
+					project.Bidamount = project.Bidamount + info.Bidamount
+				}
+			} else if project.Bidamount < info.Bidamount {
+				// Compare against the notice's amount; comparing the project's
+				// amount with itself never updated it.
+				project.Bidamount = info.Bidamount
+			}
+		}
+	}
+
+	// IdStatusInfo is nil for projects that never recorded a notice; writing
+	// to a nil map panics.
+	if project.IdStatusInfo == nil {
+		project.IdStatusInfo = map[string]map[string]interface{}{}
+	}
+	project.IdStatusInfo[info.Id] = map[string]interface{}{
+		"projectname": info.ProjectName,
+		"bidstatus":   info.SubType,
+		"budget":      info.Budget,
+		"bidamount":   info.Bidamount,
+	}
+}
+
+// isCount reports whether project.IdStatusInfo already holds a notice whose
+// "projectname" equals infoName.
+func isCount(project *ProjectInfo, infoName string) bool {
+	// Ranging over a nil or empty map performs no iterations, so no guard is needed.
+	for _, recorded := range project.IdStatusInfo {
+		if recorded["projectname"] == infoName {
+			return true
+		}
+	}
+	return false
+}

+ 241 - 161
fullproject/src_v1/task.go

@@ -37,7 +37,14 @@ type ProjectTask struct {
 	AllIdsMap map[string]*ID
 	//采购单位、项目名称、项目编号
 	mapPb, mapPn, mapPc map[string]*Key
-	//	mapPbLock, mapPnLock, mapPcLock sync.Mutex
+	//流程数据  字段相同,直接合并
+	mapHref     map[string]string
+	mapHrefLock sync.Mutex
+	//站点
+	mapSite     map[string]*Site
+	mapSiteLock sync.Mutex
+	//bidtype、bidstatus 锁
+	mapBidLock sync.Mutex
 	//更新或新增通道
 	updatePool chan []map[string]interface{}
 	//savePool   chan map[string]interface{}
@@ -54,6 +61,7 @@ type ProjectTask struct {
 	saveSize  int
 	pici      int64
 	validTime int64
+	statusTime int64
 	//	LockPool     chan *sync.Mutex
 	//	LockPoolLock sync.Mutex
 	//	m1, m23, m4  map[int]int
@@ -73,12 +81,15 @@ func NewPT() *ProjectTask {
 		mapPb:     make(map[string]*Key, 1500000),
 		mapPn:     make(map[string]*Key, 5000000),
 		mapPc:     make(map[string]*Key, 5000000),
+		mapHref:   make(map[string]string, 1500000),
+		mapSite:   make(map[string]*Site, 1000000),
 		saveSize:  400,
 
 		//saveSign:   make(chan bool, 1),
 		//updateSign: make(chan bool, 1),
 		coll:      ProjectColl,
 		validTime: int64(util.IntAllDef(Sysconfig["validdays"], 150) * 86400),
+		statusTime: int64(util.IntAllDef(Sysconfig["statusdays"], 7) * 86400),
 	}
 	return p
 }
@@ -134,7 +145,7 @@ func (p *ProjectTask) clearMem() {
 	c := cron.New()
 	//在内存中保留最近6个月的信息
 	//跑全量时每4分钟跑一次,跑增量时400分钟跑一次
-	c.AddFunc("50 0/15 * * * *", func() {
+	_ = c.AddFunc("50 0/15 * * * *", func() {
 		if p.currentType == "ql" || p.clearContimes >= 60 {
 			//跳过的次数清零
 			p.clearContimes = 0
@@ -146,6 +157,7 @@ func (p *ProjectTask) clearMem() {
 			//遍历id
 			//所有内存中的项目信息
 			p.AllIdsMapLock.Lock()
+			p.mapHrefLock.Lock()
 
 			//清除计数
 			clearNum := 0
@@ -194,12 +206,18 @@ func (p *ProjectTask) clearMem() {
 							}
 						}
 					}
+					for kHref, pid := range p.mapHref {
+						if pid == k {
+							delete(p.mapHref, kHref)
+						}
+					}
 					v = nil
 				}
 			}
+			p.mapHrefLock.Unlock()
 			p.AllIdsMapLock.Unlock()
 			p.findLock.Unlock()
-			log.Println("清除完成:", clearNum, len(p.AllIdsMap), len(p.mapPn), len(p.mapPc), len(p.mapPb))
+			log.Println("清除完成:", clearNum, len(p.AllIdsMap), len(p.mapPn), len(p.mapPc), len(p.mapPb), len(p.mapHref))
 		} else {
 			p.clearContimes++
 		}
@@ -296,6 +314,37 @@ func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) {
 	}
 }
 
+// taskUpdateInfo starts an "announcement fields updated" run from a UDP request.
+//
+// Recognised keys of udpInfo:
+//   - "db", "coll": database / collection to read; default to MongoTool.DbName
+//     and ExtractColl when absent or empty.
+//   - "thread": worker count (default 4); applied to p.thread when positive.
+//   - "query": an explicit query; when absent, one is built from "gtid"/"lteid"
+//     as an _id range (gtid, lteid] restricted to is_m == 1.
+//
+// The run is skipped (with a log line) when neither a query nor both ids are
+// supplied.
+func (p *ProjectTask) taskUpdateInfo(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	db, _ := udpInfo["db"].(string)
+	if db == "" {
+		db = MongoTool.DbName
+	}
+	coll, _ := udpInfo["coll"].(string)
+	if coll == "" {
+		coll = ExtractColl
+	}
+	thread := util.IntAllDef(udpInfo["thread"], 4)
+	if thread > 0 {
+		p.thread = thread
+	}
+	q, _ := udpInfo["query"].(map[string]interface{})
+	if q == nil {
+		// The id bounds are only needed when no explicit query was sent, so
+		// read them here and without panicking on a missing key.
+		gtid, _ := udpInfo["gtid"].(string)
+		lteid, _ := udpInfo["lteid"].(string)
+		if gtid == "" || lteid == "" {
+			log.Println("taskUpdateInfo: no query and no gtid/lteid supplied, task skipped")
+			return
+		}
+		q = map[string]interface{}{
+			"_id": map[string]interface{}{
+				"$gt":  StringTOBsonId(gtid),
+				"$lte": StringTOBsonId(lteid),
+			},
+			"is_m": 1,
+		}
+	}
+	log.Println("查询语句:", q)
+	p.enter(db, coll, q)
+}
+
 func StringTOBsonId(id string) primitive.ObjectID {
 	objectId, _ := primitive.ObjectIDFromHex(id)
 	return objectId
@@ -313,7 +362,7 @@ func nextNode(mapInfo map[string]interface{}, pici int64) {
 		datas, _ := json.Marshal(mapInfo)
 		node := &udpNode{datas, to, time.Now().Unix(), 0}
 		udptaskmap.Store(key, node)
-		udpclient.WriteUdp(datas, mu.OP_TYPE_DATA, to)
+		_ = udpclient.WriteUdp(datas, mu.OP_TYPE_DATA, to)
 	}
 }
 
@@ -344,10 +393,15 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 						<-pool
 					}()
 					if util.IntAll(tmp["repeat"]) == 0 {
+						p.fillInPlace(tmp)
 						info := ParseInfo(tmp)
-						if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
-							p.currentTime = info.Publishtime
-							p.startProjectMerge(info, tmp)
+						p.currentTime = info.Publishtime
+						if p.currentType == "updateInfo" {
+							//招标信息更改合并
+							p.updateJudge(tmp, info)
+						} else {
+							//普通合并
+							p.CommonMerge(tmp, info)
 						}
 					} else {
 						//信息错误,进行更新
@@ -364,7 +418,6 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 		ms.Hint(Sysconfig["hints"])
 	}
 	query := ms.Iter()
-	//
 	var lastid interface{}
 L:
 	for {
@@ -378,7 +431,7 @@ L:
 			tmp := make(map[string]interface{})
 			if query.Next(&tmp) {
 				lastid = tmp["_id"]
-				if count%2000 == 0 {
+				if count%10000 == 0 {
 					log.Println("current", count, lastid)
 				}
 				infoPool <- tmp
@@ -409,12 +462,50 @@ var (
 	StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$")
 	//纯数字或纯字母
 	StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$")
+	//含分包词,招标未识别分包  合并到一个项目
+	KeyPackage = regexp.MustCompile("[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}(包|段)|(包|段)[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}")
 )
 
+// CommonMerge merges one announcement into the project set.
+//
+// Announcements rejected by the pnbval/Buyer filter (or a nil info) are ignored.
+// When tmp["jsondata"]["projecthref"] is a non-empty string the announcement is
+// attached to the project already registered for that href in p.mapHref, or a
+// new project is created and registered for it. Announcements without a
+// projecthref go through the regular startProjectMerge flow.
+func (p *ProjectTask) CommonMerge(tmp map[string]interface{}, info *Info) {
+	if info == nil || (info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0 {
+		return
+	}
+
+	proHref := ""
+	if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
+		proHref = util.ObjToString(jsonData["projecthref"])
+	}
+	if proHref == "" {
+		// No flow link: ordinary project merge.
+		p.startProjectMerge(info, tmp)
+		return
+	}
+
+	// Merge by projecthref.
+	tmp["projecthref"] = proHref
+	p.mapHrefLock.Lock()
+	pid := p.mapHref[proHref]
+	p.mapHrefLock.Unlock()
+	if pid != "" {
+		p.AllIdsMapLock.Lock()
+		entry := p.AllIdsMap[pid]
+		p.AllIdsMapLock.Unlock()
+		// The href map can outlive the project (the project may have been
+		// removed from AllIdsMap in the meantime); only merge into a project
+		// that is still in memory instead of dereferencing a nil entry.
+		if entry != nil && entry.P != nil {
+			_, ex := CompareStatus(entry.P, info)
+			p.UpdateProject(tmp, info, entry.P, -1, "AAAAAAAAAA", ex)
+			return
+		}
+	}
+
+	// First announcement for this href (or its project is gone): create a
+	// project and (re)register the href.
+	id, p1 := p.NewProject(tmp, info)
+	p.mapHrefLock.Lock()
+	p.mapHref[proHref] = id
+	p.mapHrefLock.Unlock()
+	p.AllIdsMapLock.Lock()
+	p.AllIdsMap[id] = &ID{Id: id, P: p1}
+	p.AllIdsMapLock.Unlock()
+}
+
 func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	bys, _ := json.Marshal(tmp)
 	var thisinfo *Info
-	json.Unmarshal(bys, &thisinfo)
+	_ = json.Unmarshal(bys, &thisinfo)
 	if thisinfo == nil {
 		return nil
 	}
@@ -425,6 +516,11 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 		thisinfo.Subscopeclass = []string{}
 	}
 
+	if thisinfo.Publishtime == 0 {
+		thisinfo.Publishtime = thisinfo.Comeintime
+		tmp["publishtime"] = thisinfo.Comeintime
+	}
+
 	//从标题中查找项目编号
 	res := titleGetPc.FindStringSubmatch(thisinfo.Title)
 	if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) {
@@ -501,6 +597,141 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	return thisinfo
 }
 
+// updateJudge processes an announcement in "updateInfo" mode.
+//
+// It looks for the project in p.AllIdsMap whose Ids contain info.Id:
+//   - not found: the announcement is merged through startProjectMerge (subject
+//     to the usual pnbval/Buyer filter);
+//   - found: the stored list entry is diffed against tmp with modifyEle. If the
+//     announcement's projecthref is registered for that same project, or no
+//     merge-relevant field changed, the entry is updated in place
+//     (modifyUpdate); otherwise the announcement is re-merged (mergeAndModify).
+//
+// Malformed documents (missing list entry, non-map entry) are logged and
+// skipped rather than panicking.
+func (p *ProjectTask) updateJudge(tmp map[string]interface{}, info *Info) {
+	if info == nil {
+		// Nothing to match against; the original dereferenced info here.
+		return
+	}
+	index := -1
+	pInfoId := ""
+	p.AllIdsMapLock.Lock()
+F:
+	for k, entry := range p.AllIdsMap {
+		if entry == nil || entry.P == nil {
+			continue
+		}
+		for i, id := range entry.P.Ids {
+			if info.Id == id {
+				pInfoId = k
+				index = i
+				break F
+			}
+		}
+	}
+	p.AllIdsMapLock.Unlock()
+
+	// Announcement is not part of any in-memory project.
+	if index == -1 {
+		if !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
+			p.currentTime = info.Publishtime
+			p.startProjectMerge(info, tmp)
+		}
+		return
+	}
+
+	tmpPro := MongoTool.FindById(ProjectColl, pInfoId)
+	infoList, _ := tmpPro["list"].(primitive.A)
+	if index >= len(infoList) {
+		log.Println("updateJudge: project", pInfoId, "has no list entry at index", index)
+		return
+	}
+	infoMap, ok := infoList[index].(map[string]interface{})
+	if !ok {
+		log.Println("updateJudge: project", pInfoId, "list entry", index, "is not a document")
+		return
+	}
+	modifyMap, f := modifyEle(infoMap, tmp)
+
+	// projecthref: an announcement linked to this very project by href is
+	// always updated in place.
+	if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
+		if proHref, ok := jsonData["projecthref"].(string); ok {
+			tmp["projecthref"] = proHref
+			p.mapHrefLock.Lock()
+			pid := p.mapHref[proHref]
+			p.mapHrefLock.Unlock()
+			if pid == pInfoId {
+				p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap)
+				return
+			}
+		}
+	}
+
+	if f {
+		// A merge-relevant field changed: merge again and modify.
+		log.Println("合并修改更新", "----------------------------")
+		p.mergeAndModify(pInfoId, index, info, tmp, tmpPro, modifyMap)
+	} else {
+		// Only non-key fields changed: modify in place.
+		log.Println("修改更新", "----------------------------")
+		p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap)
+	}
+}
+
+// Elements lists the announcement fields that modifyEle treats as relevant to
+// the merge flow: a change to any of them makes modifyEle report true.
+var Elements = []string{
+	"projectname",
+	"projectcode",
+	"agency",
+	"budget",
+	"bidamount",
+	"buyerperson",
+	"area",
+	"city",
+	"publishtime",
+}
+
+// modifyEle diffs a stored list entry (tmpPro) against a freshly loaded
+// announcement (tmp).
+//
+// It returns the tmp values of every key present in both maps whose values
+// differ, plus true when at least one of those keys is listed in Elements
+// (i.e. the change affects the merge flow). "_id" is removed from the returned
+// map only when the result is false.
+//
+// NOTE(review): values are compared with !=, which panics at run time when both
+// sides hold the same uncomparable dynamic type (slice, map) — confirm such
+// fields cannot occur in both documents.
+func modifyEle(tmpPro map[string]interface{}, tmp map[string]interface{}) (map[string]interface{}, bool) {
+	changed := make(map[string]interface{})
+	for key, oldVal := range tmpPro {
+		if newVal, shared := tmp[key]; shared && oldVal != newVal {
+			changed[key] = newVal
+		}
+	}
+	for _, key := range Elements {
+		if _, hit := changed[key]; hit {
+			return changed, true
+		}
+	}
+	delete(changed, "_id")
+	return changed, false
+}
+
+// fillInPlace completes the area/city/district fields of tmp from the entry
+// registered in p.mapSite for tmp["site"].
+//
+// Nothing changes when all three fields are already set, when tmp has no site,
+// or when the site is unknown. Otherwise:
+//   - area empty or "全国": area, city and district are all taken from the site;
+//   - area differs from the site's area: tmp is left untouched;
+//   - same area and same city: only an empty district is filled in;
+//   - same area and empty city: city and district are taken from the site.
+func (p *ProjectTask) fillInPlace(tmp map[string]interface{}) {
+	area := util.ObjToString(tmp["area"])
+	city := util.ObjToString(tmp["city"])
+	district := util.ObjToString(tmp["district"])
+	if area != "" && city != "" && district != "" {
+		return
+	}
+
+	siteName := util.ObjToString(tmp["site"])
+	if siteName == "" {
+		return
+	}
+
+	p.mapSiteLock.Lock()
+	defer p.mapSiteLock.Unlock()
+	site := p.mapSite[siteName]
+	if site == nil {
+		return
+	}
+
+	switch {
+	case area == "" || area == "全国":
+		tmp["area"] = site.Area
+		tmp["city"] = site.City
+		tmp["district"] = site.District
+	case area != site.Area:
+		// The announcement names another province; keep what it says.
+	case city == site.City:
+		if district == "" {
+			tmp["district"] = site.District
+		}
+	case city == "":
+		tmp["city"] = site.City
+		tmp["district"] = site.District
+	}
+}
+
 //从数组中删除元素
 func deleteSlice(arr []string, v string) []string {
 	for k, v1 := range arr {
@@ -510,154 +741,3 @@ func deleteSlice(arr []string, v string) []string {
 	}
 	return arr
 }
-
-//			if taskcount > 0 && taskcount%50000 == 0 { //歇歇
-//				log.Println("pause start..", taskcount)
-//				for n := 0; n < p.thread; n++ {
-//					pool <- true
-//				}
-//				for n := 0; n < p.thread; n++ {
-//					<-pool
-//				}
-//				log.Println("pause over..")
-//			}
-//lastid = tmp["_id"]
-//tmp = make(map[string]interface{})
-//		if count > 40000 {
-//			query.Close()
-//			break
-//		}
-//over++
-//func (p *ProjectTask) saveQueue() {
-//	arr := make([]map[string]interface{}, p.saveSize)
-//	indexs := 0
-//	for {
-//		select {
-//		case <-p.saveSign:
-//			if indexs > 0 {
-//				MongoTool.SaveBulk(p.coll, arr[:indexs]...)
-//				arr = make([]map[string]interface{}, p.saveSize)
-//				indexs = 0
-//			}
-//			p.updateSign <- true
-//		case v := <-p.savePool:
-//			arr[indexs] = v
-//			indexs++
-//			if indexs == p.saveSize {
-//				MongoTool.SaveBulk(p.coll, arr...)
-//				arr = make([]map[string]interface{}, p.saveSize)
-//				indexs = 0
-//			}
-//		case <-time.After(100 * time.Millisecond):
-//			if indexs > 0 {
-//				MongoTool.SaveBulk(p.coll, arr[:indexs]...)
-//				arr = make([]map[string]interface{}, p.saveSize)
-//				indexs = 0
-//			}
-//		}
-//	}
-//}
-
-////项目保存和更新通道
-//func (p *ProjectTask) updateQueue() {
-//	arru := make([][]map[string]interface{}, p.saveSize)
-//	indexu := 0
-//	for {
-//		select {
-//		case v := <-p.updatePool:
-//			arru[indexu] = v
-//			indexu++
-//			if indexu == p.saveSize {
-//				//更新之前先保存
-//				p.saveSign <- true
-//				<-p.updateSign
-//				MongoTool.UpdateBulk(p.coll, arru...)
-//				arru = make([][]map[string]interface{}, p.saveSize)
-//				indexu = 0
-//			}
-//		case <-time.After(100 * time.Millisecond):
-//			if indexu > 0 {
-//				p.saveSign <- true
-//				<-p.updateSign
-//				MongoTool.UpdateBulk(p.coll, arru[:indexu]...)
-//				arru = make([][]map[string]interface{}, p.saveSize)
-//				indexu = 0
-//			}
-//		}
-//	}
-//}
-//func (p *ProjectTask) ConCurrentLock(n1, n2, n3, n4 int) {
-//	var lock *sync.Mutex
-//	p.LockPoolLock.Lock()
-//	if p.m1[n1] > 0 || p.m23[n2] > 0 || p.m23[n3] > 0 || p.m4[n4] > 0 {
-//		if p.l1[n1] != nil {
-//			lock = p.l1[n1]
-//		} else if p.l23[n2] != nil {
-//			lock = p.l23[n2]
-//		} else if p.l23[n3] != nil {
-//			lock = p.l23[n3]
-//		} else if p.l4[n4] != nil {
-//			lock = p.l4[n4]
-//		}
-//	} else {
-//		lock = <-p.LockPool
-//	}
-//	if n1 > 0 {
-//		p.m1[n1]++
-//		p.l1[n1] = lock
-//	}
-//	if n2 > 0 {
-//		p.m23[n2]++
-//		p.l23[n2] = lock
-//	}
-//	if n3 > 0 {
-//		p.m23[n3]++
-//		p.l23[n3] = lock
-//	}
-//	if n4 > 0 {
-//		p.m4[n4]++
-//		p.l4[n4] = lock
-//	}
-//	p.LockPoolLock.Unlock()
-//	lock.Lock()
-//}
-
-//func (p *ProjectTask) ConCurrentUnLock(n1, n2, n3, n4 int) {
-//	var lock1 *sync.Mutex
-//	p.LockPoolLock.Lock()
-//	if p.l1[n1] != nil {
-//		lock1 = p.l1[n1]
-//	} else if p.l23[n2] != nil {
-//		lock1 = p.l23[n2]
-//	} else if p.l23[n3] != nil {
-//		lock1 = p.l23[n3]
-//	} else if p.l4[n4] != nil {
-//		lock1 = p.l4[n4]
-//	}
-//	if p.m1[n1] > 0 {
-//		p.m1[n1]--
-//		if p.m1[n1] == 0 {
-//			p.l1[n1] = nil
-//		}
-//	}
-//	if p.m23[n2] > 0 {
-//		p.m23[n2]--
-//		if p.m23[n2] == 0 {
-//			p.l23[n2] = nil
-//		}
-//	}
-//	if p.m23[n3] > 0 {
-//		p.m23[n3]--
-//		if p.m23[n3] == 0 {
-//			p.l23[n3] = nil
-//		}
-//	}
-//	if p.m4[n4] > 0 {
-//		p.m4[n4]--
-//		if p.m4[n4] == 0 {
-//			p.l4[n4] = nil
-//		}
-//	}
-//	p.LockPoolLock.Unlock()
-//	lock1.Unlock()
-//}

+ 448 - 0
fullproject/src_v1/update.go

@@ -0,0 +1,448 @@
+package main
+
+import (
+	"github.com/go-xweb/log"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"math"
+	qu "qfw/util"
+	"reflect"
+	"sort"
+	"strings"
+)
+
+// modifyUpdate snapshots the project document tmpPro via backupPro, applies
+// modifyMap to the list entry at position index, and writes the whole document
+// back to ProjectColl under pInfoId with a $set update. The update's result is
+// deliberately discarded, as in the rest of this file.
+func (p *ProjectTask) modifyUpdate(pInfoId string, index int, info *Info, tmp map[string]interface{}, tmpPro map[string]interface{}, modifyMap map[string]interface{}) {
+	backupPro(tmpPro)
+	entries := tmpPro["list"].(primitive.A)
+	current := entries[index].(map[string]interface{})
+	// entries shares its backing array with tmpPro["list"], so this edits the
+	// document that is written below.
+	entries[index] = updateValue(current, modifyMap)
+	_ = MongoTool.UpdateById(ProjectColl, pInfoId, map[string]interface{}{"$set": tmpPro})
+}
+
+// mergeAndModify re-merges an announcement whose merge-relevant fields changed.
+//
+// pInfoId/index locate the announcement inside its current project (tmpPro is
+// that project's stored document, tmp/info the new announcement, modifyMap the
+// changed fields). updateMerge is run first to find or create the target
+// project. Then:
+//   - index == 0: the current project is snapshotted, dropped from memory and
+//     from ProjectColl, and its remaining announcements are re-read from
+//     ExtractColl and merged again through CommonMerge;
+//   - index != 0: the current project is snapshotted and then either updated in
+//     place, merged into another project, or "inner merged" (see innerMerge).
+//
+// NOTE(review): currentPro is used without a nil check; it is nil if pInfoId is
+// no longer in p.AllIdsMap.
+func (p *ProjectTask) mergeAndModify(pInfoId string, index int, info *Info, tmp map[string]interface{}, tmpPro map[string]interface{}, modifyMap map[string]interface{}) {
+	p.AllIdsMapLock.Lock()
+	currentPro := p.AllIdsMap[pInfoId]
+	p.AllIdsMapLock.Unlock()
+	merge, pId, i, comStr, mergePro := p.updateMerge(index, info, pInfoId, tmp, tmpPro)
+	// The announcement is the first entry of the project's list
+	if index == 0 {
+		log.Println("第1条合并", "------1------")
+		// Snapshot
+		backupPro(tmpPro)
+		p.AllIdsMapLock.Lock()
+		delete(p.AllIdsMap, pInfoId)
+		p.AllIdsMapLock.Unlock()
+		count := MongoTool.Delete(ProjectColl, pInfoId)
+		log.Println(count)
+		// Re-merge every other announcement of the deleted project.
+		// NOTE(review): p.mapHref entries pointing at pInfoId are not removed here.
+		for _, infoId := range currentPro.P.Ids{
+			if infoId == info.Id {
+				continue
+			}
+			tmp1 := MongoTool.FindById(ExtractColl, infoId)
+			info1 := ParseInfo(tmp1)
+			p.CommonMerge(tmp1, info1)
+		}
+	}else {
+		log.Println("第n条合并", "------n------")
+		// Snapshot of the project that currently holds the announcement
+		backupPro(tmpPro)
+		// NOTE(review): updateMerge resets its first result to false whenever
+		// index != 0, so this branch looks unreachable as written — confirm.
+		if merge {
+			// Compared against entry 0 of the original project's list: mergeable
+			if pId == pInfoId {
+				// Merge into the current project
+				mergeProject(p, currentPro.P, info, tmpPro, index)
+				p.AllIdsMapLock.Lock()
+				p.AllIdsMap[currentPro.P.Id.Hex()].P = currentPro.P
+				p.AllIdsMapLock.Unlock()
+				infoList := []interface{}(tmpPro["list"].(primitive.A))
+				infoMap := infoList[index].(map[string]interface{})
+				infoList[index] = updateValue(infoMap, modifyMap)
+				set := map[string]interface{}{
+					"$set": tmpPro,
+				}
+				MongoTool.UpdateById(ProjectColl, pInfoId, set)
+			}else {
+				// Merged into a different project:
+				// update that other project
+				pro := MongoTool.FindById(ProjectColl, mergePro.Id.Hex())
+				backupPro(pro, )
+				choose, ex := CompareStatus(mergePro, info)
+				if !choose {
+					p.UpdateProject(tmp, info, mergePro, i, comStr, ex)
+				}else {
+					id, p1 := p.NewProject(tmp, info)
+					p.AllIdsMapLock.Lock()
+					p.AllIdsMap[id] = &ID{Id: id, P: p1}
+					p.AllIdsMapLock.Unlock()
+				}
+				// Inner merge of the original project
+				p.innerMerge(currentPro.P, info, tmp, tmpPro)
+			}
+		}else {
+			// The announcement went into a newly created project; inner-merge the original project
+			p.innerMerge(currentPro.P, info, tmp, tmpPro)
+		}
+	}
+}
+
+// updateMerge looks for an existing project that the (modified) announcement
+// info should be merged into, scoring the candidates returned by getCompareIds.
+//
+// index and pInfoId describe where the announcement currently lives; when
+// index == 0 the announcement's own project is excluded from the candidates.
+// Candidates are grouped into three priority lists by compareResult; the first
+// non-empty list is sorted by score and filtered with CompareStatus.
+//
+// Returns, in order: whether a target project was found, its id, the priority
+// group (1-3, or -1), the comparison string of the chosen project, and the
+// chosen project. For index == 0 a found project is updated here via
+// UpdateProject; for index != 0 the match is only reported back through the
+// last three results and the first two are reset. Whenever the found flag ends
+// up false a new project is created with NewProject.
+//
+// NOTE(review): the deferred Unlock calls and p.wg.Done() presumably pair with
+// locks/Add taken inside getCompareIds or by the caller — verify.
+func (p *ProjectTask) updateMerge(index int, info *Info, pInfoId string, tmp map[string]interface{}, tmpPro map[string]interface{}) (bool, string, int, string, *ProjectInfo) {
+	bpn, bpc, bptc, bpb, pids, _, IDArr := p.getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+	defer p.wg.Done()
+
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+	}
+	for _, id := range IDArr {
+		defer id.Lock.Unlock()
+	}
+
+	bFindProject := false
+	findPid := ""
+	// Candidate ids collected; start scoring
+	// Result groups
+	comRes1 := []*ProjectInfo{} // highest-priority comparison results
+	comRes2 := []*ProjectInfo{} // second priority
+	comRes3 := []*ProjectInfo{}
+	for _, v := range IDArr {
+		if index == 0 && pInfoId == v.P.Id.Hex() {
+			// The announcement is entry 0 of this project: ignore its own project
+			continue
+		}
+		comStr := ""
+		compareProject := v.P
+		compareProject.score = 0
+		diffTime := int64(math.Abs(float64(info.Publishtime - compareProject.LastTime)))
+		if diffTime <= p.validTime {
+
+			// Completely different agency: not a candidate (a new project will be created)
+			if CheckContain(compareProject.Agency, info.Agency) == 3 {
+				continue
+			}
+
+			// Different region (province/city/district): not a candidate
+			if ComparePlace(compareProject, info) {
+				continue
+			}
+
+			info.PNBH = 0
+			info.PCBH = 0
+			info.PTCBH = 0
+			compareStr, score := comparePNC(info, compareProject)
+			resVal, pjVal := Select(compareStr, info, compareProject)
+			if resVal > 0 {
+				compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount, score2 := p.compareBCTABB(info, compareProject, diffTime, score)
+
+				// project name, project code, title project code, buyer, province, city, publish time, agency
+				comStr = compareStr + compareBuyer + compareCity + compareTime + compareAgency + compareBudget + compareBidmount
+				compareProject.comStr = comStr
+				compareProject.pjVal = pjVal
+				compareProject.resVal = resVal
+				eqV := compareResult(resVal, pjVal, score2, comStr, compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount)
+				if eqV == 1 {
+					comRes1 = append(comRes1, compareProject)
+				} else if eqV == 2 {
+					comRes2 = append(comRes2, compareProject)
+				} else if eqV == 3 {
+					comRes3 = append(comRes3, compareProject)
+				}
+			}
+		}
+	}
+	//-------------------------------- comparison finished -----------------------
+	k := -1
+	comStr := ""
+	mergePro := &ProjectInfo{}
+	for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+		if len(resN) > 0 {
+			if len(resN) > 1 {
+				sort.Slice(resN, func(i, j int) bool {
+					return resN[i].score > resN[j].score
+				})
+			}
+
+			ex := 0
+			resArr := []*ProjectInfo{}
+			// NOTE(review): CompareStatus is evaluated on resN[0] in every
+			// iteration, not on res — confirm this is intended.
+			for _, res := range resN{
+				choose, e := CompareStatus(resN[0], info)
+				if !choose {
+					ex = e
+					resArr = append(resArr, res)
+				}
+			}
+			if len(resArr) > 0 {
+				bFindProject = true
+				findPid = resArr[0].Id.Hex()
+				// Register the chosen project under each matched key and
+				// back-fill its name / code / buyer from the announcement.
+				for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+					if bv > -1 {
+						pids[bv].Arr = append(pids[bv].Arr, findPid)
+						if k2 == 0 {
+							if resArr[0].ProjectName == "" {
+								resArr[0].ProjectName = info.ProjectName
+							} else {
+								if resArr[0].MPN == nil {
+									resArr[0].MPN = []string{info.ProjectName}
+								} else {
+									resArr[0].MPN = append(resArr[0].MPN, info.ProjectName)
+								}
+							}
+
+						} else if k2 < 3 {
+							if resArr[0].ProjectCode == "" {
+								resArr[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+							} else {
+								if resArr[0].MPC == nil {
+									resArr[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+								} else {
+									resArr[0].MPC = append(resArr[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+								}
+							}
+
+						} else {
+							if resArr[0].Buyer == "" {
+								resArr[0].Buyer = info.Buyer
+							}
+						}
+					}
+				}
+				if index != 0 {
+					// The announcement is not entry 0 of its project: only report the comparison result.
+					// Resetting bFindProject also triggers the NewProject call below.
+					k = kv+1
+					comStr = resArr[0].comStr
+					mergePro = resArr[0]
+					bFindProject = false
+					findPid = ""
+				}else {
+					mergePro = resArr[0]
+					// A merge will happen: take a backup (snapshot) first
+					backupPro(tmpPro)
+					p.UpdateProject(tmp, info, resArr[0], kv+1, resArr[0].comStr, ex)
+				}
+			}else {
+				bFindProject = false
+				findPid = ""
+			}
+			// Only the first non-empty priority group is considered.
+			break
+		}
+	}
+
+	if !bFindProject {
+		id, p1 := p.NewProject(tmp, info)
+		p.AllIdsMapLock.Lock()
+		p.AllIdsMap[id] = &ID{Id: id, P: p1}
+		p.AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+	return bFindProject, findPid, k, comStr, mergePro
+}
+
+// innerMerge removes the announcement info from the stored project document
+// tmpPro (matching list entries by "infoid"), re-runs mergeProject for every
+// remaining list entry, and writes the document back to ProjectColl.
+//
+// The original discarded the slice returned by deleteSlice1 while ranging over
+// the same slice: the stored list kept its old length with a duplicated last
+// element, and the entry following the removed one was skipped. The list is
+// now rebuilt and stored back explicitly.
+func (p *ProjectTask) innerMerge(pInfo *ProjectInfo, info *Info, tmp map[string]interface{}, tmpPro map[string]interface{}) {
+	infoList, _ := tmpPro["list"].(primitive.A)
+	kept := make(primitive.A, 0, len(infoList))
+	for index, entry := range infoList {
+		if doc, ok := entry.(map[string]interface{}); ok && doc["infoid"] == info.Id {
+			// This is the announcement that moved elsewhere: drop it.
+			continue
+		}
+		kept = append(kept, entry)
+		mergeProject(p, pInfo, info, tmpPro, index)
+	}
+	tmpPro["list"] = kept
+	set := map[string]interface{}{
+		"$set": tmpPro,
+	}
+	MongoTool.UpdateById(ProjectColl, pInfo.Id.Hex(), set)
+}
+
+// mergeProject folds the fields of announcement thisinfo into the in-memory
+// project pInfo and mirrors every change into set (the document to persist),
+// which is also returned.
+//
+// Covered fields: first/last time, bid type/status, region, project name and
+// code, buyer, agency, contact person/phone, bid opening time, amounts, scope
+// classes, winners, package info, plus mpn/mpc/pici. p supplies the lock for
+// the bidtype table and the batch number; index is not used by the body.
+func mergeProject(p *ProjectTask, pInfo *ProjectInfo, thisinfo *Info, set map[string]interface{}, index int) map[string]interface{} {
+	//1--firsttime
+	if thisinfo.Publishtime < pInfo.FirstTime && thisinfo.Publishtime > 0 {
+		pInfo.FirstTime = thisinfo.Publishtime
+		set["firsttime"] = thisinfo.Publishtime
+		if thisinfo.TopType == "招标" {
+			set["zbtime"] = thisinfo.Publishtime
+		}
+	}
+	//2--lasttime
+	if thisinfo.Publishtime > pInfo.LastTime {
+		pInfo.LastTime = thisinfo.Publishtime
+		set["lasttime"] = thisinfo.Publishtime
+		p.mapBidLock.Lock()
+		bt := bidtype[thisinfo.SubType]
+		p.mapBidLock.Unlock()
+		if bt != "" {
+			set["bidtype"] = bt
+		}
+		// NOTE(review): the `SubType == "招标"` branch below cannot be reached,
+		// because every non-empty SubType is handled by the first branch, and the
+		// final else stores an empty SubType — confirm the intended order.
+		if thisinfo.SubType != "" {
+			set["bidstatus"] = thisinfo.SubType
+			if thisinfo.SubType != "预告" {
+				set["jgtime"] = thisinfo.Publishtime
+			}
+		}else if thisinfo.Infoformat == 2 {
+			set["bidstatus"] = "拟建"
+		}else if thisinfo.SubType == "招标" {
+			set["bidstatus"] = thisinfo.TopType
+		}else {
+			set["bidstatus"] = thisinfo.SubType
+		}
+
+	}
+
+	//3\4\5--province, city, district
+	if thisinfo.Area != "全国" {
+		//xt := true
+		if pInfo.Area == "全国" {
+			pInfo.Area = thisinfo.Area
+			set["area"] = thisinfo.Area
+		} else if pInfo.Area != thisinfo.Area {
+			//xt = false
+		}
+		if pInfo.City == "" && thisinfo.City != "" {
+			pInfo.City = thisinfo.City
+			set["city"] = thisinfo.City
+		} else if pInfo.City != thisinfo.City {
+			//xt = false
+		}
+		if thisinfo.District != "" && pInfo.District == "" {
+			pInfo.District = thisinfo.District
+			set["district"] = thisinfo.District
+		}
+	}
+	//6--project name: fill when empty, or replace a short (<6 runes) name by a longer one
+	if (thisinfo.ProjectName != "" && pInfo.ProjectName == "") || (len([]rune(pInfo.ProjectName)) < 6 && thisinfo.LenPN > 6) {
+		pInfo.ProjectName = thisinfo.ProjectName
+		set["projectname"] = thisinfo.ProjectName
+	}
+	//7--project code: same rule as the project name
+	if (pInfo.ProjectCode == "" && thisinfo.ProjectCode != "") || (len([]rune(pInfo.ProjectCode)) < 6 && len([]rune(thisinfo.ProjectCode)) > 6) {
+		pInfo.ProjectCode = thisinfo.ProjectCode
+		set["projectcode"] = thisinfo.ProjectCode
+	}
+	//7--buyer (and its class): fill when empty, or replace a short (<5 runes) value
+	if (pInfo.Buyer == "" && thisinfo.Buyer != "") || (len([]rune(pInfo.Buyer)) < 5 && len([]rune(thisinfo.Buyer)) > 5) {
+		pInfo.Buyer = thisinfo.Buyer
+		set["buyer"] = thisinfo.Buyer
+
+		pInfo.Buyerclass = thisinfo.Buyerclass
+		set["buyerclass"] = thisinfo.Buyerclass
+	}
+	if pInfo.Buyer == "" {
+		set["buyerclass"] = ""
+	}
+
+	//8--agency
+	if (pInfo.Agency == "" && thisinfo.Agency != "") || (len([]rune(pInfo.Agency)) < 5 && len([]rune(thisinfo.Agency)) > 5) {
+		pInfo.Agency = thisinfo.Agency
+		set["agency"] = thisinfo.Agency
+	}
+	//9--buyer contact person: replaced when the new value is not contained in the stored one
+	if thisinfo.Buyerperson != "" && strings.Index(pInfo.Buyerperson, thisinfo.Buyerperson) < 0 {
+		pInfo.Buyerperson = thisinfo.Buyerperson
+		set["buyerperson"] = pInfo.Buyerperson
+	}
+	//10--buyer phone: same rule as the contact person
+	if thisinfo.Buyertel != "" && strings.Index(pInfo.Buyertel, thisinfo.Buyertel) < 0 {
+		pInfo.Buyertel = thisinfo.Buyertel
+		set["buyertel"] = pInfo.Buyertel
+	}
+
+	if thisinfo.Bidopentime > pInfo.Bidopentime {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+		set["bidopentime"] = pInfo.Bidopentime
+	}
+	// Amounts are only filled in while the project value is still below 1.
+	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
+		pInfo.Bidamount = thisinfo.Bidamount
+		set["bidamount"] = pInfo.Bidamount
+	}
+
+	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
+		pInfo.Budget = thisinfo.Budget
+		set["budget"] = pInfo.Budget
+	}
+
+	// Scope classes and winners are kept as sorted, duplicate-free string lists:
+	// sort, probe with BinarySearch, append and re-sort on a miss.
+	if len(thisinfo.Topscopeclass) > 0 {
+		sort.Strings(pInfo.Topscopeclass)
+		for _, k := range thisinfo.Topscopeclass {
+			if BinarySearch(pInfo.Topscopeclass, k) == -1 {
+				pInfo.Topscopeclass = append(pInfo.Topscopeclass, k)
+				sort.Strings(pInfo.Topscopeclass)
+			}
+		}
+		set["topscopeclass"] = pInfo.Topscopeclass
+	}
+
+	if len(thisinfo.Subscopeclass) > 0 {
+		sort.Strings(pInfo.Subscopeclass)
+		for _, k := range thisinfo.Subscopeclass {
+			if BinarySearch(pInfo.Subscopeclass, k) == -1 {
+				pInfo.Subscopeclass = append(pInfo.Subscopeclass, k)
+				sort.Strings(pInfo.Subscopeclass)
+			}
+		}
+		set["subscopeclass"] = pInfo.Subscopeclass
+		set["s_subscopeclass"] = strings.Join(pInfo.Subscopeclass, ",")
+	}
+	//winner
+	if len(thisinfo.Winners) > 0 {
+		sort.Strings(pInfo.Winners)
+		for _, k := range thisinfo.Winners {
+			if BinarySearch(pInfo.Winners, k) == -1 {
+				pInfo.Winners = append(pInfo.Winners, k)
+				sort.Strings(pInfo.Winners)
+			}
+		}
+		//set["winners"] = pInfo.Winners
+		set["s_winner"] = strings.Join(pInfo.Winners, ",")
+	}
+
+	if thisinfo.HasPackage {
+		pkg, _, _ := PackageFormat(thisinfo, pInfo)
+		set["multipackage"] = 1
+		pInfo.Package = pkg
+	}else {
+		set["multipackage"] = 0
+	}
+
+	set["mpn"] = pInfo.MPN
+	set["mpc"] = pInfo.MPC
+	set["pici"] = p.pici
+
+	return set
+}
+
+// deleteSlice1 removes the first element of arr that is deeply equal to v and
+// returns the shortened slice; arr is returned unchanged when nothing matches.
+// The removal shifts elements inside arr's backing array, so callers must use
+// the returned slice.
+func deleteSlice1(arr []interface{}, v interface{}) []interface{} {
+	for pos := 0; pos < len(arr); pos++ {
+		if !reflect.DeepEqual(arr[pos], v) {
+			continue
+		}
+		return append(arr[:pos], arr[pos+1:]...)
+	}
+	return arr
+}
+
+// updateValue overwrites, in place, every key of info that also exists in tmp
+// with tmp's value, and returns info. Keys that exist only in tmp are not
+// added.
+//
+// The original compared the two values with != before assigning, which panics
+// when both hold the same uncomparable dynamic type (e.g. a BSON array or
+// sub-document), and found the key with a nested scan. Assigning
+// unconditionally gives the same result without the comparison.
+func updateValue(info map[string]interface{}, tmp map[string]interface{}) map[string]interface{} {
+	for k := range info {
+		if v, ok := tmp[k]; ok {
+			info[k] = v
+		}
+	}
+	return info
+}
+
+// backupPro stores a snapshot of the project document tmp in BackupColl when
+// Sysconfig["backupFlag"] is true. In the snapshot the original "_id" is moved
+// to "sourceprojectid"; tmp itself is not modified.
+//
+// A missing or non-boolean backupFlag now means "backups disabled" instead of
+// panicking, and the copy is only made when a backup is actually taken.
+func backupPro(tmp map[string]interface{}) {
+	if enabled, _ := Sysconfig["backupFlag"].(bool); !enabled {
+		return
+	}
+	snapshot := make(map[string]interface{}, len(tmp)+1)
+	for k, v := range tmp {
+		if k != "_id" {
+			snapshot[k] = v
+		}
+	}
+	snapshot["sourceprojectid"] = tmp["_id"]
+	MongoTool.Save(BackupColl, snapshot)
+}

+ 3 - 3
src/config.json

@@ -3,18 +3,18 @@
     "mgodb": "192.168.3.207:27092",
     "dbsize": 10,
     "dbname": "extract_kf",
-    "redis": "buyer=127.0.0.1:6379,winner=127.0.0.1:6379,agency=127.0.0.1:6379",
+    "redis": "buyer=192.168.3.207:1679,winner=192.168.3.207:1679,agency=192.168.3.207:1679",
     "elasticsearch": "http://192.168.3.11:9800",
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": false,
+    "saveresult": true,
     "qualityaudit": false,
     "saveblock": false,
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,
-    "udptaskid": "5dc92520d0fcef1a582f869d",
+    "udptaskid": "5cdd3025698414032c8322b1",
     "udpport": "1484",
     "nextNode": [
         {

+ 7 - 0
src/jy/clear/clear.go

@@ -27,6 +27,7 @@ func init() {
 	BindFn("getPhone", GetPhone)                 //取手机号
 	BindFn("chiToEng", ChiToEng)                 //中文符号转英文
 	BindFn("clearBuyerPerson", ClearBuyerPerson) //处理较长采购联系人
+	BindFn("clearNumber", ClearNumber)           //一般用于处理抽取联系人后带有电话的情况
 }
 
 //绑定清理方法
@@ -56,3 +57,9 @@ func GetPhone(data []interface{}) []interface{} {
 	data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))
 	return data
 }
+
+// ClearNumber replaces data[0] with its fmt.Sprint form stripped of every match
+// of clearNum, and returns data. Like the other clear functions it expects data
+// to have at least one element.
+func ClearNumber(data []interface{}) []interface{} {
+	text := fmt.Sprint(data[0])
+	data[0] = clearNum.ReplaceAllString(text, "")
+	return data
+}

+ 2 - 0
src/jy/clear/cutspace.go

@@ -12,6 +12,7 @@ var (
 	catSymbol      *regexp.Regexp
 	separateSymbol *regexp.Regexp
 	placeReg       *regexp.Regexp
+	clearNum       *regexp.Regexp
 )
 
 var spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
@@ -22,6 +23,7 @@ func init() {
 	catSymbol, _ = regexp.Compile(`[]+`)
 	separateSymbol, _ = regexp.Compile("[\\s\u3000\u2003\u00a0\\n,,、/。|]")
 	placeReg, _ = regexp.Compile("^.*(公司|学(校)?|集团|单位|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合(会|体)|工作室)$")
+	clearNum, _ = regexp.Compile("\\d+")
 }
 
 var LableStr = "&?(amp|nbsp|#8266);?|(&lt;).*?(&gt;?)"

+ 1 - 1
src/jy/clear/specialsymbols.go

@@ -226,7 +226,7 @@ func RemoveAsy(text string) string {
 		if AsyReg.MatchString(first) {
 			textRune = textRune[1:]
 		}
-		if len(textRune) > 0 && AsyReg.MatchString(last) && len(text) > 0 {
+		if len(textRune) > 0 && AsyReg.MatchString(last) {
 			textRune = textRune[:len(textRune)-1]
 		}
 		text = string(textRune)

+ 2 - 9
src/jy/extract/extract.go

@@ -47,16 +47,12 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitSite()
 	ext.InitRulePres()
 	ext.InitRuleBacks(false)
-	ext.InitRuleBacks(true)
 	ext.InitRuleCore(false)
-	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
 	ext.InitTag(false)
-	ext.InitTag(true)
 	ext.InitClearFn(false)
-	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		ext.InitCityInfo()
@@ -129,16 +125,12 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitSite()
 	ext.InitRulePres()
 	ext.InitRuleBacks(false)
-	ext.InitRuleBacks(true)
 	ext.InitRuleCore(false)
-	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
 	ext.InitTag(false)
-	ext.InitTag(true)
 	ext.InitClearFn(false)
-	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		//ext.InitCityDFA()
@@ -440,7 +432,7 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
 		}
 	}
 
-	//分析抽取结果并保存 todo
+	//分析抽取结果并保存
 	AnalysisSaveResult(j, jf, e)
 	<-e.TaskInfo.ProcessPool
 }
@@ -1104,6 +1096,7 @@ func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kv
 						"blocktag":    bl.Classify,
 						"weight":      vv.Weight,
 					})
+					break //暂定取第一个
 				}
 			}
 		}

+ 9 - 8
src/jy/extract/extractInit.go

@@ -2,7 +2,6 @@
 package extract
 
 import (
-	"gopkg.in/mgo.v2/bson"
 	db "jy/mongodbutil"
 	ju "jy/util"
 	qu "qfw/util"
@@ -13,6 +12,8 @@ import (
 	"sync"
 	"time"
 
+	"gopkg.in/mgo.v2/bson"
+
 	log "github.com/donnie4w/go-logger/logger"
 	"github.com/go-ego/gse"
 )
@@ -86,18 +87,18 @@ type ExtractTask struct {
 	ResultChanel chan bool //抽取结果详情
 	sync.RWMutex
 	ResultArr [][]map[string]interface {
-	}                   //抽取结果详情
+	} //抽取结果详情
 	BidChanel chan bool //抽取结果
 	BidArr    [][]map[string]interface {
-	}                   //抽取结果
-	BidTotal int        //结果数量
+	} //抽取结果
+	BidTotal int //结果数量
 
 	RecogFieldMap map[string]map[string]interface {
-	}                    //识别字段
+	} //识别字段
 	FidClassMap map[string][]map[string]interface {
-	}                    //分类
+	} //分类
 	CidRuleMap map[string][]map[string]interface {
-	}                    //规则
+	} //规则
 	AuditFields []string //需要审核的字段名称
 
 	SiteCityMap          map[string]*SiteCity //站点对应的省市区
@@ -1612,7 +1613,7 @@ func (e *ExtractTask) InitAuditRule() {
 func (e *ExtractTask) InitAuditFields() {
 	if len(e.AuditFields) == 0 {
 		v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本
-		if v != nil && len(*v) > 0 { //查找当前使用版本中属性配置需要审核的字段
+		if v != nil && len(*v) > 0 {                                       //查找当前使用版本中属性配置需要审核的字段
 			vid := qu.BsonIdToSId((*v)["_id"])
 			query := map[string]interface{}{
 				"isaudit": true,

+ 46 - 20
src/jy/pretreated/analytable.go

@@ -94,11 +94,12 @@ var (
 	projectnameReg = regexp.MustCompile("((公开)?招标)*[((第]*[一二三四五六七八九十a-zA-Z0-9]+(标段|包|标|段)[))]*$")
 	MhSpilt        = regexp.MustCompile("[::]")
 	//识别采购单位联系人、联系电话、代理机构联系人、联系电话
-	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|名称|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?((电话([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
+	ContactInfoVagueReg = regexp.MustCompile("邮政编码|邮编|(征求意见|报名审核购买)?((联系人?(及|和)?|办公|单位)?(((联系)?(电话|方式)([//]传真|及手机)?|手机)(号码)?|邮箱(地址)?|(详细)?(地(址|点)))|(联系|收料)(人(姓名)?|方式)|传真|电子邮件|(主要负责|项目(负责|联系)|经办)人)|采购方代表")
 	ContactInfoMustReg  = regexp.MustCompile("^(" + ContactInfoVagueReg.String() + ")$")
 	ContactType         = map[string]*regexp.Regexp{
 		"采购单位": regexp.MustCompile("(采购(项目.{2}|服务)?|比选|询价|招标(服务)?|甲|建设|委托|发包|业主|使用|谈判|本招标项目经办|征求意见联系|项目实施)(人|单位|部门|机构|机关|(执行)?方$)|(项目|建(库|设))单位|招标人信息|采购中心地址|业主|收料人|采购部"),
 		"代理机构": regexp.MustCompile("(代理|受托|集中采购).{0,2}(人|方|单位|公司|机构)|招标机构|采购代理"),
+		"中标单位": regexp.MustCompile("拟?(中标|成交|中选|供(货|应))[^候选]{0,2}(人|方|单位|公司|(服务|供应)?商|企业)"),
 	}
 	ContactBuyerPersonFilterReg = regexp.MustCompile("(管理局)$")
 	MultipleValueSplitReg       = regexp.MustCompile("[,,、\\s\u3000\u2003\u00a0]")
@@ -110,7 +111,7 @@ var (
 	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\} `) //  \{".*\":\".+\"}
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")
 	winnerOrderAndBidResult     = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
-	WinnerOrderStr = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|\[大中小\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
+	WinnerOrderStr              = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|\[大中小\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
 )
 
 //在解析时,判断表格元素是否隐藏
@@ -415,11 +416,11 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 
 										}
 									}
-								}else if kv == "预算"{
-									if strings.Contains(k,"万元"){
+								} else if kv == "预算" {
+									if strings.Contains(k, "万元") {
 										for vsk, vsv := range vs {
-											if !strings.Contains(vsv,"万元"){
-												vs[vsk] = vsv+"万元"
+											if !strings.Contains(vsv, "万元") {
+												vs[vsk] = vsv + "万元"
 											}
 										}
 									}
@@ -460,8 +461,8 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 			}
 			for kk, vv := range kvTags {
 				if vsss, ok := v.([]string); ok {
-					if len(vv) > 0{
-						for _,vvvvvv := range vsss{
+					if len(vv) > 0 {
+						for _, vvvvvv := range vsss {
 							tmp := u.Tag{}
 							tmp.Weight = vv[0].Weight
 							tmp.Key = vv[0].Key
@@ -1992,17 +1993,17 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 			for nk, v := range index {
 				if tn.BlockPackage.Map[v] == nil {
 					kv := u.NewJobKv()
-					for tnk,tnv := range tn.StandKV{
-						if nk >= len(tnv){
+					for tnk, tnv := range tn.StandKV {
+						if nk >= len(tnv) {
 							continue
 						}
 						kv.KvTags[tnk] = append(kv.KvTags[tnk], tnv[nk])
 					}
 					//kv.KvTags = tn.StandKV
 					bp := &u.BlockPackage{}
-					bp.Index = v                  //序号 (转换后编号,只有数字或字母)
-					bp.Origin = oldIndex[nk]      //包的原始值
-					bp.TableKV = kv               //table kv (分出的对应的KV值)
+					bp.Index = v             //序号 (转换后编号,只有数字或字母)
+					bp.Origin = oldIndex[nk] //包的原始值
+					bp.TableKV = kv          //table kv (分出的对应的KV值)
 					bp.Text = tn.Html
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
@@ -2524,7 +2525,7 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
 	reCreate := false
 	matchCount := 0
 	contactTypeTagMap := map[string]map[string][]interface{}{}
-	//u.Debug(mustMatchFirst, indexMap, matchMap)
+	//qutil.Debug("============================", mustMatchFirst, indexMap, matchMap)
 	notMatchTrCount := 0
 	allAscFind := true //开启正序查询
 	//涉及变量allAscFind,indexMap
@@ -2536,23 +2537,30 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
 		for _, tr := range tn.TRs {
 			for td_index, td := range tr.TDs {
 				thisTdKvs := tn.tdkv(td) //获取td冒号kv
+				//qutil.Debug(td.Val, len(thisTdKvs))
+				//				for _, v := range thisTdKvs {
+				//					qutil.Debug(v.Key, v.Value)
+				//				}
 				if len(thisTdKvs) != 1 {
 					continue
 				}
 				//1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
 				goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
+				//qutil.Debug("goOnFunc---", goOnFunc, "isContinue---", isContinue, "indexMap---", indexMap, "isCanAddToIndexMap---", isCanAddToIndexMap)
 				if !goOnFunc {
 					break LS
 				}
 				if isContinue {
 					continue
 				}
-				//采购单位,代理机构
+				//采购单位,代理机构,中标单位
+				//qutil.Debug("td_k---", td_k, HasOrderContactType(td_k))
 				for _, k := range HasOrderContactType(td_k) {
-					if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构跳过
+					if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构,中标单位跳过
 						continue
 					}
 					if len(indexMap) == 0 {
+						//qutil.Debug("isCanAddToIndexMap---", isCanAddToIndexMap, "prevCanAddToIndexMap---", prevCanAddToIndexMap, len(tr.TDs))
 						if isCanAddToIndexMap || (prevCanAddToIndexMap && len(tr.TDs) == 1) {
 							myPrevTdVal := ""
 							if td_index-2 >= 0 {
@@ -2578,6 +2586,8 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat, isSite bool, co
 		}
 	}
 	//////
+	//qutil.Debug("indexMap-------------------------", indexMap)
+	//indexMap = map[int]string{}
 L:
 	for tr_index, tr := range tn.TRs {
 		thisTrHasMatch := false
@@ -2591,19 +2601,33 @@ L:
 				thisTdKvs = tn.tdkv(td) //获取冒号kv
 			}
 			tdAscFind := true //开启td正序查询
+			//qutil.Debug("---", td.Val, len(thisTdKvs), len(indexMap))
 			if len(thisTdKvs) == 0 {
 				continue
 			} else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
 				//采购人在联系人、电话后面的处理
 				tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
 			}
+			//qutil.Debug(len(thisTdKvs), len(tr.TDs))
+			if len(thisTdKvs) >= 2 && len(tr.TDs) == 1 { //td中包含多个kv值 5d6b2aa2a5cb26b9b73e79d2
+				tmpIndexMap := map[int]string{}
+				start := 0
+				for _, td_kv := range thisTdKvs {
+					//qutil.Debug(td_kv.Key)
+					for _, k := range HasOrderContactType(td_kv.Key) {
+						tmpIndexMap[start] = k
+						start++
+					}
+				}
+				indexMap = tmpIndexMap
+			}
 			prevKey := ""
 			oldIndexMapLength := len(indexMap)
 			thidTdIndex := td_index
 			//notmatchCount := 0
 			kvTitle := ""
+			//qutil.Debug("indexMap++++++++++++++++++", indexMap, oldIndexMapLength)
 			for _, td_kv := range thisTdKvs {
-				//u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
 				iscontinue := false
 				td_v := td_kv.Value
 				td_k := FilterContactKey(td_kv.Key) //带括号()[]的采购单位,代理机构处理
@@ -2612,6 +2636,7 @@ L:
 					continue
 				}
 				//都为正序查询
+				//qutil.Debug("td_k+++", td_k, "td_v+++", td_v, "allAscFind+++", allAscFind, "tdAscFind+++", tdAscFind)
 				if allAscFind && tdAscFind {
 					//都为正序查询处理
 					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex, isSite, codeSite)
@@ -2626,7 +2651,6 @@ L:
 					indexMap = map[int]string{}
 				}
 				kvTitle = td_kv.Title
-				//u.Debug(indexMap, td_k, td_v, matchMap)
 				if td_k_length < 2 || td_k_length > 10 {
 					continue
 				}
@@ -2663,11 +2687,13 @@ L:
 					}
 					//u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
 					//myContactType
+
 					myContactType := indexMap[index]
 					if myContactType == "" && len(indexMap) == 1 {
 						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
 						myContactType, _ = onlyContactType.(string)
 					}
+					//qutil.Debug("indexMap+++", indexMap, "index+++", index, "myContactType+++", myContactType)
 					if myContactType == "" {
 						continue
 					}
@@ -2687,7 +2713,7 @@ L:
 					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index, isSite, codeSite)
 				}
 			}
-			//u.Debug(td.SortKV.Map)
+			//qutil.Debug("map===", td.SortKV.Map)
 		}
 		if allAscFind && !thisTrHasMatch {
 			notMatchTrCount++
@@ -3217,7 +3243,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 56 - 23
src/jy/pretreated/colonkv.go

@@ -67,10 +67,10 @@ func (ce *ColonkvEntity) divisionMoreKV(con string) string {
 }
 
 //获取冒号kv入口
-func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) ([]*Kv, map[string]string) {
+func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) ([]*Kv, map[string]string) {
 	kvs := ce.GetKvs(con, title, from)
 	if from == 1 {
-		FormatContactKv(&kvs, title, nil, contactFormat,isSite,codeSite)
+		FormatContactKv(&kvs, title, nil, contactFormat, isSite, codeSite)
 	}
 	kv := map[string]string{}
 	for _, v := range kvs {
@@ -163,14 +163,14 @@ func (ce *ColonkvEntity) getColonKv(con, title string, from int) []*Kv {
 }
 
 //冒号kv和空格kv结合
-func (ce *ColonkvEntity) getColonSpaceKV(con string,isSite bool,codeSite string) []*Kv {
+func (ce *ColonkvEntity) getColonSpaceKV(con string, isSite bool, codeSite string) []*Kv {
 	con = colonkvEntity.processText(con)
 	lines := SspacekvEntity.getLines(con)
 	kvMaps := []*Kv{}
 	for _, line := range lines {
 		kvs := colonkvEntity.getColonKv(line, "", 1)
 		if len(kvs) == 0 {
-			kv := SspacekvEntity.divideKV(line,isSite,codeSite)
+			kv := SspacekvEntity.divideKV(line, isSite, codeSite)
 			if kv != nil {
 				kvMaps = append(kvMaps, kv...)
 			}
@@ -276,7 +276,7 @@ func IsContactKvHandle(value string, m map[string]bool) bool {
 
 //kv关于联系人信息的处理
 //采购人>集中采购机构
-func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat,isSite bool,codeSite string) {
+func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat, isSite bool, codeSite string) {
 	////////////////////////////
 	//处理联系人信息
 	var indexMap map[int]string
@@ -294,14 +294,19 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 	ascFind := true
 	ascFindFlag := len(indexMap) == 0 && buyers == nil
 	//采购人在联系人、电话后面的处理
-	isCanAddToIndexMap := false
+	//qutil.Debug("indexMap---", indexMap)
+	//qutil.Debug("ascFind---", ascFind, "ascFindFlag---", ascFindFlag, "isCanAddToIndex---", isCanAddToIndexMap)
 	for _, kv := range *kvs {
-		k := FilterContactKey(kv.Key)
+		isCanAddToIndexMap := false
+		k := FilterContactKey(kv.Key) //过滤key
+		//qutil.Debug(k, "---", kv.Value)
 		k_length := len([]rune(k))
 		if k_length < 2 || k_length > 15 {
 			continue
 		}
-		isContinue := ContactInfoMustReg.MatchString(k)
+		isContinue := ContactInfoMustReg.MatchString(k) //精确匹配 邮编、电话、联系人等
+		//qutil.Debug("isContinue---", isContinue, ContactInfoVagueReg.MatchString(k), IsMapHasValue(k, ContactType), ascFindFlag)
+		//qutil.Debug((isContinue || (ContactInfoVagueReg.MatchString(k) && IsMapHasValue(k, ContactType))) && ascFindFlag)
 		if (isContinue || (ContactInfoVagueReg.MatchString(k) && IsMapHasValue(k, ContactType))) && ascFindFlag {
 			if len(indexMap) > 0 {
 				ascFind = true
@@ -311,30 +316,39 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 			isCanAddToIndexMap = true
 		}
 		n := 1
+		//qutil.Debug("isCanAddToIndexMap---", isCanAddToIndexMap, "ascFind---", ascFind, "ascFindFlag---", ascFindFlag, "indexMap---", indexMap)
+		//qutil.Debug(" HasOrderContactType(k)---", HasOrderContactType(k))
 		for _, ct_k := range HasOrderContactType(k) {
+			//qutil.Debug("ct_k---", ct_k, !ContactType[ct_k].MatchString(k))
 			if !ContactType[ct_k].MatchString(k) {
 				continue
 			}
 			totalIndexMap[ct_k] = true
+			//qutil.Debug(isContinue, !ascFindFlag, totalIndexMap)
 			/////////////////////////////
 			if isContinue || !ascFindFlag {
 				continue
 			}
-			//			if isCanAddToIndexMap && len(indexMap) == 0 {
+			//qutil.Debug("isCanAddToIndexMap---", isCanAddToIndexMap)
 			if isCanAddToIndexMap {
 				indexMap[n] = ct_k
 				n++
 				ascFind = false
 			}
+			//qutil.Debug(n, ascFind, indexMap)
 		}
+		//qutil.Debug("indexMap---", indexMap)
 	}
+	//qutil.Debug("indexMap1---", indexMap)
 	mustMatchFirst := len(indexMap) > 0 //第一个必须匹配上
 	titleMatch := false
+	//qutil.Debug("title---", title, ContactTypeTitleMatch(title))
 	if titleMatchType := ContactTypeTitleMatch(title); titleMatchType != "" {
 		titleMatch = true
 		mustMatchFirst = false
 		indexMap = map[int]string{1: titleMatchType}
 	}
+	//qutil.Debug("titleMatch---", titleMatch, "mustMatchFirst---", mustMatchFirst, "indexMap---", indexMap)
 	//	if titleMatchType := ContactTypeTitleMatch(title); len(titleMatchType) != 0 {
 	//		titleMatch = true
 	//		mustMatchFirst = false
@@ -351,6 +365,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 	//			Debug("bbbbbbbbbb", kv.Key, kv.Value)
 	//		}
 	//	}
+	//qutil.Debug("=========================================================")
 	startIndex := 0
 	prevKey := ""
 	index, tmpindex, notmatchCount, allMatchCount := 0, 0, 0, 0
@@ -360,6 +375,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 	copy(kvsTemp, *kvs)
 	//again := 0
 	ishad := false
+	afterWinner := false
 	for kv_index, kv := range *kvs {
 		isBreak := true
 		v := strings.TrimSpace(kv.Value)
@@ -367,6 +383,12 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		isContinue := false
 		k := FilterContactKey(kv.Key)
 		k_length := len([]rune(k))
+		//3.4新增winnerperson和winnertel抽取
+		if indexMap[1] == "中标单位" && ContactInfoMustReg.MatchString(k) { //中标后是否出现电话、联系人、地址等信息
+			//qutil.Debug("kkkkkk:", k, indexMap)
+			afterWinner = true
+		}
+		//qutil.Debug(kv.Key, "++++++++++", kv.Value, buyers != nil, ascFind, isContinue)
 		if buyers != nil {
 			for _, buyer := range buyers {
 				if buyer == "" {
@@ -415,13 +437,16 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 				}
 			}
 		} else if ascFind {
+			//qutil.Debug("HasOrderContactType(k)+++", HasOrderContactType(k))
 			for _, ct_k := range HasOrderContactType(k) {
 				ishad = false
+				//qutil.Debug("ct_k+++", ct_k, "ishad+++", ishad)
 				//again++
 				if k_length < 3 || k_length > 15 {
 					isBreak = false
 					continue
 				}
+				//qutil.Debug("+++", !ContactType[ct_k].MatchString(k))
 				if !ContactType[ct_k].MatchString(k) {
 					continue
 				}
@@ -436,7 +461,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 						matchMap[ct_k] = map[string]bool{}
 					}
 					if !strings.HasSuffix(k, "方式") {
-						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts,isSite,codeSite)
+						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts, isSite, codeSite)
 						if len(kvTags) == 1 {
 							tagVal, weightVal := FirstKeyValueInMap(kvTags)
 							if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -468,7 +493,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 					}
 				}
 				if ct_k == "采购单位" { //打标签,权重高的重新覆盖
-					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"},isSite,codeSite)
+					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"}, isSite, codeSite)
 					tagVal, weightVal := FirstKeyValueInMap(kvTags)
 					if tagVal == ct_k {
 						if weightMap[ct_k][ct_k] == nil || (weightVal != nil && weightVal.(int) > weightMap[ct_k][ct_k].(int)) {
@@ -503,6 +528,13 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 				isContinue = true
 			}
 		}
+		//qutil.Debug(len(indexMap), !afterWinner)
+		if len(indexMap) == 2 && !afterWinner { //处理同时出现winner、buyer、agency 5d6b2aa2a5cb26b9b73e79d3
+			//qutil.Debug("+++++++++++++++++++")
+			delete(indexMap, 1)
+			indexMap = map[int]string{1: indexMap[2]}
+		}
+		//qutil.Debug("isContinue+++", isContinue, indexMap)
 		if isContinue {
 			continue
 		}
@@ -557,6 +589,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 				index++
 			}
 		}
+		//qutil.Debug("index+++", index, "prevKey+++", prevKey, "indexmap+++", indexMap)
 		//		if startIndex == 0 || startIndex%2 == 1 || index == 0 {
 		//			index = 1
 		//		} else if startIndex%2 == 0 {
@@ -569,6 +602,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 			continue
 		}
 		myContactType := indexMap[index]
+		//qutil.Debug("myContactType+++", myContactType)
 		if myContactType == "" {
 			continue
 		}
@@ -595,7 +629,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		allMatchCount++
 		delete(totalIndexMap, myContactType)
 		if !strings.HasSuffix(k, "方式") {
-			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts,isSite,codeSite)
+			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts, isSite, codeSite)
 			if len(kvTags) == 1 {
 				tagVal, _ := FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -619,6 +653,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		kvTemp := *kv
 		kvTemp.Key = myContactType + k
 		kvTemp.Value = v
+		//qutil.Debug(kvTemp.Key, "----------------", kvTemp.Value)
 		(*kvs)[kv_index] = &kvTemp
 		if ascFind && isBreak && len(indexMap) > 0 {
 			break
@@ -707,21 +742,19 @@ func HasOrderContactType(text string) []string {
 
 //两种冒号kv结合到一起
 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
-func GetKVAll(content, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) *JobKv {
+func GetKVAll(content, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) *JobKv {
 	content = formatText(content, "kv")
-	//log.Println(content)
-	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite,codeSite)
 	//	for _, kvs := range m1Kvs {
 	//		qutil.Debug(kvs.Key, kvs.Value)
 	//	}
-	kvTags := GetKvTags(m1Kvs, title, nil,isSite,codeSite)
+	kvTags := GetKvTags(m1Kvs, title, nil, isSite, codeSite)
 	//	for k, kvs := range kvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
 	//			qutil.Debug(kv.Key, kv.Value)
 	//		}
 	//	}
-	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from,isSite,codeSite)
+	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from, isSite, codeSite)
 	//	for k, kvs := range m2KvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
@@ -775,7 +808,7 @@ func PrintKvTags(kvTags map[string][]*Tag) {
 }
 
 //KVTags转kv
-func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite string) map[string][]*Tag {
+func GetKvTags(findkvs []*Kv, title string, tagdbs []string, isSite bool, codeSite string) map[string][]*Tag {
 	kvTags := map[string][]*Tag{}
 	if title != "" && BlockTagMap[title] {
 		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
@@ -793,17 +826,17 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite
 		}
 		key = colonkvEntity.blockTitleKV(title, key)
 		//先用新的key
-		tags := GetAppointTags(key, tagdbs,isSite,codeSite) //找标签库
+		tags := GetAppointTags(key, tagdbs, isSite, codeSite) //找标签库
 		if len(tags) == 0 && len(key) < 10 && len(title) > 0 && len(title) < 15 {
 			key = title + key
-			tags = GetAppointTags(key, tagdbs,isSite,codeSite)
+			tags = GetAppointTags(key, tagdbs, isSite, codeSite)
 		}
 		//再用老的key
 		if len(tags) == 0 && k != key {
-			tags = GetAppointTags(k, tagdbs,isSite,codeSite)
+			tags = GetAppointTags(k, tagdbs, isSite, codeSite)
 			if len(tags) == 0 && len(k) < 10 && len(title) > 0 && len(title) < 15 {
 				k = title + k
-				tags = GetAppointTags(k, tagdbs,isSite,codeSite)
+				tags = GetAppointTags(k, tagdbs, isSite, codeSite)
 				if len(tags) > 0 {
 					key = k
 				}
@@ -825,7 +858,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite
 							if strings.TrimSpace(nextval) == "" {
 								continue
 							}
-							if GetAppointTags(nextval, tagdbs,isSite,codeSite).Len() > 0 || GetAppointTags(k, tagdbs,isSite,codeSite).Len() > 0 {
+							if GetAppointTags(nextval, tagdbs, isSite, codeSite).Len() > 0 || GetAppointTags(k, tagdbs, isSite, codeSite).Len() > 0 {
 								continue
 							}
 						}

+ 12 - 15
src/main_blocktest.go

@@ -16,7 +16,7 @@ import (
 var f *os.File
 var m = map[string]bool{}
 
-func main12() {
+func main1() {
 	//winnerorder()
 	//return
 	//log.Println(pretreated.ProcTitle("以上公告内容如有变动将在相关网络媒体上另行通知凡购买本招标文件的单位必须就此采购项目的相关事宜详细咨询否则参与投标即被视为已经充分了解了招标方的需求中标后承担该文件范围内的所有要求投标前如对招标文件存有疑问请在投标截止日期前三个工作日以实名制书面文件向我公司询问否则视为接受已报名购买招标文件的投标商未递交投标文件或虽递交投标文件但未参加开标大会的投标商不得再参加该项目的采购活动"))
@@ -51,7 +51,7 @@ func all() {
 }
 func one() {
 	m := mongodbutil.MgoFactory(3, 3, 120, "192.168.3.207:27081", "qfw")
-	d, _ := m.FindById("bidding", "5d424df7a5cb26b9b7b61fde", extract.Fields)
+	d, _ := m.FindById("bidding", "5d6b2aa2a5cb26b9b73e79d2", extract.Fields)
 	com(*d)
 }
 func com(doc map[string]interface{}) {
@@ -67,12 +67,11 @@ func com(doc map[string]interface{}) {
 	}
 	e := &extract.ExtractTask{
 		TaskInfo: &extract.TaskInfo{
-			Version:     "V3.1.2",
-			VersionId:   "5cdd1c70e138234848c1d703",
+			Version:     "v3.6",
+			VersionId:   "5cdd3025698414032c8322b1",
 			ProcessPool: make(chan bool, 1),
 		},
 	}
-
 	e.Id = qu.ObjToString(ju.Config["udptaskid"])
 	e.InitTaskInfo()
 	//d.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
@@ -80,15 +79,11 @@ func com(doc map[string]interface{}) {
 	e.InitSite()
 	e.InitRulePres()
 	e.InitRuleBacks(false)
-	e.InitRuleBacks(true)
 	e.InitRuleCore(false)
-	e.InitRuleCore(true)
 	e.InitBlockRule()
 	e.InitPkgCore()
 	e.InitTag(false)
-	e.InitTag(true)
 	e.InitClearFn(false)
-	e.InitClearFn(true)
 	if e.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		e.InitCityDFA()
@@ -121,8 +116,8 @@ func com(doc map[string]interface{}) {
 		RuleBlock: e.RuleBlock,
 	}
 	e.TaskInfo.ProcessPool <- true
-	pretreated.AnalyStart(j,false,"")
-	e.ExtractProcess(j, nil,false)
+	pretreated.AnalyStart(j, false, "")
+	e.ExtractProcess(j, nil, false)
 
 	log.Println("=============块信息================")
 	for _, v := range j.Block {
@@ -152,13 +147,15 @@ func com(doc map[string]interface{}) {
 		//log.Println("Tag", v.Tag)
 	}
 	log.Println("=============抽取结果================")
+	log.Println(e.ResultArr)
 	set := (e.ResultArr[0][1]["$set"]).(map[string]interface{})
 	for k, v := range set {
-		if k == "budget" || k == "bidamount" || k == "winner" || k == "amount" || k == "projectname" || k == "projectcode" || k == "buyer" || k == "buyerperson" || k == "buyertel" || k == "agency" {
-			log.Println(k, "---", v)
-		}
+		//if k == "budget" || k == "bidamount" || k == "winner" || k == "amount" || k == "projectname" || k == "projectcode" || k == "buyer" || k == "buyerperson" || k == "buyertel" || k == "agency" {
+		log.Println(k, "---", v)
+		//}
 	}
 	log.Println("=============抽取结果 result================")
+	return
 	for k, v := range set["result"].(map[string][]*ju.ExtField) {
 		if k != "winner" {
 			continue
@@ -257,5 +254,5 @@ func winnerorder() {
 第一入围供货商:沈阳曲暖鼎盛保温安装有限公司 、总单价:11.833300
 第二入围供货商:沈阳国盛防腐保温有限公司、总单价:11.102100
 第三入围供货商:沈阳泰豪管材有限公司、总单价:13.258100`
-	log.Println((&pretreated.WinnerOrderEntity{}).Find(text, true, 1,false,""))
+	log.Println((&pretreated.WinnerOrderEntity{}).Find(text, true, 1, false, ""))
 }

+ 1 - 1
src/main_test.go

@@ -28,7 +28,7 @@ func Test_han(t *testing.T) {
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "58369e4161a0721f1583247a", "1", "mxs_v1", "mxs_v1")
+	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5d6b2aa2a5cb26b9b73e79d3", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }

+ 80 - 2
src/res/fieldscore.json

@@ -200,7 +200,7 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(附件|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "regstr": "(附件|否决原因|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
                 "score": -10
             },
 			{
@@ -347,7 +347,85 @@
             }
         ]
     },
-    "projectcode": {
+    "winnerperson": {
+        "type": "string",
+        "positivewords": [
+            {
+                "describe": "以*结尾",
+                "regstr": ".{2,100}(工|老师|经理|女士|先生|主任|科长)$",
+                "score": 3
+            }
+        ],
+        "negativewords": [
+            {
+                "describe": "出现符号",
+                "regstr": "[*]",
+                "score": -10
+            },
+			{
+                "describe": "是数字",
+                "regstr": "^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$",
+                "score": -10
+            }
+        ],
+        "length": [
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    0,
+                    1,
+                    -5
+                ]
+            },
+			{
+                "describe": "[gt,lte,score]",
+                "range": [
+                    1,
+                    4,
+                    10
+                ]
+            }
+        ]
+    },
+	"winnertel": {
+        "type": "string",
+        "positivewords": [],
+        "negativewords": [
+            {
+                "describe": "出现中文汉字",
+                "regstr": "[\\u4e00-\\u9fa5]",
+                "score": -10
+            }
+        ],
+        "length": [
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    0,
+                    6,
+                    -5
+                ]
+            },
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    6,
+                    14,
+                    3
+                ]
+            },
+            {
+                "describe": "[gt,∞,score]",
+                "range": [
+                    14,
+                    -1,
+                    -1
+                ]
+            }
+        ]
+    },
+    
+	"projectcode": {
         "type": "string",
         "positivewords": [
             {

+ 5 - 2
src/res/specialsymbols.json

@@ -80,10 +80,13 @@
             "buyer": true,
             "winner": true,
             "agency": true,
-            "agency": true,
             "buyertel": true,
             "buyerperson": true,
-			"buyerzipcode":true
+			"buyerzipcode": true,
+			"agencytel": true,
+			"agencyperson": true,
+			"winnertel": true,
+			"winnerperson": true
         },
         "symbol": [
             ":",

+ 1 - 1
src/web/templates/admin/clear.html

@@ -102,7 +102,7 @@ menuActive("version")
 var field = {{.field}};
 var _id = "";
 //var clearArr = ["cutspace","cutallspace","cutSymbol","cutNotPrs","clearAllWord","clearMaxAmount","clearProjectName","toint","tofloat","totimestamp","tomoney","getcurrency","getrate","getPhone","rateToFloat"]; 
-var clearMap = {"中文符号转英文":"chiToEng","去除首尾空格":"cutspace","去除所有空格":"cutallspace","清理符号":"cutSymbol","清理不成对符号后面的内容":"cutNotPrs","清理全部是汉字或者特殊符号的情况":"clearAllWord","过滤大于1万亿":"clearMaxAmount","清理项目名称":"clearProjectName","转int":"toint","转float":"tofloat","转时间戳":"totimestamp","转换金额":"tomoney","获取币种":"getcurrency","获取汇率":"getrate","取手机号":"getPhone","费率转小数":"rateToFloat","处理较长采购联系人":"clearBuyerPerson"}
+var clearMap = {"中文符号转英文":"chiToEng","去除首尾空格":"cutspace","去除所有空格":"cutallspace","清理符号":"cutSymbol","清理不成对符号后面的内容":"cutNotPrs","清理全部是汉字或者特殊符号的情况":"clearAllWord","过滤大于1万亿":"clearMaxAmount","清理项目名称":"clearProjectName","转int":"toint","转float":"tofloat","转时间戳":"totimestamp","转换金额":"tomoney","获取币种":"getcurrency","获取汇率":"getrate","取手机号":"getPhone","清理数字":"clearNumber","费率转小数":"rateToFloat","处理较长采购联系人":"clearBuyerPerson"}
 $(function () {
 	ttableclear=$('#clearTable').DataTable({
 		"lengthChange": false,

+ 1 - 1
src/web/templates/admin/version.html

@@ -162,7 +162,7 @@ $(function () {
 		"columns": [
 				{ "data": "version","width":"5%"},
 			{ "data": "s_username","width":"5%"},
-			{ "data": "s_descript","width":"25%"},
+			{ "data": "s_descript","width":"21%"},
 			{ "data": "isuse","width":"7%",render:function(val,a,row){
 				tmp=""
 				if(val){

+ 3 - 2
udpfilterdup/src/config.json

@@ -5,8 +5,8 @@
         "addr": "192.168.3.207:27082",
         "pool": 15,
         "db": "extract_kf",
-        "extract": "bidding_20190910_01",
-        "extract_copy": "a_testbidding_copy",
+        "extract": "a_testbidding_new",
+        "extract_copy": "a_testbidding",
         "bidding": "bidding_126"
     },
     "jkmail": {
@@ -27,6 +27,7 @@
             "memo": "创建招标数据索引"
         }
     ],
+    "isMerger":false,
     "specialwords": "(重招|重新招标|勘察|设计|施工|监理|总承包|土石方|可研)",
     "specialtitle_1": "[0-9a-zA-Z一二三四五六七八九十零123456789](次|包|标段|标包)",
     "specialtitle_2": "项目([0-9a-zA-Z一二三四五六七八九十零123456789])",

+ 110 - 64
udpfilterdup/src/datamap.go

@@ -3,7 +3,6 @@ package main
 import (
 	"fmt"
 	"log"
-	"math"
 	qutil "qfw/util"
 	"qfw/util/mongodb"
 	"strconv"
@@ -39,13 +38,13 @@ type Info struct {
 	titleSpecialWord   bool 		//标题特殊词
 	specialWord        bool	 		//再次判断的特殊词
 	mergemap           map[string]interface{}   //合并记录
-	accurateTime       int64		//最终准确的时间
 
 
-}
 
 
 
+}
+
 var datelimit = float64(432000)  //五天
 var reason string //判重原因记录
 
@@ -85,7 +84,6 @@ func NewDatamap(days int, lastid string) *datamap {
 			continuSum++
 		} else {
 			cm := tmp["comeintime"] //时间单位?
-			//cm := tmp["publishtime"]
 			comeintime := qutil.Int64All(cm)
 			if comeintime == 0 {
 				id := qutil.BsonIdToSId(tmp["_id"])[0:8]
@@ -238,14 +236,6 @@ func NewInfo(tmp map[string]interface{}) *Info {
 	}
 
 
-	info.accurateTime = qutil.Int64All(tmp["publishtime"])
-	if info.accurateTime ==0 {
-		info.accurateTime = qutil.Int64All(tmp["comeintime"])
-		if info.accurateTime ==0{
-			info.accurateTime, _ = strconv.ParseInt(qutil.BsonIdToSId(tmp["_id"]), 16, 64)
-		}
-	}
-
 	return info
 }
 //判重方法
@@ -262,6 +252,7 @@ func (d *datamap) check(info *Info) (b bool,  source *Info,reasons string) {
 			keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
 		}
 	}
+
 L:
 	for _, k := range keys {
 		data := d.data[k]
@@ -271,39 +262,27 @@ L:
 				if v.id == info.id {//正常重复
 					return false, v,""
 				}
-				//备份  新增发布时间为空-取入库时间-在为空取id
-				if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
-					continue   //是否为5天内数据
-				}
 				//类型分组
 				if info.subtype==v.subtype {
 					//站点配置--
 					if info.site!="" {
-						dict := SiteMap[info.site].(map[string]string)
+						dict := SiteMap[info.site]
+
 						if dict!=nil{
 							//临时改变--具体值
 							if info.area=="全国" &&dict["area"]!="" {
-								info.area = dict["area"]
-								info.city = dict["city"]
+								info.area = qutil.ObjToString(dict["area"])
+								info.city = qutil.ObjToString(dict["city"])
 							}else {
 								if info.city=="" &&dict["city"]!="" {
-									info.area = dict["area"]
-									info.city = dict["city"]
+									info.area = qutil.ObjToString(dict["area"])
+									info.city = qutil.ObjToString(dict["city"])
 								}
 							}
 						}
 					}
 
-					//前置条件2个不重复  一个重复
-					if info.titleSpecialWord&&info.title!=v.title&&v.title!="" {
-						continue
-					}
-					if info.buyer != "" &&v.buyer == info.buyer {
-						//满足标题
-						if len([]rune(v.title)) >= 10 && len([]rune(info.title)) >= 10 && v.title != info.title && (info.specialWord || v.specialWord) {
-							continue
-						}
-					}
+					//前置条件1  	站点相关
 					if info.site!=""&&info.site==v.site{
 						if info.href!=""&&info.href==v.href {
 							reason = "href相同"
@@ -312,7 +291,34 @@ L:
 							reasons = reason
 							break L
 						}
+						if info.href!=""&&info.href!=v.href {
+							continue
+						}
+					}
+
+					//前置条件2  标题相关 - 有且一个关键词
+					if ((info.titleSpecialWord&&!v.titleSpecialWord)||(info.specialWord&&!v.specialWord))&&
+						info.title!=v.title&&v.title!=""&&info.title!="" {
+						continue
 					}
+
+					//前置条件3 	标题相关 - 均含有关键词
+					if ((info.titleSpecialWord&&v.titleSpecialWord)||(info.specialWord&&v.specialWord))&&
+						len([]rune(v.title))>10 && len([]rune(info.title))>10&&v.title!=""&&info.title!=""{
+						if !(strings.Contains(v.title, info.title)||strings.Contains(info.title, v.title)) {
+							continue //无包含关系
+						}
+						if strings.Contains(v.title, info.title)||strings.Contains(info.title, v.title) {
+							reason = "标题关键词且包含关系"
+							b = true
+							source = v
+							reasons = reason
+							break L
+						}
+					}
+
+
+
 					//代理机构相同-非空相等
 					if v.agency != "" && info.agency != "" && v.agency == info.agency {
 						reason = reason + "同机构-"
@@ -349,7 +355,16 @@ L:
 
 	//往预存数据 d 添加
 	if !b {
-		ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
+		ct:=int64(0)
+		if info.publishtime>0 {
+			ct = info.publishtime
+		}else {
+			ct, _ = strconv.ParseInt(info.id[:8], 16, 64)
+		}
+
+
+
+		//ct, _ := strconv.ParseInt(info.id[:8], 16, 64)
 		dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
 		k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
 		data := d.data[k]
@@ -365,6 +380,7 @@ L:
 			d.data[k] = data
 		}
 	}
+
 	return
 }
 
@@ -390,39 +406,27 @@ L:
 				if v.id == info.id {//正常重复
 					return false, v,""
 				}
-				//备份  新增发布时间为空-取入库时间-在为空取id
-				if math.Abs(qutil.Float64All(v.accurateTime-info.accurateTime)) > datelimit {
-					continue   //是否为5天内数据
-				}
 				//类型分组
 				if info.subtype==v.subtype {
 					//站点配置--
 					if info.site!="" {
-						dict := SiteMap[info.site].(map[string]string)
+						dict := SiteMap[info.site]
+
 						if dict!=nil{
 							//临时改变--具体值
 							if info.area=="全国" &&dict["area"]!="" {
-								info.area = dict["area"]
-								info.city = dict["city"]
+								info.area = qutil.ObjToString(dict["area"])
+								info.city = qutil.ObjToString(dict["city"])
 							}else {
 								if info.city=="" &&dict["city"]!="" {
-									info.area = dict["area"]
-									info.city = dict["city"]
+									info.area = qutil.ObjToString(dict["area"])
+									info.city = qutil.ObjToString(dict["city"])
 								}
 							}
 						}
 					}
 
-					//前置条件2个不重复  一个重复
-					if info.titleSpecialWord&&info.title!=v.title&&v.title!="" {
-						continue
-					}
-					if info.buyer != "" &&v.buyer == info.buyer {
-						//满足标题
-						if len([]rune(v.title)) >= 10 && len([]rune(info.title)) >= 10 && v.title != info.title && (info.specialWord || v.specialWord) {
-							continue
-						}
-					}
+					//前置条件1  	站点相关
 					if info.site!=""&&info.site==v.site{
 						if info.href!=""&&info.href==v.href {
 							reason = "href相同"
@@ -431,7 +435,34 @@ L:
 							reasons = reason
 							break L
 						}
+						if info.href!=""&&info.href!=v.href {
+							continue
+						}
 					}
+
+					//前置条件2  标题相关 - 有且一个关键词
+					if ((info.titleSpecialWord&&!v.titleSpecialWord)||(info.specialWord&&!v.specialWord))&&
+						info.title!=v.title&&v.title!=""&&info.title!="" {
+						continue
+					}
+
+					//前置条件3 	标题相关 - 均含有关键词
+					if ((info.titleSpecialWord&&v.titleSpecialWord)||(info.specialWord&&v.specialWord))&&
+						len([]rune(v.title))>10 && len([]rune(info.title))>10&&v.title!=""&&info.title!=""{
+						if !(strings.Contains(v.title, info.title)||strings.Contains(info.title, v.title)) {
+							continue //无包含关系
+						}
+						if strings.Contains(v.title, info.title)||strings.Contains(info.title, v.title) {
+							reason = "标题关键词且包含关系"
+							b = true
+							source = v
+							reasons = reason
+							break L
+						}
+					}
+
+
+
 					//代理机构相同-非空相等
 					if v.agency != "" && info.agency != "" && v.agency == info.agency {
 						reason = reason + "同机构-"
@@ -610,7 +641,17 @@ func quickHeavyMethodOne(v *Info ,info *Info) bool {
 			return false
 		}
 	}else {
-
+		//招标结果
+		if tenderRepeat_A(v,info) {
+			if tenderRepeat_C(v,info) {
+				return false
+			}else {
+				reason = reason+"---类别空-招标类"
+				return true
+			}
+		}else {
+			return false
+		}
 	}
 
 	return false
@@ -662,7 +703,17 @@ func quickHeavyMethodTwo(v *Info ,info *Info) bool {
 				return false
 			}
 		}else {
-
+			//招标结果
+			if tenderRepeat_B(v,info) {
+				if tenderRepeat_C(v,info) { //有不同
+					return false
+				}else {
+					reason = reason+"---类别空-招标类"
+					return true
+				}
+			}else{
+				return false
+			}
 		}
 	}
 
@@ -679,6 +730,10 @@ func quickHeavyMethodTwo(v *Info ,info *Info) bool {
 			return false
 		}
 	}
+
+
+
+
 	return false
 }
 
@@ -720,7 +775,7 @@ func tenderRepeat_A(v *Info ,info *Info) bool {
 	}
 
 	if (p1&&p2&&p3)||(p1&&p2&&p4)||(p1&&p2&&p9)||
-		(p1&&p2&&p10)||(p1&&p3&&p9)||(p1&&p3&&p10)||
+		(p1&&p2&&p10)||(p1&&p2&&p11)||(p1&&p3&&p9)||(p1&&p3&&p10)||
 		(p1&&p4&&p9)||(p1&&p4&&p10)||(p2&&p3&&p4)||
 		(p2&&p3&&p9)||(p2&&p3&&p10)||(p2&&p3&&p11)||
 		(p2&&p4&&p9)||(p2&&p4&&p10)||(p2&&p4&&p11)||
@@ -785,11 +840,6 @@ func tenderRepeat_C(v *Info ,info *Info) bool {
 	if v.agencyaddr!=""&&info.agencyaddr!=""&&v.agencyaddr!=info.agencyaddr {
 		return true
 	}
-	if info.specialWord||v.specialWord||info.titleSpecialWord||v.titleSpecialWord{
-		return true
-	}
-
-
 
 	return false
 }
@@ -800,7 +850,7 @@ func winningRepeat_A(v *Info ,info *Info) bool {
 	var ss string
 	p1,p2,p3,p5,p6,p11 := false,false,false,false,false,false
 	if v.projectname!=""&&v.projectname==info.projectname {
-		ss = ss+"p1(标题)-"
+		ss = ss+"p1(项目名称)-"
 		p1 = true
 	}
 	if v.buyer!=""&&v.buyer==info.buyer {
@@ -884,10 +934,6 @@ func winningRepeat_C(v *Info ,info *Info) bool {
 	}
 	//原始地址...
 
-	if info.specialWord||v.specialWord||info.titleSpecialWord||v.titleSpecialWord{
-		return true
-	}
-
 	return false
 }
 

+ 243 - 256
udpfilterdup/src/main.go

@@ -6,8 +6,8 @@ package main
 
 import (
 	"encoding/json"
+	"flag"
 	"fmt"
-	"gopkg.in/mgo.v2/bson"
 	"log"
 	mu "mfw/util"
 	"net"
@@ -25,9 +25,7 @@ var (
 	Sysconfig    map[string]interface{} //配置文件
 	mconf        map[string]interface{} //mongodb配置信息
 	mgo          *mongodb.MongodbSim    //mongodb操作对象
-	siteMgo             *mongodb.MongodbSim
-	//mgoTest          *mongodb.MongodbSim    //mongodb操作对象
-
+	//siteMgo      *mongodb.MongodbSim
 	extract      string
 	extract_copy string
 	bidding      string
@@ -36,27 +34,26 @@ var (
 	dupdays      = 5                      //初始化判重范围
 	DM           *datamap                 //
 	HM           *historymap                 //判重数据
-	lastid       = "5d767728a5cb26b9b7748868"
-	//ObjectId("5d767728a5cb26b9b7748868")
+	lastid       = ""
+	/*
+	5da3f2c5a5cb26b9b79847fc
+	*/
 	//正则筛选相关
 	FilterRegTitle = regexp.MustCompile("^_$")
 	FilterRegTitle_1 = regexp.MustCompile("^_$")
 	FilterRegTitle_2 = regexp.MustCompile("^_$")
 
-
-
-
-	SiteMap  map[string]interface{} //站点map
+	isMerger bool //是否合并
+	SiteMap  map[string]map[string]interface{} //站点map
 )
 
 func init() {
-	//flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
-	//flag.Parse()
+	flag.StringVar(&lastid, "id", "", "最后加载id") //以小于等于此id开始加载最近几天的数据
+	flag.Parse()
 	//172.17.145.163:27080
 	util.ReadConfig(&Sysconfig)
 	nextNode = util.ObjArrToMapArr(Sysconfig["nextNode"].([]interface{}))
 	mconf = Sysconfig["mongodb"].(map[string]interface{})
-
 	mgo = &mongodb.MongodbSim{
 		MongodbAddr: mconf["addr"].(string),
 		DbName:      mconf["db"].(string),
@@ -64,184 +61,43 @@ func init() {
 	}
 	extract = mconf["extract"].(string)
 	extract_copy = mconf["extract_copy"].(string)
-	//bidding = mconf["bidding"].(string)
 	mgo.InitPool()
 
 
-	//测试临时注释
+	//测试可以临时注释
 	dupdays = util.IntAllDef(Sysconfig["dupdays"], 3)
 	//加载数据
 	DM = NewDatamap(dupdays, lastid)
 	FilterRegTitle = regexp.MustCompile(util.ObjToString(Sysconfig["specialwords"]))
 	FilterRegTitle_1 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_1"]))
 	FilterRegTitle_2 = regexp.MustCompile(util.ObjToString(Sysconfig["specialtitle_2"]))
+	isMerger = Sysconfig["isMerger"].(bool)
 
-	//站点相关数据库
-	mongodb.InitMongodbPool(5, "192.168.3.207:27082", "")
-
-	siteMgo = &mongodb.MongodbSim{
-		MongodbAddr: "192.168.3.207:27082",
-		Size:        5,
-		DbName:      "zhaolongyue",
-	}
-	siteMgo.InitPool()
-
-
-	SiteMap = make(map[string]interface{},0)
 
+	//配置站点Map
+	SiteMap = make(map[string]map[string]interface{},0)
 	start := int(time.Now().Unix())
 	//站点配置
-	sess_site := siteMgo.GetMgoConn()
+	sess_site := mgo.GetMgoConn()
 	defer sess_site.Close()
 	res_site := sess_site.DB("zhaolongyue").C("site").Find(nil).Sort("_id").Iter()
 	for site_dict := make(map[string]interface{}); res_site.Next(&site_dict); {
-			data_map := map[string]string{
+			data_map := map[string]interface{}{
 				"area":util.ObjToString(site_dict["area"]),
 				"city":util.ObjToString(site_dict["city"]),
 				"district":util.ObjToString(site_dict["district"]),
+				"sitetype":util.ObjToString(site_dict["sitetype"]),
+				"level":util.ObjToString(site_dict["level"]),
 			}
-		SiteMap[site_dict["site"].(string)]= data_map
+		SiteMap[util.ObjToString(site_dict["site"])]= data_map
 	}
-	
 	fmt.Printf("用时:%d秒,%d个",int(time.Now().Unix())-start,len(SiteMap))
 
 
 }
 
-//新增一个方法 判断
-func mainTest()  {
-
-	//log.Println("1")
-	//代码copy数据
-	//sessTest :=mgoTest.GetMgoConn()
-	//defer sessTest.Close()
-	//
-	//sess := mgo.GetMgoConn()
-	//defer sess.Close()
-	//
-	////var arr []map[string]interface{}
-	//
-	//res_test := sessTest.DB("qfw").C("bidding").Find(mongodb.ObjToMQ(`{"comeintime":{"$gte": 1571025600, "$lte": 1571976000}}`, true)).Iter()
-	//res :=sess.DB("extract_kf").C("a_testbidding")
-	//5
-	//
-	//
-	//
-	//
-	//i:=0
-	//for dict := make(map[string]interface{}); res_test.Next(&dict); i++{
-	//
-	//	//插入
-	//	if i%2000==0 {
-	//		log.Println("当前:",i)
-	//	}
-	//	res.Insert(dict)
-	//	//if len(arr)>=500 {
-	//	//	arr = make([]map[string]interface{},0)
-	//	//}else {
-	//	//	arr = append(arr,dict)
-	//	//}
-	//}
-
-
-
-	sess := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sess)
-	res_copy := sess.DB("extract_kf").C(extract_copy).Find(nil).Iter()
-
-	m1 :=map[string]int{} //老版本
-	m2 :=map[string]int{} //新版本
-
-	i:=0
-	j:=0
-	for v1 := make(map[string]interface{}); res_copy.Next(&v1); i++{
-		if i%2000==0 {
-			log.Println("当前i:",i)
-		}
-		m1[(v1["_id"].(bson.ObjectId).Hex())]= util.IntAll(v1["repeat"])
-	}
-
-	sesss := mgo.GetMgoConn()
-	defer mgo.DestoryMongoConn(sesss)
-	res := sesss.DB("extract_kf").C(extract).Find(nil).Iter()
-
-
-	for v2 := make(map[string]interface{}); res.Next(&v2); j++{
-		if j%2000==0 {
-			log.Println("当前j:",j)
-		}
-		m2[(v2["_id"].(bson.ObjectId).Hex())]= util.IntAll(v2["repeat"])
-	}
-
-	fmt.Println(len(m1),len(m2))
-	n1:=0
-	n2:=0
-	n3:=0
-	n4:=0
-	n5:=0
-	n6:=0
-
-	var arr1 []string
-	var arr2 []string
-	for k,v:=range m1{
-
-		if m2[k]==1&&v==0{//0:1
-			n1++
-			arr2 = append(arr2,fmt.Sprintf("目标_id:%s",k))
-		}
-		if m2[k]==0&&v==1{ //1:0
-			n2++
-			arr1 = append(arr1,fmt.Sprintf("目标_id:%s",k))
-		}
-		if m2[k]==0&&v==0{ //0:0
-			n3++
-		}
-		if m2[k]==1&&v==1{//1:1
-			n4++
-		}
-		if m2[k]==-1&&v==0{ //0:-1
-			n5++
-		}
-		if m2[k]==-1&&v==1{//1:-1
-			n6++
-		}
-
-	}
-	//打印 1:0情况    66989;
-	mm:=0
-	for _,v:=range arr1 {
-		mm++
-		if mm%222==0 {
-			log.Println(v)
-		}
-	}
-
-	log.Println("分割线---------------")
-	log.Println("分割线---------------")
-
-
-	//打印 0:1情况  8729
-	nn:=0
-	for _,v:=range arr2 {
-		nn++
-		if nn%30==0 {
-			log.Println(v)
-		}
-	}
-
-	log.Println("V1 0:1---",n1)
-	log.Println("V1 1:0---",n2)
-	log.Println("V1 0:0---",n3)
-	log.Println("V1 1:1---",n4)
-	log.Println("V1 0:-1---",n5)
-	log.Println("V1 1:-1---",n6)
-
-}
-
-
 
 func main() {
-
 	go checkMapJob()
 
 	updport := Sysconfig["udpport"].(string)
@@ -264,12 +120,17 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
 		} else if mapInfo != nil {
 
-			//更新流程
-			go historyTask(data,mapInfo)
-
-
-			//判重流程
-			//go task(data, mapInfo)
+			taskType:= util.ObjToString(mapInfo["stype"])
+			if taskType == "historyTask" {
+				//更新流程
+				go historyTask(data,mapInfo)
+			}else if taskType == "normalTask" {
+				//判重流程
+				go task(data, mapInfo)
+			}else {
+				//其他
+				go task(data, mapInfo)
+			}
 
 			key, _ := mapInfo["key"].(string)
 			if key == "" {
@@ -300,7 +161,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 			"$lte": util.StringTOBsonId(mapInfo["lteid"].(string)),
 		},
 	}
-	it := sess.DB(mgo.DbName).C(extract).Find(&q).Iter()
+	it := sess.DB(mgo.DbName).C(extract).Find(&q).Sort("publishtime").Iter()
 	updateExtract := [][]map[string]interface{}{}
 	pool := make(chan bool, 16)
 	wg := &sync.WaitGroup{}
@@ -330,7 +191,7 @@ func task(data []byte, mapInfo map[string]interface{}) {
 					},
 					map[string]interface{}{
 						"$set": map[string]interface{}{
-							"repeat":   -1,
+							"repeat":-1,
 						},
 					},
 				})
@@ -349,59 +210,69 @@ func task(data []byte, mapInfo map[string]interface{}) {
 					var mergeArr  = []int64{} 	//更改合并数组记录
 					var newData  = &Info{}		//更换新的数据池数据
 					var id_map  = map[string]interface{}{}
-					repeat_id := ""
-
-					//合并操作--评功权重打分-合并完替换原始数据池
-					basic_bool := basicDataScore(source,info)
-					if basic_bool {
-						//已原始数据为标准-对比数据打判重标签
-						newData,mergeArr= mergeDataFields(source,info)
-						DM.replaceSourceData(newData,source.id) //替换
-						id_map["_id"]= util.StringTOBsonId(source.id)
-						repeat_id = source.id
-					}else {
-						//已对比数据为标准 ,数据池的数据打判重标签
-						newData,mergeArr= mergeDataFields(info,source)
-						DM.replaceSourceData(newData,source.id)//替换
-						id_map["_id"]= util.StringTOBsonId(info.id)
-						repeat_id = info.id
+					repeat_id := source.id
+					id_map["_id"]= util.StringTOBsonId(info.id)
+
+					if isMerger{
+						//需要合并相关操作
+						//合并操作--评功权重打分-合并完替换原始数据池
+						basic_bool := basicDataScore(source,info)
+						if basic_bool {
+							//已原始数据为标准-对比数据打判重标签
+							newData,mergeArr= mergeDataFields(source,info)
+							DM.replaceSourceData(newData,source.id) //替换
+							id_map["_id"]= util.StringTOBsonId(source.id)
+							repeat_id = source.id
+						}else {
+							//已对比数据为标准 ,数据池的数据打判重标签
+							newData,mergeArr= mergeDataFields(info,source)
+							DM.replaceSourceData(newData,source.id)//替换
+							id_map["_id"]= util.StringTOBsonId(info.id)
+							repeat_id = info.id
+						}
 					}
 
+
+
 					var update_map  = map[string]interface{}{
 						"$set": map[string]interface{}{
-							"reason":reason,
-							"repeat":"1",
+							"repeat_reason":reason,
+							"repeat":1,
 							"repeatid":repeat_id,
 						},
 					}
 
-					//合并记录
-					if len(newData.mergemap)>0 {
-						update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
-					}
+					if isMerger {
+						//合并记录
+						if len(newData.mergemap)>0 {
+							update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
+							//fmt.Println("合并长度:",len(newData.mergemap))
+						}
 
-					//更新合并后的数据
-					for _,value :=range mergeArr {
-						if value==1 {
-							update_map["$set"].(map[string]interface{})["area"] = newData.area
-							update_map["$set"].(map[string]interface{})["city"] = newData.city
-						}else if value==2 {
-							update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-						}else if value==3 {
-							update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-						}else if value==4 {
-							update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-						}else if value==5 {
-							update_map["$set"].(map[string]interface{})["budget"] = newData.budget
-						}else if value==6 {
-							update_map["$set"].(map[string]interface{})["winner"] = newData.winner
-						}else if value==7 {
-							update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-						}else if value==8 {
-							update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-						}else {
+						//更新合并后的数据
+						for _,value :=range mergeArr {
+							if value==1 {
+								update_map["$set"].(map[string]interface{})["area"] = newData.area
+								update_map["$set"].(map[string]interface{})["city"] = newData.city
+							}else if value==2 {
+								update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
+							}else if value==3 {
+								update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
+							}else if value==4 {
+								update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
+							}else if value==5 {
+								update_map["$set"].(map[string]interface{})["budget"] = newData.budget
+							}else if value==6 {
+								update_map["$set"].(map[string]interface{})["winner"] = newData.winner
+							}else if value==7 {
+								update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
+							}else if value==8 {
+								update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
+							}else {
 
+							}
 						}
+
 					}
 
 					//构建数据库更新用到的
@@ -552,7 +423,7 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 							map[string]interface{}{
 								"$set": map[string]interface{}{
 									"repeat":   0,
-									"repeatid": "-1",
+									"repeatid": -2,
 								},
 							},
 						})
@@ -566,60 +437,71 @@ func historyTask(data []byte, mapInfo map[string]interface{}) {
 						var mergeArr  = []int64{} 	//更改合并数组记录
 						var newData  = &Info{}		//更换新的数据池数据
 						var id_map  = map[string]interface{}{}
-						repeat_id := ""
+						repeat_id := source.id
+						id_map["_id"]= util.StringTOBsonId(info.id)
 
-						//合并操作--评功权重打分-合并完替换原始数据池
-						basic_bool := basicDataScore(source,info)
-						if basic_bool {
-							//已原始数据为标准-对比数据打判重标签
-							newData,mergeArr= mergeDataFields(source,info)
-							DM.replaceSourceData(newData,source.id) //替换。
-							id_map["_id"]= util.StringTOBsonId(source.id)
-							repeat_id = source.id
-						}else {
-							//已对比数据为标准 ,数据池的数据打判重标签
-							newData,mergeArr= mergeDataFields(info,source)
-							DM.replaceSourceData(newData,source.id)//替换
-							id_map["_id"]= util.StringTOBsonId(info.id)
-							repeat_id = info.id
+						if isMerger{
+							//需要合并相关操作
+							//合并操作--评功权重打分-合并完替换原始数据池
+							basic_bool := basicDataScore(source,info)
+							if basic_bool {
+								//已原始数据为标准-对比数据打判重标签
+								newData,mergeArr= mergeDataFields(source,info)
+								DM.replaceSourceData(newData,source.id) //替换
+								id_map["_id"]= util.StringTOBsonId(source.id)
+								repeat_id = source.id
+							}else {
+								//已对比数据为标准 ,数据池的数据打判重标签
+								newData,mergeArr= mergeDataFields(info,source)
+								DM.replaceSourceData(newData,source.id)//替换
+								id_map["_id"]= util.StringTOBsonId(info.id)
+								repeat_id = info.id
+							}
 						}
 
+
+
 						var update_map  = map[string]interface{}{
 							"$set": map[string]interface{}{
-								"reason":reason,
-								"repeat":"1",
+								"repeat_reason":reason,
+								"repeat":1,
 								"repeatid":repeat_id,
 							},
 						}
 
-						//合并记录
-						if len(newData.mergemap)>0 {
-							update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
-						}
-
-						//更新合并后的数据
-						for _,value :=range mergeArr {
-							if value==1 {
-								update_map["$set"].(map[string]interface{})["area"] = newData.area
-								update_map["$set"].(map[string]interface{})["city"] = newData.city
-							}else if value==2 {
-								update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
-							}else if value==3 {
-								update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
-							}else if value==4 {
-								update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
-							}else if value==5 {
-								update_map["$set"].(map[string]interface{})["budget"] = newData.budget
-							}else if value==6 {
-								update_map["$set"].(map[string]interface{})["winner"] = newData.winner
-							}else if value==7 {
-								update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
-							}else if value==8 {
-								update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
-							}else {
+						if isMerger {
+							//合并记录
+							if len(newData.mergemap)>0 {
+								update_map["$set"].(map[string]interface{})["merge"] = newData.mergemap
+								//fmt.Println("合并长度:",len(newData.mergemap))
+							}
 
+							//更新合并后的数据
+							for _,value :=range mergeArr {
+								if value==1 {
+									update_map["$set"].(map[string]interface{})["area"] = newData.area
+									update_map["$set"].(map[string]interface{})["city"] = newData.city
+								}else if value==2 {
+									update_map["$set"].(map[string]interface{})["projectname"] = newData.projectname
+								}else if value==3 {
+									update_map["$set"].(map[string]interface{})["projectcode"] = newData.projectcode
+								}else if value==4 {
+									update_map["$set"].(map[string]interface{})["buyer"] = newData.buyer
+								}else if value==5 {
+									update_map["$set"].(map[string]interface{})["budget"] = newData.budget
+								}else if value==6 {
+									update_map["$set"].(map[string]interface{})["winner"] = newData.winner
+								}else if value==7 {
+									update_map["$set"].(map[string]interface{})["bidamount"] = newData.bidamount
+								}else if value==8 {
+									update_map["$set"].(map[string]interface{})["bidopentime"] = newData.bidopentime
+								}else {
+
+								}
 							}
+
 						}
+
 						//构建数据库更新用到的
 						updateExtract = append(updateExtract, []map[string]interface{}{
 							id_map,
@@ -706,6 +588,8 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 		source.area = info.area
 		source.city = info.city
 		mergeArr = append(mergeArr,1)
+
+		//fmt.Println("合并-城市")
 	}
 	//2、项目名称
 	if source.projectname==""&&info.projectname!=""{
@@ -720,6 +604,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.projectname = info.projectname
 		mergeArr = append(mergeArr,2)
+		//fmt.Println("合并-项目名称")
 	}
 	//3、项目编号
 	if source.projectcode==""&&info.projectcode!=""{
@@ -734,6 +619,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.projectcode = info.projectcode
 		mergeArr = append(mergeArr,3)
+		//fmt.Println("合并-项目标号")
 	}
 	//4、采购单位
 	if source.buyer==""&&info.buyer!=""{
@@ -748,6 +634,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.buyer = info.buyer
 		mergeArr = append(mergeArr,4)
+		//fmt.Println("合并-采购单位")
 	}
 	//5、预算
 	if source.budget==0&&info.budget!=0{
@@ -762,6 +649,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.budget = info.budget
 		mergeArr = append(mergeArr,5)
+		//fmt.Println("合并-预算")
 	}
 	//6、中标单位
 	if source.winner==""&&info.winner!=""{
@@ -776,6 +664,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.winner = info.winner
 		mergeArr = append(mergeArr,6)
+		//fmt.Println("合并-中标单位")
 	}
 	//7、中标金额
 	if source.bidamount==0&&info.bidamount!=0{
@@ -790,6 +679,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.bidamount = info.bidamount
 		mergeArr = append(mergeArr,7)
+		//fmt.Println("合并-中标金额")
 	}
 	//8、开天时间-地点
 	if source.bidopentime==0&&info.bidopentime!=0{
@@ -804,6 +694,7 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 		source.bidopentime = info.bidopentime
 		mergeArr = append(mergeArr,8)
+		//fmt.Println("合并-开标时间")
 	}
 
 	//以上合并过于简单,待进一步优化
@@ -813,6 +704,102 @@ func mergeDataFields(source *Info, info *Info) (*Info,[]int64){
 
 //权重评估
 func basicDataScore(v *Info, info *Info) bool  {
+
+	//权重评估
+	/*
+	网站优先级判定规则:
+    1、中央>省>市>县区
+    2、政府采购>公共资源>采购单位官网>招标代理公司/平台
+	*/
+
+
+	v_score,info_score :=-1,-1
+	dict_v := SiteMap[v.site]
+	dict_info := SiteMap[info.site]
+	//先判断level
+	if dict_v !=nil {
+		v_level := util.ObjToString(dict_v["level"])
+		if v_level =="中央" {
+			v_score = 4
+		}else if v_level =="省级" {
+			v_score = 3
+		}else if v_level =="市级" {
+			v_score = 2
+		}else if v_level =="县区" {
+			v_score = 1
+		}else if v_level =="" {
+		}else {
+			v_score = 0
+		}
+	}
+
+	if dict_info !=nil {
+		info_level := util.ObjToString(dict_info["level"])
+		if info_level =="中央" {
+			info_score = 4
+		}else if info_level =="省级" {
+			info_score = 3
+		}else if info_level =="市级" {
+			info_score = 2
+		}else if info_level =="县区" {
+			info_score = 1
+		}else if info_level == ""{
+
+		}else {
+			v_score = 0
+		}
+	}
+
+	if v_score>info_score{
+		return true
+	}
+	if v_score<info_score{
+		return false
+	}
+
+	//判断sitetype
+	if dict_v !=nil {
+		v_sitetype := util.ObjToString(dict_v["sitetype"])
+		if v_sitetype =="政府采购"||v_sitetype=="政府门户" {
+			v_score = 4
+		}else if v_sitetype =="公共资源" {
+			v_score = 3
+		}else if v_sitetype =="官方网站" {
+			v_score = 2
+		}else if v_sitetype =="社会公共招标平台"||v_sitetype =="企业招标平台" {
+			v_score = 1
+		}else if v_sitetype =="" {
+		}else {
+			v_score = 0
+		}
+	}
+
+	if dict_info !=nil {
+		info_sitetype := util.ObjToString(dict_info["sitetype"])
+		if info_sitetype =="政府采购"||info_sitetype=="政府门户" {
+			info_score = 4
+		}else if info_sitetype =="公共资源" {
+			info_score = 3
+		}else if info_sitetype =="官方网站" {
+			info_score = 2
+		}else if info_sitetype =="社会公共招标平台"||info_sitetype =="企业招标平台" {
+			info_score = 1
+		}else if info_sitetype =="" {
+		}else {
+			info_score = 0
+		}
+	}
+
+	if v_score>info_score{
+		return true
+	}
+	if v_score<info_score{
+		return false
+	}
+
+
+
+	//网站评估
 	m,n:=0,0
 	if v.projectname!="" {m++}
 	if v.buyer!="" {m++}

+ 6 - 13
udps/main.go

@@ -23,23 +23,16 @@ func main() {
 	//2017-06-01,2018-06-01
 	//2018-06-01,2019-02-20
 
-	/*
-	5da3f2c5a5cb26b9b79847fc
-	5db2735ba5cb26b9b7c99c6f   76万
-	*/
 
 	/*
-		9W
-	5d767728a5cb26b9b7748868
-	ObjectId("5d77c881a5cb26b9b7de209d")
+	ObjectId("5da3f2c5a5cb26b9b79847fc")
+	ObjectId("5db2735ba5cb26b9b7c99c6f")
 
-
-	//历史中间一段数据
-	ObjectId("5d771e90a5cb26b9b7be7976")
-	ObjectId("5d775be4a5cb26b9b759b5eb")
+	5da3f2c5a5cb26b9b79847fc
+	5db2735ba5cb26b9b7c99c6f
 	*/
-	flag.StringVar(&sid, "sid", "5d771e90a5cb26b9b7be7976", "开始id")
-	flag.StringVar(&eid, "eid", "5d775be4a5cb26b9b759b5eb", "结束id")
+	flag.StringVar(&sid, "sid", "", "开始id")
+	flag.StringVar(&eid, "eid", "", "结束id")
 	flag.StringVar(&startDate, "start", "", "开始日期2006-01-02")
 	flag.StringVar(&endDate, "end", "", "结束日期2006-01-02")
 	flag.StringVar(&ip, "ip", "127.0.0.1", "ip")