瀏覽代碼

项目合并

5 年之前
父節點
當前提交
c0e09d3f49
共有 66 個文件被更改,包括 11005 次插入和 240 次删除
  1. 0 240
      fullproject/src/fulldata.go
  2. 0 0
      fullproject/src2/city.json
  3. 0 0
      fullproject/src2/cleareids.go
  4. 0 0
      fullproject/src2/compare.go
  5. 0 0
      fullproject/src2/config.json
  6. 0 0
      fullproject/src2/datamonitor.go
  7. 0 0
      fullproject/src2/extractarea.go
  8. 126 0
      fullproject/src2/fulldata.go
  9. 0 0
      fullproject/src2/handleproject.go
  10. 0 0
      fullproject/src2/main.go
  11. 0 0
      fullproject/src2/main_test.go
  12. 0 0
      fullproject/src2/projectmeger.go
  13. 二進制
      fullproject/src2/src
  14. 0 0
      fullproject/src2/thisinfo.go
  15. 0 0
      fullproject/src2/udptaskmap.go
  16. 470 0
      fullproject/src_dev1/bak.txt
  17. 3 0
      fullproject/src_dev1/clear.go
  18. 173 0
      fullproject/src_dev1/comparePNCB.go
  19. 34 0
      fullproject/src_dev1/config.json
  20. 215 0
      fullproject/src_dev1/fulldata.go
  21. 436 0
      fullproject/src_dev1/infotool.go
  22. 74 0
      fullproject/src_dev1/main.go
  23. 719 0
      fullproject/src_dev1/merge.go
  24. 321 0
      fullproject/src_dev1/merge_select.go
  25. 16 0
      fullproject/src_dev2/config.json
  26. 291 0
      fullproject/src_dev2/init.go
  27. 65 0
      fullproject/src_dev2/load_data.go
  28. 61 0
      fullproject/src_dev2/main.go
  29. 377 0
      fullproject/src_dev2/merge.go
  30. 160 0
      fullproject/src_dev2/merge_comparepncb.go
  31. 276 0
      fullproject/src_dev2/merge_select.go
  32. 116 0
      fullproject/src_dev2/new_project.go
  33. 200 0
      fullproject/src_dev2/task.go
  34. 221 0
      fullproject/src_dev2/update_project.go
  35. 87 0
      fullproject/src_dev3/clearmem.go
  36. 16 0
      fullproject/src_dev3/config.json
  37. 290 0
      fullproject/src_dev3/init.go
  38. 65 0
      fullproject/src_dev3/load_data.go
  39. 61 0
      fullproject/src_dev3/main.go
  40. 486 0
      fullproject/src_dev3/merge.go
  41. 160 0
      fullproject/src_dev3/merge_comparepncb.go
  42. 276 0
      fullproject/src_dev3/merge_select.go
  43. 181 0
      fullproject/src_dev3/new_project.go
  44. 207 0
      fullproject/src_dev3/task.go
  45. 231 0
      fullproject/src_dev3/update_project.go
  46. 87 0
      fullproject/src_dev4/clearmem.go
  47. 16 0
      fullproject/src_dev4/config.json
  48. 301 0
      fullproject/src_dev4/init.go
  49. 65 0
      fullproject/src_dev4/load_data.go
  50. 68 0
      fullproject/src_dev4/main.go
  51. 406 0
      fullproject/src_dev4/merge.go
  52. 182 0
      fullproject/src_dev4/merge_comparepncb.go
  53. 508 0
      fullproject/src_dev4/merge_select.go
  54. 172 0
      fullproject/src_dev4/new_project.go
  55. 222 0
      fullproject/src_dev4/task.go
  56. 234 0
      fullproject/src_dev4/update_project.go
  57. 二進制
      fullproject/src_dev5.zip
  58. 14 0
      fullproject/src_dev5/config.json
  59. 368 0
      fullproject/src_dev5/init.go
  60. 70 0
      fullproject/src_dev5/load_data.go
  61. 78 0
      fullproject/src_dev5/main.go
  62. 187 0
      fullproject/src_dev5/merge_comparepnc.go
  63. 510 0
      fullproject/src_dev5/merge_select.go
  64. 711 0
      fullproject/src_dev5/project.go
  65. 387 0
      fullproject/src_dev5/task.go
  66. 5 0
      fullproject/src_dev5/test/t.go

+ 0 - 240
fullproject/src/fulldata.go

@@ -1,240 +0,0 @@
-package main
-
-import (
-	"log"
-	"strings"
-
-	"qfw/util"
-	"qfw/util/mongodb"
-
-	"qfw/util/redis"
-	"sync"
-	"time"
-)
-
-var FullCount = 0
-
-func RunFullData() {
-	defer util.Catch()
-	var wg = sync.WaitGroup{}
-	startTime := int64(1325347200) //2012-01-01
-	ps := 3
-	pool := make(chan *task, ps)
-	day := 0
-	endChan := make(chan bool, 1)
-	go func() {
-		now := time.Now().Unix()
-		bComplete := false
-		for {
-			if startTime > now || bComplete {
-				log.Println("任务结束")
-				endChan <- true
-				break
-			}
-			endTime := startTime + 86400
-			q := map[string]interface{}{
-				"publishtime": map[string]interface{}{
-					"$gt":  startTime,
-					"$lte": endTime,
-				},
-			}
-
-			//数据正序处理
-			sess := MQFW.GetMgoConn()
-			var result []map[string]interface{}
-			sess.DB(MQFW.DbName).C(extractColl).Find(q).All(&result)
-			MQFW.DestoryMongoConn(sess)
-			pool <- &task{result}
-			wg.Add(1)
-			startTime = endTime
-			day++
-			log.Println("day====", day)
-			if day > 0 && day%ps == 0 {
-				wg.Wait()
-				MQFW.Destory()
-				MQFW = mongodb.MongodbSim{
-					MongodbAddr: Sysconfig["mongodbServers"].(string),
-					Size:        2 * ps,
-					DbName:      Sysconfig["mongodbName"].(string),
-				}
-				MQFW.InitPool()
-			}
-
-		}
-	}()
-
-	for {
-		select {
-		case t := <-pool:
-			t.query()
-			t.result = nil
-			t = nil
-			wg.Done()
-		case <-endChan:
-			return
-		}
-	}
-
-}
-
-type task struct {
-	result []map[string]interface{}
-}
-
-func (t *task) query() {
-	index := 0
-	wg := &sync.WaitGroup{}
-	for _, tmp := range t.result {
-		if index%10000 == 0 {
-			log.Println(index, tmp["_id"])
-		}
-		index++
-		if util.IntAll(tmp["repeat"]) == 1 {
-			continue
-		}
-		pt := util.Int64All(tmp["publishtime"])
-		if pt > currentMegerTime {
-			currentMegerTime = pt
-		}
-		currentMegerCount++
-		if currentMegerCount > 300000 {
-			log.Println("执行清理", currentMegerTime)
-			clearPKey()
-			currentMegerCount = 0
-		}
-		wg.Add(1)
-		MultiThread <- true
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-MultiThread
-				wg.Done()
-			}()
-			thisid := util.BsonIdToSId(tmp["_id"])
-			info := PreThisInfo(tmp)
-			if info != nil {
-				lockPNCBMap(info)
-				startProjectMerge(info, tmp)
-				redis.Put(INFOID, thisid, 1, INFOTIMEOUT)
-				currentMegerTime = info.Publishtime
-				unlockPNCBMap(info)
-			}
-		}(tmp)
-	}
-	wg.Wait()
-	FullCount += index
-
-	log.Println("currentFull", FullCount)
-
-}
-
-type KeyMapPC struct {
-	Lock sync.Mutex
-	Map  map[string]*KeyMap
-}
-
-type CompareInfoPC struct {
-	Field    string        //对比属性 pn/pc/pb
-	Key      string        //存放rediskey
-	Scores   []*CompareOne //对比分值 pinfo索引对应分值
-	Bfind    bool          //是否查找到
-	IdArr    []string
-	K        *Key
-	KeyMapPC *KeyMapPC
-}
-
-func NewCompareInfoPC(field, key string, KeyMapPC *KeyMapPC) *CompareInfoPC {
-	return &CompareInfoPC{
-		Field:    field,
-		Key:      key,
-		Scores:   []*CompareOne{},
-		KeyMapPC: KeyMapPC,
-	}
-
-}
-
-var PNIdMap, PCIdMap = &KeyMapPC{Map: map[string]*KeyMap{}}, &KeyMapPC{Map: map[string]*KeyMap{}}
-
-//获取对比项目数组
-func getComeperProjects2(p PCBV, thisinfo *Info) (res []interface{}, pncb []*CompareInfo) {
-	newarr := []string{}
-	repeatId := map[string]bool{}
-	if p.PnameLen > 0 {
-		pn := NewCompareInfoPC("pn", thisinfo.PNKey, PNIdMap)
-		//对比全国和本省
-		PNIdMap.Lock.Lock()
-		km := PNIdMap.Map[thisinfo.Area]
-		if km == nil {
-			km = &KeyMap{Map: map[string]*Key{}}
-			PNIdMap.Map[thisinfo.Area] = km
-		}
-
-		PNIdMap.Lock.Unlock()
-
-		thisinfo.AllRelatePNKeyMap = map[string]*Key{}
-		pn.KeyMap.Lock.Lock()
-		for k, v := range pn.KeyMap.Map {
-			if strings.Contains(k, pn.Key) || strings.Contains(pn.Key, k) {
-				thisinfo.AllRelatePNKeyMap[k] = v
-				for _, id := range *v.Arr {
-					if !repeatId[id] {
-						newarr = append(newarr, id)
-						repeatId[id] = true
-					}
-				}
-			}
-		}
-		if thisinfo.AllRelatePNKeyMap[pn.Key] == nil {
-			K := &Key{&[]string{}, &sync.Mutex{}}
-			thisinfo.AllRelatePNKeyMap[pn.Key] = K
-			pn.KeyMap.Map[pn.Key] = K
-		}
-		pn.KeyMap.Lock.Unlock()
-	}
-	if p.PcodeLen > 0 {
-		pc := NewCompareInfo("pc", thisinfo.PCKey, PCKey)
-		pncb = append(pncb, pc)
-		thisinfo.AllRelatePCKeyMap = map[string]*Key{}
-		pc.KeyMap.Lock.Lock()
-		for k, v := range pc.KeyMap.Map {
-			if strings.Contains(k, pc.Key) || strings.Contains(pc.Key, k) {
-				thisinfo.AllRelatePCKeyMap[k] = v
-				for _, id := range *v.Arr {
-					if !repeatId[id] {
-						newarr = append(newarr, id)
-						repeatId[id] = true
-					}
-				}
-			}
-		}
-		if thisinfo.AllRelatePCKeyMap[pc.Key] == nil {
-			K := &Key{&[]string{}, &sync.Mutex{}}
-			thisinfo.AllRelatePCKeyMap[pc.Key] = K
-			pc.KeyMap.Map[pc.Key] = K
-		}
-		pc.KeyMap.Lock.Unlock()
-	}
-
-	if p.BuyerLen > 0 {
-		pb := NewCompareInfo("pb", thisinfo.PBKey, PBKey)
-		pncb = append(pncb, pb)
-		pb.KeyMap.Lock.Lock()
-		K := pb.KeyMap.Map[pb.Key]
-		if K == nil {
-			K = &Key{&[]string{}, &sync.Mutex{}}
-			pb.KeyMap.Map[pb.Key] = K
-		} else {
-			for _, id := range *K.Arr {
-				if !repeatId[id] {
-					newarr = append(newarr, id)
-					repeatId[id] = true
-				}
-			}
-		}
-		pb.KeyMap.Lock.Unlock()
-	}
-
-	if len(newarr) > 0 {
-		res = redis.Mget(REDISIDS, newarr)
-	}
-	return
-}

+ 0 - 0
fullproject/src/city.json → fullproject/src2/city.json


+ 0 - 0
fullproject/src/cleareids.go → fullproject/src2/cleareids.go


+ 0 - 0
fullproject/src/compare.go → fullproject/src2/compare.go


+ 0 - 0
fullproject/src/config.json → fullproject/src2/config.json


+ 0 - 0
fullproject/src/datamonitor.go → fullproject/src2/datamonitor.go


+ 0 - 0
fullproject/src/extractarea.go → fullproject/src2/extractarea.go


+ 126 - 0
fullproject/src2/fulldata.go

@@ -0,0 +1,126 @@
+package main
+
+import (
+	"log"
+	"qfw/util"
+	"qfw/util/mongodb"
+
+	"qfw/util/redis"
+	"sync"
+	"time"
+)
+
+// FullCount is the running total of records iterated by task.query; every
+// call adds the number of records in its batch, skipped ones included.
+var FullCount = 0
+
+// RunFullData replays the whole extract collection in publish-time order.
+//
+// Starting at 2012-01-01 it walks forward in 86400-second windows until the
+// window start passes the time at which the producer goroutine started. The
+// producer loads each window from MongoDB and queues it on a buffered
+// channel; the calling goroutine runs task.query on every queued batch. After
+// every ps windows the producer waits for the queued batches to finish and
+// then rebuilds the MQFW connection pool.
+func RunFullData() {
+	defer util.Catch()
+	var wg = sync.WaitGroup{}
+	startTime := int64(1325347200) //2012-01-01
+	ps := 3
+	pool := make(chan *task, ps)
+	day := 0
+	endChan := make(chan bool, 1)
+	go func() {
+		now := time.Now().Unix()
+		for {
+			if startTime > now {
+				// Every batch is counted in wg before it is queued, so this
+				// wait returns only after the consumer has finished all of
+				// them. Signalling earlier could let the select below pick
+				// endChan while batches are still buffered in pool.
+				wg.Wait()
+				log.Println("任务结束")
+				endChan <- true
+				break
+			}
+			endTime := startTime + 86400
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": endTime,
+				},
+			}
+
+			// Load the window; windows are queued in ascending time order.
+			sess := MQFW.GetMgoConn()
+			var result []map[string]interface{}
+			if err := sess.DB(MQFW.DbName).C(extractColl).Find(q).All(&result); err != nil {
+				// Keep going with whatever was loaded, but do not hide the failure.
+				log.Println("load extract window failed:", startTime, endTime, err)
+			}
+			MQFW.DestoryMongoConn(sess)
+			// Add must happen before the send: once the batch is in pool the
+			// consumer may call wg.Done at any moment.
+			wg.Add(1)
+			pool <- &task{result}
+			startTime = endTime
+			day++
+			log.Println("day====", day)
+			if day%ps == 0 {
+				// Let in-flight batches finish before the pool is torn down.
+				wg.Wait()
+				MQFW.Destory()
+				MQFW = mongodb.MongodbSim{
+					MongodbAddr: Sysconfig["mongodbServers"].(string),
+					Size:        2 * ps,
+					DbName:      Sysconfig["mongodbName"].(string),
+				}
+				MQFW.InitPool()
+			}
+
+		}
+	}()
+
+	// Consumer: process batches one at a time until the producer signals
+	// that everything has been handled.
+	for {
+		select {
+		case t := <-pool:
+			t.query()
+			t.result = nil // release the batch for garbage collection
+			wg.Done()
+		case <-endChan:
+			return
+		}
+	}
+
+}
+
+// task is one batch of extract records loaded by RunFullData.
+type task struct {
+	result []map[string]interface{}
+}
+
+// query pushes every record of the batch whose "repeat" field is not 1
+// through PreThisInfo and startProjectMerge, one goroutine per record, with
+// the MultiThread channel limiting how many run at once. It returns after
+// all of those goroutines have finished and then adds the batch size to
+// FullCount.
+func (t *task) query() {
+	pending := &sync.WaitGroup{}
+	for i, rec := range t.result {
+		// Progress trace every 10000 records.
+		if i%10000 == 0 {
+			log.Println(i, rec["_id"])
+		}
+		if util.IntAll(rec["repeat"]) == 1 {
+			continue
+		}
+		if publishTime := util.Int64All(rec["publishtime"]); publishTime > currentMegerTime {
+			currentMegerTime = publishTime
+		}
+		// Run the key cleanup once more than 300000 records have been merged
+		// since the previous cleanup.
+		currentMegerCount++
+		if currentMegerCount > 300000 {
+			log.Println("执行清理", currentMegerTime)
+			clearPKey()
+			currentMegerCount = 0
+		}
+		pending.Add(1)
+		MultiThread <- true // blocks while the concurrency limit is reached
+		go func(rec map[string]interface{}) {
+			defer func() {
+				<-MultiThread
+				pending.Done()
+			}()
+			id := util.BsonIdToSId(rec["_id"])
+			info := PreThisInfo(rec)
+			if info == nil {
+				return
+			}
+			lockPNCBMap(info)
+			startProjectMerge(info, rec)
+			redis.Put(INFOID, id, 1, INFOTIMEOUT)
+			currentMegerTime = info.Publishtime
+			unlockPNCBMap(info)
+		}(rec)
+	}
+	pending.Wait()
+	FullCount += len(t.result)
+
+	log.Println("currentFull", FullCount)
+
+}

+ 0 - 0
fullproject/src/handleproject.go → fullproject/src2/handleproject.go


+ 0 - 0
fullproject/src/main.go → fullproject/src2/main.go


+ 0 - 0
fullproject/src/main_test.go → fullproject/src2/main_test.go


+ 0 - 0
fullproject/src/projectmeger.go → fullproject/src2/projectmeger.go


二進制
fullproject/src2/src


+ 0 - 0
fullproject/src/thisinfo.go → fullproject/src2/thisinfo.go


+ 0 - 0
fullproject/src/udptaskmap.go → fullproject/src2/udptaskmap.go


+ 470 - 0
fullproject/src_dev1/bak.txt

@@ -0,0 +1,470 @@
+
+	//fmt.Println("-------------------", len(compareABCD))
+	//四个元素一致 [AB][AB][AB][AB]
+	vm = []string{"A", "B"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				for m := 0; m < 2; m++ {
+					key := vm[i] + vm[j] + vm[k] + vm[m]
+					compareAB[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	fmt.Println("-------------------", len(compareAB))
+	//---至少两个一致,其他可能不存在
+	//[AB][ABD][AB][ABD]
+	//[AB][ABD][ABD][AB]
+	//[ABD][AB][ABD][AB]
+	//[ABD][AB][AB][ABD]
+	vm = []string{"A", "B"}
+	vm2 := []string{"A", "B", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 2; k++ {
+				for m := 0; m < 3; m++ {
+					key := vm[i] + vm2[j] + vm[k] + vm2[m]
+					if !compareAB[key] {
+						compareAB2D[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				for m := 0; m < 2; m++ {
+					key := vm[i] + vm2[j] + vm2[k] + vm[m]
+					if !compareAB[key] {
+						compareAB2D[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 3; k++ {
+				for m := 0; m < 2; m++ {
+					key := vm2[i] + vm[j] + vm2[k] + vm[m]
+					if !compareAB[key] {
+						compareAB2D[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				for m := 0; m < 3; m++ {
+					key := vm2[i] + vm[j] + vm[k] + vm2[m]
+					if !compareAB[key] {
+						compareAB2D[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2D))
+	//---至少一个一致,其他可能不存在
+	//[ABD][ABD][ABD][ABD] //已经删除DDDD
+	vm = []string{"A", "B", "D"}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				for m := 0; m < 3; m++ {
+					key := vm[i] + vm[j] + vm[k] + vm[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
+						compareABD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABD))
+
+	//[AB][ABCD][AB][ABCD]
+	//[AB][ABCD][ABCD][AB]
+	//[ABCD][AB][ABCD][AB]
+	//[ABCD][AB][AB][ABCD]
+
+	vm = []string{"A", "B"}
+	vm2 = []string{"A", "B", "C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 2; k++ {
+				for m := 0; m < 4; m++ {
+					key := vm[i] + vm2[j] + vm[k] + vm2[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+						compareAB2CD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				for m := 0; m < 2; m++ {
+					key := vm[i] + vm2[j] + vm2[k] + vm[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+						compareAB2CD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 4; k++ {
+				for m := 0; m < 2; m++ {
+					key := vm2[i] + vm[j] + vm2[k] + vm[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+						compareAB2CD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				for m := 0; m < 4; m++ {
+					key := vm2[i] + vm[j] + vm[k] + vm2[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+						compareAB2CD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2CD))
+
+	//[ABCD][ABCD][ABCD][ABCD]  //已经删除[CD][CD][CD][CD]   //这个要重点讨论
+	vm = []string{"A", "B", "C", "D"}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				for m := 0; m < 4; m++ {
+					key := vm[i] + vm[j] + vm[k] + vm[m]
+					if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
+						compareABCD[key] = true
+						//fmt.Println(key)
+					}
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABCD))
+	
+	
+	
+	/*
+								if compareNoPass[compareStr] {
+									//[CD][CD][CD]
+									//没有通过
+								} else {
+									compareProject.score = score
+									compareProject.pos = k
+									if compareAB[compareStr] || compareAB2D[compareStr] || compareAB2CD[compareStr] { //项目名称、项目编号相等 四个相等
+										if compareBuyer != "C" { //采购单位相等或不存在
+											if compareBuyer == "D" { //不存在
+												if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+													if info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12 {
+														comRes1 = append(comRes1, compareProject)
+														BFind = true
+													} else {
+														comRes2 = append(comRes2, compareProject)
+														BFind = true
+													}
+												} else {
+													if info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12 {
+														comRes2 = append(comRes2, compareProject)
+														BFind = true
+													} else if compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D" {
+														comRes3 = append(comRes3, compareProject)
+														BFind = true
+													}
+												}
+											} else { //相等或包含
+												comRes1 = append(comRes1, compareProject)
+												BFind = true
+											}
+										} else { //采购单位不相等
+											//省市  时间  代理机构
+											if "AA" == compareCity && compareAgency != "D" && compareTime != "D" {
+												if info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12 {
+													comRes1 = append(comRes1, compareProject)
+													BFind = true
+												} else {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												}
+											} else {
+												if (compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D") && (info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12) {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												}
+											}
+										}
+										//} else if compareAB2D[compareStr] { //至少有两个[AB]*[AB]*相等
+									} else if compareABD[compareStr] { //至少有一个[ABD]相等
+										if compareBuyer != "C" { //采购单位相等或不存在
+											if compareBuyer == "D" { //不存在
+												if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+													if (info.LenPN > 19) && (info.LenPC > 12 || info.LenPTC > 12) {
+														comRes2 = append(comRes2, compareProject)
+														BFind = true
+													} else if info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12 {
+														comRes3 = append(comRes3, compareProject)
+														BFind = true
+													}
+												} else {
+													if (compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D") && (info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12) {
+														comRes3 = append(comRes3, compareProject)
+														BFind = true
+													}
+												}
+											} else { //相等或包含
+												if (info.LenPN > 19) && (info.LenPC > 12 || info.LenPTC > 12) {
+													comRes1 = append(comRes1, compareProject)
+													BFind = true
+												} else {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												}
+											}
+										} else { //采购单位不相等
+											//省市  时间  代理机构
+											if "AA" == compareCity && compareAgency != "D" && compareTime != "D" {
+												if (info.LenPN > 19) && (info.LenPC > 12 || info.LenPTC > 12) {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												} else if info.LenPN > 19 || info.LenPC > 12 || info.LenPTC > 12 {
+													comRes3 = append(comRes3, compareProject)
+													BFind = true
+												}
+											} else {
+												if (compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D") && (info.LenPN > 19) && (info.LenPC > 12 || info.LenPTC > 12) {
+													comRes3 = append(comRes3, compareProject)
+													BFind = true
+												}
+											}
+										}
+									} else if compareABCD[compareStr] { //有不相等的
+										if compareBuyer != "C" { //采购单位相等或不存在
+											if compareBuyer == "D" { //不存在
+												if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+													if (info.LenPN > 21) && (info.LenPC > 14 || info.LenPTC > 14) {
+														comRes2 = append(comRes2, compareProject)
+														BFind = true
+													} else if info.LenPN > 21 || info.LenPC > 14 || info.LenPTC > 14 {
+														comRes3 = append(comRes3, compareProject)
+														BFind = true
+													}
+												} else {
+													if (compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D") && (info.LenPN > 21 || info.LenPC > 14 || info.LenPTC > 14) {
+														comRes3 = append(comRes3, compareProject)
+														BFind = true
+													}
+												}
+											} else { //相等或包含
+												if (info.LenPN > 21) && (info.LenPC > 14 || info.LenPTC > 14) {
+													comRes1 = append(comRes1, compareProject)
+													BFind = true
+												} else {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												}
+											}
+										} else { //采购单位不相等
+											//省市  时间  代理机构
+											if "AA" == compareCity && compareAgency != "D" && compareTime != "D" {
+												if (info.LenPN > 24) && (info.LenPC > 16 || info.LenPTC > 16) {
+													comRes2 = append(comRes2, compareProject)
+													BFind = true
+												} else if info.LenPN > 24 || info.LenPC > 16 || info.LenPTC > 16 {
+													comRes3 = append(comRes3, compareProject)
+													BFind = true
+												}
+											} else {
+												if (compareCity[1:1] != "C" && compareAgency != "C" && compareTime != "D") && (info.LenPN > 24) && (info.LenPC > 16 || info.LenPTC > 16) {
+													comRes3 = append(comRes3, compareProject)
+													BFind = true
+												}
+											}
+										}
+									}
+								}
+
+							*/
+
+							//							if !BFind {
+							//								//解锁
+							//								IDArr[k].Lock.Unlock()
+							//	
+							
+							
+							
+							/*
+		if info.PTN != "" {
+			if info.PTN == compareProject.PTN || info.PTN == compareProject.ProjectName {
+				compareStr += "A"
+				score += 4
+				if len([]rune(info.PTN)) > 19 {
+					score += 2
+				}
+			} else if compareProject.PTN != "" {
+				if strings.Contains(info.PTN, compareProject.PTN) || strings.Contains(compareProject.PTN, info.PTN) {
+					compareStr += "B"
+					score += 3
+					if len([]rune(info.PTN)) > 19 {
+						score += 2
+					}
+				} else {
+					compareStr += "C"
+				}
+			} else if compareProject.ProjectName != "" {
+				if strings.Contains(info.PTN, compareProject.ProjectName) || strings.Contains(compareProject.ProjectName, info.PTN) {
+					compareStr += "B"
+					score += 3
+					if len([]rune(info.PTN)) > 19 {
+						score += 2
+					}
+				} else {
+					compareStr += "C"
+				}
+			} else {
+				compareStr += "D"
+			}
+		} else {
+			compareStr += "D"
+		}
+	*/
+	
+	
+	if info.ProjectCode != "" {
+		if info.ProjectCode == compareProject.ProjectCode {
+			compareStr += "A"
+			score += 4
+			if len([]rune(info.ProjectCode)) > 12 {
+				score += 2
+			}
+		} else if compareProject.ProjectCode != "" {
+			if strings.Contains(info.ProjectCode, compareProject.ProjectCode) || strings.Contains(compareProject.ProjectCode, info.ProjectCode) {
+				compareStr += "B"
+				score += 3
+				if len([]rune(info.ProjectCode)) > 12 {
+					score += 2
+				}
+			} else {
+				compareStr += "C"
+			}
+		} else {
+			compareStr += "D"
+		}
+	} else {
+		compareStr += "D"
+	}
+
+	if info.PTC != "" {
+		if info.PTC == "" { //compareProject.PTC || info.PTC == compareProject.ProjectCode {
+			compareStr += "A"
+			score += 4
+			if len([]rune(info.PTC)) > 12 {
+				score += 2
+			}
+		} else if "" != "" {
+			if strings.Contains(info.PTC, "") || strings.Contains("", info.PTC) {
+				compareStr += "B"
+				score += 3
+				if len([]rune(info.PTC)) > 12 {
+					score += 2
+				}
+			} else {
+				compareStr += "C"
+			}
+		} else if compareProject.ProjectCode != "" {
+			if strings.Contains(info.PTC, compareProject.ProjectCode) || strings.Contains(compareProject.ProjectCode, info.PTC) {
+				compareStr += "B"
+				score += 3
+				if len([]rune(info.PTC)) > 12 {
+					score += 2
+				}
+			} else {
+				compareStr += "C"
+			}
+		} else {
+			compareStr += "D"
+		}
+	} else {
+		compareStr += "D"
+	}
+	、
+	
+	
+	
+	if len(comRes1) > 0 {
+					if len(comRes1) > 1 {
+						sort.Slice(comRes1, func(i, j int) bool {
+							return comRes1[i].score > comRes1[j].score
+						})
+					}
+					UpdateProject(tmp, info, comRes1[0], 1, comStr)
+				} else if len(comRes2) > 0 {
+					if len(comRes2) > 1 {
+						sort.Slice(comRes2, func(i, j int) bool {
+							return comRes2[i].score > comRes2[j].score
+						})
+					}
+					UpdateProject(tmp, info, comRes2[0], 2, comStr)
+
+				} else if len(comRes3) > 0 {
+					if len(comRes3) > 1 {
+						sort.Slice(comRes3, func(i, j int) bool {
+							return comRes3[i].score > comRes3[j].score
+						})
+					}
+					UpdateProject(tmp, info, comRes3[0], 3, comStr)
+				} else {
+					//没有找到
+					id := NewProject(tmp, info)
+					t, _ := strconv.ParseInt(id[0:8], 16, 64)
+					AllMapLock.Lock()
+					AllIdsMap[int(t)%idsMapSize][id] = &ID{Id: id, lastTime: info.Publishtime}
+					AllMapLock.Unlock()
+					for _, m := range idsMap {
+						m.Key.Arr = append(m.Key.Arr, id)
+					}
+				}
+				
+				
+				for k, v := range str {
+		if v != "" && repeat[v] == nil {
+			index := len(v) % size
+			m := AllMap[index]
+			m.Lock.Lock()
+			ids := m.Map[v]
+			if ids == nil {
+				ids = &Key{Arr: []string{}}
+				m.Map[v] = ids
+			}
+			//所有键一样的对象加锁
+			ids.Lock.Lock()
+			m.Lock.Unlock()
+			repeat[v] = &IdsMapAndIndex{ids, k}
+		}
+	}

+ 3 - 0
fullproject/src_dev1/clear.go

@@ -0,0 +1,3 @@
+package main
+
+// clearPKey is an empty stub: it takes no arguments and does nothing.
+func clearPKey() {}

+ 173 - 0
fullproject/src_dev1/comparePNCB.go

@@ -0,0 +1,173 @@
+package main
+
+import (
+	"regexp"
+	"strings"
+)
+
+// comparePNCB compares the project name and the two project codes of info
+// against compareProject and returns a three-letter grade string plus a score.
+//
+// compareStr holds one letter per compared field, in this order: project
+// name, info.ProjectCode, info.PTC. The letters are:
+//
+//	A - equal (names: also when CheckContain returns 3; codes: also when the
+//	    longer code minus the shorter one is empty once dates are stripped)
+//	B - partial match
+//	C - candidates exist but none of them matches
+//	D - info's value is empty, or compareProject offers no candidate
+//
+// score gains 4 for every A and 2 for every B, plus a bonus of 2 (A) or 1 (B)
+// when the info value is longer than 19 runes.
+func comparePNCB(info *Info, compareProject *ProjectInfo) (compareStr string, score int) {
+	// retv keeps the result of the most recent CheckContain call; it stays 0
+	// when CheckContain is never called.
+	retv := 0
+	if info.ProjectName != "" {
+		// Candidate names: the project's own name plus every entry of MPN.
+		pns := []string{}
+		if compareProject.ProjectName != "" {
+			pns = append(pns, compareProject.ProjectName)
+		}
+		if len(compareProject.MPN) > 0 {
+			pns = append(pns, compareProject.MPN...)
+		}
+		// ifind: 0 = no candidate, 1 = grade A, 2 = grade B, 3 = grade C.
+		ifind := 0
+		for _, v := range pns {
+			if info.ProjectName == v {
+				ifind = 1
+				break
+			} else {
+				//if strings.Contains(info.ProjectName, v) || strings.Contains(v, info.ProjectName) ||
+				retv = CheckContain(info.ProjectName, v, info.Buyer)
+				//if CheckContain(info.ProjectName, v, info.Buyer)
+				if retv == 3 {
+					ifind = 1
+					break
+				} else if retv == 1 {
+					// A partial match replaces an earlier "no match" (3).
+					ifind = 2
+				} else if ifind == 0 {
+					// "No match" never downgrades an earlier partial match.
+					ifind = 3
+				}
+			}
+		}
+		switch ifind {
+		case 0:
+			compareStr = "D"
+		case 1:
+			compareStr = "A"
+			score += 4
+			if len([]rune(info.ProjectName)) > 19 {
+				score += 2
+			}
+		case 2:
+			compareStr = "B"
+			score += 2
+			if len([]rune(info.ProjectName)) > 19 {
+				score += 1
+			}
+		case 3:
+			compareStr = "C"
+		}
+	} else {
+		compareStr = "D"
+	}
+	// PNStr is the name grade on its own; compareStr grows below.
+	PNStr := compareStr
+	// Grade info.ProjectCode first, then info.PTC, against the same candidates.
+	for _, pc := range []string{info.ProjectCode, info.PTC} {
+		if pc != "" {
+			// Candidate codes: the project's own code plus every entry of MPC.
+			pcs := []string{}
+			if compareProject.ProjectCode != "" {
+				pcs = append(pcs, compareProject.ProjectCode)
+			}
+			if len(compareProject.MPC) > 0 {
+				pcs = append(pcs, compareProject.MPC...)
+			}
+			ifind := 0
+			for _, v := range pcs {
+				if pc == v {
+					ifind = 1
+					break
+				} else {
+					// math.Abs(float64(len([]rune(pc))-len([]rune(v)))) < 6
+					//if !_numreg1.MatchString(pc) && !_zimureg1.MatchString(pc) && !_numreg1.MatchString(v) && !_zimureg1.MatchString(v)
+					// Containment only counts when the name graded A or B, or
+					// when neither code is made up solely of ASCII letters,
+					// digits and dashes.
+					if (PNStr == "A" || PNStr == "B" || (!_nzreg.MatchString(pc) && !_nzreg.MatchString(v))) && (strings.Contains(pc, v) || strings.Contains(v, pc)) {
+						// t1 is the longer code (by byte length), t2 the shorter.
+						t1 := pc
+						t2 := v
+						if len(v) > len(pc) {
+							t1 = v
+							t2 = pc
+						}
+						// t3 is what remains of the longer code once the
+						// shorter one and any date text are removed.
+						t3 := strings.Replace(t1, t2, "", -1)
+						t3 = _datereg.ReplaceAllString(t3, "")
+						if t3 == "" {
+							ifind = 1
+							break
+						} else if _hanreg.MatchString(t3) || _numreg1.MatchString(t3) {
+							ifind = 2
+						} else {
+							// Unlike the branch below, this overwrites a
+							// partial match found on an earlier candidate.
+							ifind = 3
+						}
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+			switch ifind {
+			case 0:
+				compareStr += "D"
+			case 1:
+				compareStr += "A"
+				score += 4
+				if len([]rune(pc)) > 19 {
+					score += 2
+				}
+			case 2:
+				// NOTE(review): compareStr can equal "C" only while grading
+				// the first code (it has two letters by the second pass), and
+				// retv is whatever the last CheckContain call returned.
+				// Presumably PNStr was meant here - confirm before changing.
+				if compareStr == "C" && retv == 2 {
+					compareStr += "C"
+				} else {
+					compareStr += "B"
+					score += 2
+					if len([]rune(pc)) > 19 {
+						score += 1
+					}
+				}
+			case 3:
+				compareStr += "C"
+			}
+
+		} else {
+			compareStr += "D"
+		}
+	}
+	return
+}
+
+// _datereg matches a 20xx date written with 年/月/日 or dashes, optionally
+// followed by an hour ("时") and up to two more digits.
+var _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
+
+// _numreg1 matches a whole string of 1 to 8 digits and dashes.
+var _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
+
+// _zimureg1 matches a whole string of 1 to 7 ASCII letters and dashes.
+var _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
+
+// _nzreg matches a whole string made up only of ASCII letters, digits and dashes.
+var _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
+
+// _hanreg matches a whole string made up only of Han characters and the
+// listed punctuation (colon, 【】, square brackets, parentheses, dash, 、).
+//
+// The brackets and dashes are escaped with a single backslash: in a raw
+// string the previous `\\[\\]` ended the character class at the first `]`,
+// which left the rest of the pattern as literal text that practically never
+// matched, and an unescaped `--、` inside the class would be read as a range.
+// NOTE(review): the doubled characters are kept as found; presumably one of
+// each pair was a full-width variant - confirm against the original file.
+var _hanreg = regexp.MustCompile(`^[\p{Han}::【】\[\]()()\-\-、]+$`)
+
+// replaceStr matches generic words and punctuation that CheckContain strips
+// from project names before comparing them.
+var replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()]|栏标价|中标候选人|招标代理)")
+
+// pStr matches phase, section, package and batch markers; CheckContain uses
+// it to refuse a partial match when the leftover text contains one.
+var pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
+
+// CheckContain grades how closely two project names agree once the text
+// matched by replaceStr and the first occurrence of infoBuyer have been
+// removed from both.
+//
+// It returns 3 when the names are equal, either straight after that cleanup
+// or after the runs of shared characters found below have been removed too.
+// It returns 1 when one name is reduced to the empty string and the other
+// does not match pStr, and 2 in every other case.
+func CheckContain(b1, b2, infoBuyer string) (res int) {
+	b1 = strings.Replace(replaceStr.ReplaceAllString(b1, ""), infoBuyer, "", 1)
+	b2 = strings.Replace(replaceStr.ReplaceAllString(b2, ""), infoBuyer, "", 1)
+	if b1 == b2 {
+		return 3
+	}
+
+	// The rune slices are snapshots taken before any run is removed; the
+	// loops walk the snapshots while b1 and b2 shrink.
+	left, right := []rune(b1), []rune(b2)
+	run := ""
+	// dropRun deletes every occurrence of the collected run from both names.
+	dropRun := func() {
+		b1 = strings.Replace(b1, run, "", -1)
+		b2 = strings.Replace(b2, run, "", -1)
+	}
+	for _, lr := range left {
+		for _, rr := range right {
+			switch {
+			case lr == rr:
+				run += string(lr)
+			case run != "":
+				dropRun()
+				run = ""
+			}
+		}
+	}
+	// A run still open when the loops end is removed as well.
+	if run != "" {
+		dropRun()
+	}
+
+	switch {
+	case b1 == b2:
+		return 3
+	case b1 == "" && !pStr.MatchString(b2), b2 == "" && !pStr.MatchString(b1):
+		return 1
+	}
+	return 2
+}

+ 34 - 0
fullproject/src_dev1/config.json

@@ -0,0 +1,34 @@
+{
+    "mongodbServers": "192.168.3.207:27082",
+    "mongodbPoolSize": 10,
+    "mongodbName": "cesuo",
+    "jkmail": {
+        "to": "zhangjinkun@topnet.net.cn",
+        "api": "http://10.171.112.160:19281/_send/_mail"
+    },
+    "thread": 1,
+    "extractColl": "key1_biddingall",
+    "projectColl": "projectset_0720",
+    "lenprojectname": 18,
+    "redisPoolSize": 20,
+    "redisaddrs": "ids=192.168.3.207:1378,keys=192.168.3.207:1378,info=192.168.3.207:1378",
+    "clearedis": {
+        "open": true,
+        "clearcron": "0 10 15 ? * 4",
+        "projectcycle": 180
+    },
+    "megerfields": {
+        "projectlen": 5,
+        "projectcodelen": 8
+    },
+    "taskstock": {
+        "open": true,
+		"startTime":1451610310,
+        "startdate": "2015-11-01",
+        "endate": "2019-06-30"
+    },
+    "udpport": ":1482",
+    "nextNode": [
+
+    ]
+}

+ 215 - 0
fullproject/src_dev1/fulldata.go

@@ -0,0 +1,215 @@
+package main
+
+import (
+	"log"
+	"qfw/util"
+	//	"qfw/util/mongodb"
+
+	//	"qfw/util/redis"
+	"sync"
+	"time"
+
+	//	"gopkg.in/mgo.v2/bson"
+)
+
+// FullCount is the running total of records iterated by task.query over all
+// batches (records skipped as repeats are counted too).
+var FullCount = 0
+
+// RunFullData replays the whole extract collection in ascending publishtime
+// order. It looks up the smallest and largest publishtime, then a producer
+// goroutine walks that range in 20-day windows, loads every window into
+// memory and hands it to the consumer loop below as a *task. The function
+// returns once the producer has covered the range and every queued task has
+// been processed; it returns immediately when the oldest record cannot be
+// read.
+func RunFullData() {
+	startTime, END := int64(0), int64(0)
+	sts, bres := MQFW.Find(extractColl, `{}`, "publishtime", `{"publishtime":1}`, true, 1, 1)
+	if !bres || sts == nil || len(*sts) != 1 {
+		return
+	}
+	// Start one second early so the "$gt" bound of the first window still
+	// includes the oldest record.
+	startTime = util.Int64All((*sts)[0]["publishtime"]) - 1
+	sts, bres = MQFW.Find(extractColl, `{}`, "-publishtime", `{"publishtime":1}`, true, 1, 1)
+	if bres && sts != nil && len(*sts) == 1 {
+		END = util.Int64All((*sts)[0]["publishtime"])
+	}
+	log.Println(startTime, END)
+	defer util.Catch()
+
+	var wg sync.WaitGroup
+	findPoolSize := 4
+	pool := make(chan *task, findPoolSize)
+	endChan := make(chan bool, 1)
+
+	// Windows that start before this timestamp advance the pacing counter
+	// "day" by 3 instead of 1. The window length itself is always 20 days.
+	before15year := int64(1420041600)
+	day := 0
+	go func() {
+		for {
+			if startTime >= END {
+				// Wait for every queued task before signalling the end;
+				// otherwise the consumer's select may pick endChan while
+				// tasks are still sitting in pool and silently drop them.
+				wg.Wait()
+				log.Println("任务结束")
+				endChan <- true
+				break
+			}
+			addDay := 1
+			if startTime < before15year {
+				addDay = 3
+			}
+			endTime := startTime + int64(20*86400)
+			day += addDay
+			log.Println("day====", day, startTime, endTime)
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": endTime,
+				},
+			}
+
+			// Records are processed in ascending publishtime order.
+			sess := MQFW.GetMgoConn()
+			if sess == nil {
+				time.Sleep(10 * time.Second)
+				continue
+			}
+			var result []map[string]interface{}
+			err := sess.DB(MQFW.DbName).C(extractColl).Find(q).Sort("publishtime").All(&result)
+			// Hand the session back on every path, including the empty and
+			// the failed one, so the pool is not exhausted.
+			MQFW.DestoryMongoConn(sess)
+			if err != nil {
+				// Retry the same window instead of skipping its data.
+				log.Println("查询出错:", err)
+				time.Sleep(10 * time.Second)
+				continue
+			}
+			startTime = endTime
+			log.Println("查询结果:", len(result))
+			if len(result) == 0 {
+				continue
+			}
+			// Add must happen before the task becomes visible to the
+			// consumer, which calls Done when it finishes the task.
+			wg.Add(1)
+			pool <- &task{result}
+			if day > 0 && day%(1*findPoolSize) == 0 {
+				// Periodically let the consumer catch up so that only a
+				// bounded number of loaded windows is held in memory.
+				wg.Wait()
+			}
+		}
+	}()
+
+	for {
+		select {
+		case t := <-pool:
+			t.query()
+			t.result = nil
+			t = nil
+			wg.Done()
+		case <-endChan:
+			return
+		}
+	}
+
+}
+
+// task is one batch of raw extract records loaded by RunFullData and
+// processed by (*task).query.
+type task struct {
+	result []map[string]interface{} // records of the batch; set to nil once processed
+}
+
+func (t *task) query() {
+	index := 0
+	wg := &sync.WaitGroup{}
+
+	for _, tmp := range t.result {
+		if index%2000 == 0 {
+			log.Println(index, tmp["_id"])
+		}
+		index++
+		if util.IntAll(tmp["repeat"]) == 1 {
+			continue
+		}
+		pt := util.Int64All(tmp["publishtime"])
+		if pt > currentMegerTime {
+			currentMegerTime = pt
+		}
+		currentMegerCount++
+		if currentMegerCount > 600000 {
+			log.Println("执行清理", currentMegerTime)
+			clearPKey()
+			currentMegerCount = 0
+		}
+		wg.Add(1)
+		MultiThread <- true
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-MultiThread
+				wg.Done()
+			}()
+			info := PreThisInfo(tmp)
+			if info != nil && (info.LenPC > 3 || info.LenPN > 3 || info.LenPTC > 3) {
+				startProjectMerge(info, tmp)
+				//thisid := util.BsonIdToSId(tmp["_id"])
+				//redis.Put(INFOID, thisid, 1, INFOTIMEOUT)
+				currentMegerTime = info.Publishtime
+			}
+		}(tmp)
+		//time.Sleep(10 * time.Microsecond)
+	}
+	wg.Wait()
+	FullCount += index
+	t.result = nil
+	log.Println("currentFull", FullCount)
+
+}

+ 436 - 0
fullproject/src_dev1/infotool.go

@@ -0,0 +1,436 @@
+package main
+
+import (
+	"encoding/json"
+	//	"fmt"
+	du "jy/util"
+	qu "qfw/util"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+// Info is the in-memory view of one extracted announcement. The tagged
+// fields are filled by PreThisInfo through a JSON round-trip of the raw
+// record; the untagged fields are derived afterwards.
+type Info struct {
+	Id            string                 `json:"_id"`
+	Href          string                 `json:"href"`
+	Publishtime   int64                  `json:"publishtime"`
+	Title         string                 `json:"title"`
+	TopType       string                 `json:"toptype"`
+	SubType       string                 `json:"subtype"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"`
+	Buyer         string                 `json:"buyer"`
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`
+	Area          string                 `json:"area"`
+	City          string                 `json:"city"`
+	HasPackage    bool                   `json:"haspackage"`
+	Package       map[string]interface{} `json:"package"`
+	PNum          string                 `json:"pnum"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Winners       []string // winners collected by PreThisInfo from "winner" and the packages
+	dealtype      int
+
+	Buyerclass  string `json:"buyerclass"`
+	Bidopentime int64  `json:"bidopentime"`
+	District    string `json:"district"`
+	Winnerorder []string // candidate company names taken from "winnerorder"
+
+	PTC    string // project code extracted from the title by PreThisInfo
+	pnbval int    // how many of PTC, ProjectName, ProjectCode and Buyer are present
+	LenPC  int    // rune length of ProjectCode
+	LenPN  int    // rune length of ProjectName
+	LenPTC int    // rune length of PTC
+}
+
+// Package-level key maps and the regular expressions used when preparing a
+// record for merging.
+var (
+	// Keyed by project name, project code (also title code) and buyer;
+	// lockPNCBMap polls them and unlockPNCBMap deletes from them.
+	PNKeyMap, PCKeyMap, PBKeyMap = sync.Map{}, sync.Map{}, sync.Map{}
+	pnreg                        = regexp.MustCompile("^(及编号[::])|(项目|采购|招标|中标|成交|结果|[_]|公告)$")
+	titleGetPn                   = regexp.MustCompile("^([\\[【((]?.?(资格预审|中标|招标|延期|成交|结果|合同|失败|询价|关于对?)(公告)?[\\]】))]?([::]|关于对?)?)?(.{4,70}?(采购|工程)?(项目)?)([((【]?(第?[一二三四五六七八九1-9再]次|重新|重招|公开|[预拟]).{0,3}?[))】]?)?(招标|采购|采购计划|发包|结果|变更|更正|成交|网上(竞价)?|电子化|电子反拍|询比?价|比价|竞争性(谈判|磋商)|流标|废标|邀请|合同|验收|违规|资格|预审|中标(结果)?|延期|澄清|暂停|补遗|终止|文件|标前|征求|报建|征集|论证|谈判|拟实施|中止|需求|比选|评标(过程)?及?|磋商|未入围|进口|投标|答疑|抽签|异常|质疑|答复|回复|应答|遴选|最高|拦标|推迟|开标|取消|延迟|撤销|控制价|场外|作废|候选人|采用|实施|预|不良记录|竞买|反拍|修正|调整|简称|小型)?(公告|记录|公示|预告|通知[函书]?|意见[函书]?|[函书])?([((【].*?[))】])?$")
+	// titleGetPc, titleGetPc1 and titleGetPc2 are tried in this order by
+	// PreThisInfo to pull a project code out of the title: at the start of
+	// the title, inside brackets (capture group 3), and at the end.
+	titleGetPc                   = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1                  = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2                  = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	numreg                       = regexp.MustCompile("^[0-9]$")
+	numreg2                      = regexp.MustCompile("^[0-9]+$")
+	// numCheckPc matches candidates of at most 10 digits/hyphens;
+	// PreThisInfo rejects those as title codes.
+	numCheckPc                   = regexp.MustCompile("^[0-9-]{1,10}$")
+	// TitleReg detects lot/section markers in a title; PreThisInfo uses the
+	// first match to derive Info.PNum.
+	TitleReg                     = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+(子|合同|分|施工|监理|标)?[包标段][号段]?[、]?)+|((子|合同|分|施工|监理|标)?[包标段][号段]?[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ、\\-~至]+[、]?)+|(子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+[\\-~-至、](子|合同|分|施工|监理|标)?[包标段][号段]?[a-zA-Z0-9]+")
+)
+
+// ProjectInfo is the merged state of one project. startProjectMerge decodes
+// it from the JSON stored in redis and writes it back after an update.
+type ProjectInfo struct {
+	Id            string                 `json:"id"`
+	Publistime    []int64                `json:"publistime"` // publish times of the merged records (time span)
+	InfoType      [][]string             `json:"infotype"`   // toptype/subtype of each merged record
+	Ids           []string               `json:"ids"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Winners       []string               `json:"winners"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"` // unique project code (purely numeric codes weigh less)
+	Buyer         string                 `json:"buyer"`       // unique buyer
+	MPN           []string               `json:"mpn"`         // additional project names collected while merging
+	MPC           []string               `json:"mpc"`         // additional project codes collected while merging
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`     // unique agency
+	Area          string                 `json:"area"`       // unique area (province level)
+	City          string                 `json:"city"`       // city
+	District      string                 `json:"district"`   // district / county
+	HasPackage    bool                   `json:"haspackage"` // whether the project is split into packages
+	Package       map[string]interface{} `json:"package"`    // package data used for comparison
+
+	Buyerclass  string   `json:"buyerclass"`  // buyer classification
+	Bidopentime int64    `json:"bidopentime"` // bid opening time
+	Winnerorder []string // winning candidates
+	score       int      // sort key used when several candidates match; unexported, so not serialised
+	comStr      string   // comparison string recorded with the merged record; unexported, so not serialised
+}
+
+// KeyMap is one mutex-guarded shard mapping a key string to its *Key entry.
+type KeyMap struct {
+	Lock sync.Mutex
+	Map  map[string]*Key
+}
+
+// Key holds the project ids recorded for one key value, with its own lock.
+// startProjectMerge appends ids to Arr and releases Lock via defer.
+type Key struct {
+	Arr  []string
+	Lock sync.Mutex
+}
+// IdAndLock pairs an id with a mutex.
+type IdAndLock struct {
+	Id   string
+	Lock sync.Mutex
+}
+
+// NewKeyMap returns an empty KeyMap whose Map is ready for writes and whose
+// Lock is an unlocked mutex.
+func NewKeyMap() *KeyMap {
+	km := new(KeyMap) // the zero-value Lock is an unlocked sync.Mutex
+	km.Map = make(map[string]*Key)
+	return km
+}
+
+// size is the shard count of the four key maps below; idsMapSize is the
+// shard count of AllIdsMap. Every shard is allocated in init.
+var size, idsMapSize = 30, 100
+var AllPNMap = make([]*KeyMap, size)  // project name -> project id list
+var AllPCMap = make([]*KeyMap, size)  // project code -> project id list
+var AllPTCMap = make([]*KeyMap, size) // presumably title-derived project code (cf. Info.PTC) -> project id list
+var AllPBMap = make([]*KeyMap, size)  // buyer -> project id list
+
+// ID is the per-project lock object kept in AllIdsMap2. startProjectMerge
+// locks it while a candidate project is being compared and updated.
+type ID struct {
+	Id       string
+	Lock     sync.Mutex
+	lastTime int64 // set to the record's publishtime when the project is created
+	pos      int
+}
+
+// All project id objects, used for locking, deletion and similar.
+// AllIdsMap is the sharded form allocated in init; startProjectMerge reads
+// and writes AllIdsMap2 under AllIdsMapLock.
+var AllIdsMap = make([]map[string]*ID, idsMapSize)
+var AllIdsMap2 = map[string]*ID{}
+var AllIdsMapLock = sync.Mutex{}
+
+// Predefined sets of three-letter codes, one letter from [ABCD] for each of
+// project name / project code / title code. The sets are filled in init and
+// are mutually exclusive by construction.
+var compareNoPass = map[string]bool{} // every code made only of C and D
+var compareAB = map[string]bool{}     // every code made only of A and B
+var compareAB2D = map[string]bool{}   // [AB][AB][ABD] and [AB][ABD][AB], minus compareAB
+var compareABD = map[string]bool{}    // remaining codes over {A,B,D}
+var compareAB2CD = map[string]bool{}  // remaining [AB][ABCD][AB] and [AB][AB][ABCD] codes
+var compareABCD = map[string]bool{}   // every code not claimed by one of the sets above
+
+// init allocates every shard of the key maps and of AllIdsMap, then fills
+// the compare* sets with three-letter codes. The sets are built in a fixed
+// order and each later set only takes codes that no earlier set has claimed.
+func init() {
+	for shard := 0; shard < size; shard++ {
+		AllPNMap[shard] = NewKeyMap()
+		AllPCMap[shard] = NewKeyMap()
+		AllPTCMap[shard] = NewKeyMap()
+		AllPBMap[shard] = NewKeyMap()
+	}
+	for shard := 0; shard < idsMapSize; shard++ {
+		AllIdsMap[shard] = map[string]*ID{}
+	}
+
+	// each3 calls visit with every concatenation x+y+z.
+	each3 := func(xs, ys, zs []string, visit func(code string)) {
+		for _, x := range xs {
+			for _, y := range ys {
+				for _, z := range zs {
+					visit(x + y + z)
+				}
+			}
+		}
+	}
+	// claimed reports whether code is already a member of any given set.
+	claimed := func(code string, sets ...map[string]bool) bool {
+		for _, set := range sets {
+			if set[code] {
+				return true
+			}
+		}
+		return false
+	}
+
+	cd := []string{"C", "D"}
+	ab := []string{"A", "B"}
+	abd := []string{"A", "B", "D"}
+	abcd := []string{"A", "B", "C", "D"}
+
+	// Never acceptable: [CD][CD][CD].
+	each3(cd, cd, cd, func(code string) {
+		compareNoPass[code] = true
+	})
+
+	// All three elements agree, highest rank: [AB][AB][AB].
+	each3(ab, ab, ab, func(code string) {
+		compareAB[code] = true
+	})
+
+	// At least two agree, the third may be absent:
+	// [AB][AB][ABD] and [AB][ABD][AB].
+	for _, shape := range [][3][]string{{ab, ab, abd}, {ab, abd, ab}} {
+		each3(shape[0], shape[1], shape[2], func(code string) {
+			if !compareAB[code] {
+				compareAB2D[code] = true
+			}
+		})
+	}
+
+	// At least one agrees, the others may be absent: [ABD][ABD][ABD]
+	// (DDD is already excluded through compareNoPass).
+	each3(abd, abd, abd, func(code string) {
+		if !claimed(code, compareAB, compareAB2D, compareNoPass) {
+			compareABD[code] = true
+		}
+	})
+
+	// [AB][ABCD][AB] and [AB][AB][ABCD].
+	for _, shape := range [][3][]string{{ab, abcd, ab}, {ab, ab, abcd}} {
+		each3(shape[0], shape[1], shape[2], func(code string) {
+			if !claimed(code, compareAB, compareAB2D, compareNoPass, compareABD) {
+				compareAB2CD[code] = true
+			}
+		})
+	}
+
+	// Everything that is left of [ABCD][ABCD][ABCD]; [CD][CD][CD] is already
+	// excluded. The original author marked this group as needing discussion.
+	each3(abcd, abcd, abcd, func(code string) {
+		if !claimed(code, compareAB, compareAB2D, compareABD, compareNoPass, compareAB2CD) {
+			compareABCD[code] = true
+		}
+	})
+
+}
+
+// PreThisInfo turns one raw extract record into an *Info and derives the
+// fields needed for merging: the package flag or lot number, the winner
+// list, the candidate company list, a project code taken from the title
+// (PTC), the presence counter pnbval and the rune lengths LenPC, LenPTC and
+// LenPN. It returns nil when the record cannot be decoded into an Info.
+func PreThisInfo(tmp map[string]interface{}) *Info {
+	// Round-trip through JSON so the struct tags on Info drive the mapping.
+	// A record that cannot be decoded at all leaves thisinfo nil.
+	bys, _ := json.Marshal(tmp)
+	var thisinfo *Info
+	json.Unmarshal(bys, &thisinfo)
+	if thisinfo == nil {
+		return nil
+	}
+	if len(thisinfo.Topscopeclass) == 0 {
+		thisinfo.Topscopeclass = []string{}
+	}
+	if len(thisinfo.Subscopeclass) == 0 {
+		thisinfo.Subscopeclass = []string{}
+	}
+	thisinfo.Subscopeclass = RemoveDup(thisinfo.Subscopeclass)
+
+	if len(thisinfo.Package) > 0 { // the record carries packages
+		thisinfo.HasPackage = true
+	} else if thisinfo.TopType == "结果" && TitleReg.MatchString(thisinfo.Title) {
+		// A result announcement whose title names a lot: remember the lot
+		// number for the later comparison.
+		res := TitleReg.FindAllStringSubmatch(thisinfo.Title, -1)
+		thisinfo.PNum = du.PackageNumberConvert(res[0][0])
+	}
+
+	// asMap accepts both concrete map types a decoded document can have.
+	// The previous code asserted bson.M only, which silently dropped the
+	// packages whenever they arrived as map[string]interface{}.
+	asMap := func(v interface{}) map[string]interface{} {
+		switch m := v.(type) {
+		case map[string]interface{}:
+			return m
+		case bson.M:
+			return m
+		}
+		return nil
+	}
+
+	// Winners: the top-level winner first, then every package winner, each
+	// name only once.
+	seen := map[string]bool{}
+	winners := []string{}
+	addWinner := func(name string) {
+		if name == "" || seen[name] {
+			return
+		}
+		seen[name] = true
+		winners = append(winners, name)
+	}
+	winner, _ := tmp["winner"].(string)
+	addWinner(winner)
+	if thisinfo.HasPackage {
+		for _, p := range asMap(tmp["package"]) {
+			pw, _ := asMap(p)["winner"].(string)
+			addWinner(pw)
+		}
+	}
+	thisinfo.Winners = winners
+
+	// Candidate companies: only names longer than 6 runes are kept.
+	winnerorder := []string{}
+	if winorders, ok := tmp["winnerorder"].([]interface{}); ok {
+		for _, wins := range winorders {
+			if win, ok := wins.(map[string]interface{}); ok {
+				entname := qu.ObjToString(win["entname"])
+				if entname != "" && len([]rune(entname)) > 6 {
+					winnerorder = append(winnerorder, entname)
+				}
+			}
+		}
+	}
+	thisinfo.Winnerorder = winnerorder
+
+	// Title code: take the first pattern that yields a usable code. A code
+	// is usable when it is longer than 8 bytes, differs from ProjectCode and
+	// is rejected by neither numCheckPc nor _zimureg1.
+	for _, c := range []struct {
+		re    *regexp.Regexp
+		group int
+	}{{titleGetPc, 1}, {titleGetPc1, 3}, {titleGetPc2, 1}} {
+		res := c.re.FindStringSubmatch(thisinfo.Title)
+		if len(res) <= c.group {
+			continue
+		}
+		code := res[c.group]
+		if len(code) > 8 && thisinfo.ProjectCode != code && !numCheckPc.MatchString(code) && !_zimureg1.MatchString(code) {
+			thisinfo.PTC = code
+			thisinfo.pnbval++
+			break
+		}
+	}
+
+	if thisinfo.ProjectName != "" {
+		thisinfo.pnbval++
+	}
+
+	// A project code matched by _zimureg1 is discarded.
+	if thisinfo.ProjectCode != "" && !_zimureg1.MatchString(thisinfo.ProjectCode) {
+		thisinfo.pnbval++
+	} else {
+		thisinfo.ProjectCode = ""
+	}
+
+	if thisinfo.Buyer != "" {
+		thisinfo.pnbval++
+	}
+
+	thisinfo.LenPC = len([]rune(thisinfo.ProjectCode))
+	thisinfo.LenPTC = len([]rune(thisinfo.PTC))
+	thisinfo.LenPN = len([]rune(thisinfo.ProjectName))
+	return thisinfo
+}
+
+// RemoveDup returns the elements of arr with duplicates removed, keeping the
+// first occurrence of each value in its original order. The result is never
+// nil, even for a nil or empty input.
+func RemoveDup(arr []string) (newarr []string) {
+	newarr = make([]string, 0)
+	seen := make(map[string]bool)
+	for _, item := range arr {
+		if seen[item] {
+			continue
+		}
+		seen[item] = true
+		newarr = append(newarr, item)
+	}
+	return newarr
+}
+
+// lockPNCBMap blocks the caller while any key of thisinfo (project name,
+// project code, title code, buyer) is present in the matching key map,
+// re-checking every 30ms. The title code is looked up in PCKeyMap, the same
+// map as the project code. This function only polls; it stores nothing.
+func lockPNCBMap(thisinfo *Info) {
+	present := func(m *sync.Map, key string) bool {
+		_, found := m.Load(key)
+		return found
+	}
+	for { // wait for the other tasks to finish
+		busy := thisinfo.LenPN > 0 && present(&PNKeyMap, thisinfo.ProjectName)
+		busy = busy || (thisinfo.LenPC > 0 && present(&PCKeyMap, thisinfo.ProjectCode))
+		busy = busy || (thisinfo.LenPTC > 0 && present(&PCKeyMap, thisinfo.PTC))
+		busy = busy || (len(thisinfo.Buyer) > 0 && present(&PBKeyMap, thisinfo.Buyer))
+		if !busy {
+			return
+		}
+		time.Sleep(30 * time.Millisecond)
+	}
+}
+
+// unlockPNCBMap releases thisinfo's keys: it deletes the project name from
+// PNKeyMap, the project code and the title code from PCKeyMap, and the buyer
+// from PBKeyMap.
+func unlockPNCBMap(thisinfo *Info) {
+	for _, entry := range []struct {
+		keys *sync.Map
+		key  string
+	}{
+		{&PNKeyMap, thisinfo.ProjectName},
+		{&PCKeyMap, thisinfo.ProjectCode},
+		{&PCKeyMap, thisinfo.PTC},
+		{&PBKeyMap, thisinfo.Buyer},
+	} {
+		entry.keys.Delete(entry.key)
+	}
+}
+
+// checkInfoAlter reports whether a record is a change notice. That is the
+// case when its subtype is already "变更", or when its title contains
+// "变更公告" or "更正公告" and its toptype is "结果". Change notices of tender
+// ("招标") records are deliberately not treated as changes.
+func checkInfoAlter(tmp map[string]interface{} /* incoming record */) bool {
+	toptype := qu.ObjToString(tmp["toptype"])
+	subtype := qu.ObjToString(tmp["subtype"])
+	title := qu.ObjToString(tmp["title"])
+	if subtype == "变更" {
+		return true
+	}
+	titleSaysChange := strings.Contains(title, "变更公告") || strings.Contains(title, "更正公告")
+	return titleSaysChange && toptype == "结果"
+}

+ 74 - 0
fullproject/src_dev1/main.go

@@ -0,0 +1,74 @@
+package main
+
+import (
+	"qfw/util"
+	"qfw/util/mongodb"
+	"qfw/util/redis"
+	"sync"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+// Names of the redis pools (they match the names used in the "redisaddrs"
+// setting) and a timeout constant.
+const (
+	REDISIDS    = "ids"  // pool holding the serialised ProjectInfo per project id
+	REDISKEYS   = "keys"
+	INFOID      = "info"
+	INFOTIMEOUT = 86400 * 30 // 30 days expressed in seconds
+)
+
+// Process-wide state; the configuration-derived values are set in init.
+var (
+	Sysconfig                map[string]interface{} // parsed configuration
+	MQFW                     mongodb.MongodbSim     // MongoDB connection pool
+	extractColl, projectColl string                 // source and target collection names
+	lenprojectname           int                    // "lenprojectname" setting (default 20) minus 1
+	MultiThread              chan bool              // limits concurrent merge goroutines; capacity is the "thread" setting
+	IdLock                   = &sync.Mutex{}
+	PncbMayLock              = &sync.Mutex{}
+
+	// Three lock groups. For PNKey, for example, the key is a project name
+	// and the value is the list of project ids recorded for that name.
+	PNKey, PCKey, PBKey = NewKeyMap(), NewKeyMap(), NewKeyMap()
+
+	currentMegerTime  int64 // publish-time position of the merge, used to clean up projects older than a few months
+	currentMegerCount int   // number of merged records, used to trigger the periodic cleanup
+)
+
+// init loads the configuration and sets up the shared resources in a fixed
+// order: worker limiter, name-length setting, redis pools, MongoDB pool and
+// the two collection names. The string settings are read with unchecked
+// type assertions, so a missing or non-string value panics at start-up.
+func init() {
+
+	util.ReadConfig(&Sysconfig)
+	// Worker limit; falls back to 200 when "thread" is not configured.
+	MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 200))
+	lenprojectname = util.IntAllDef(Sysconfig["lenprojectname"], 20) - 1
+
+	redis.InitRedisBySize(Sysconfig["redisaddrs"].(string), util.IntAllDef(Sysconfig["redisPoolSize"], 100), 30, 300)
+	MQFW = mongodb.MongodbSim{
+		MongodbAddr: Sysconfig["mongodbServers"].(string),
+		Size:        util.IntAll(Sysconfig["mongodbPoolSize"]),
+		DbName:      Sysconfig["mongodbName"].(string),
+	}
+	MQFW.InitPool()
+	extractColl = Sysconfig["extractColl"].(string)
+	projectColl = Sysconfig["projectColl"].(string)
+}
+
+func main() {
+	RunFullData()
+	time.Sleep(99999 * time.Hour)
+}
+
+// NewPushInfo builds the list entry that is pushed onto a project document
+// for one merged record: a fixed set of fields copied from tmp, plus
+// "infoid", the record's _id converted with util.BsonIdToSId.
+func NewPushInfo(tmp map[string]interface{}) bson.M {
+	entry := bson.M{
+		"infoid": util.BsonIdToSId(tmp["_id"]),
+	}
+	// These fields are copied verbatim; keys absent from tmp end up as nil.
+	for _, field := range []string{
+		"comeintime", "publishtime", "title", "toptype", "subtype",
+		"infoformat", "area", "city", "projectname", "projectcode",
+		"buyer", "href",
+	} {
+		entry[field] = tmp[field]
+	}
+	return entry
+}

+ 719 - 0
fullproject/src_dev1/merge.go

@@ -0,0 +1,719 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"math"
+	qu "qfw/util"
+	"qfw/util/redis"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+// startProjectMerge attaches one record to an existing project or creates a
+// new project for it.
+//
+// It collects the ids of candidate projects through getCompareIds, loads
+// them from redis, compares each with the record and sorts the matches into
+// three buckets of decreasing confidence. The best-scored candidate of the
+// first non-empty bucket is updated (MongoDB and redis). When nothing
+// matches, NewProject is called and the new id is registered under every key
+// of the record.
+//
+// Every Key and ID lock taken here is released through defer, i.e. held
+// until the function returns. The Key locks are presumably acquired inside
+// getCompareIds - TODO confirm.
+func startProjectMerge(info *Info, tmp map[string]interface{}) {
+	// Records with fewer than two identifying values and a buyer cannot be
+	// merged.
+	if info.pnbval < 2 && info.Buyer != "" {
+		log.Println("invalid", info.pnbval, info.Buyer, info.Id)
+		return
+	}
+
+	bpn, bpc, bptc, bpb, pids := getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+	repeatId := map[string]bool{}
+	idArr := []string{} // candidate project ids
+	IDArr := []*ID{}    // their lock objects
+	// Each entry of pids holds the id list recorded for one key value
+	// (project name, project code, title code or buyer).
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+		for _, id := range m.Arr {
+			if repeatId[id] {
+				continue
+			}
+			repeatId[id] = true
+			// Leftover of the sharded lookup; the parsed value is unused.
+			_, _ = strconv.ParseInt(id[0:8], 16, 64)
+			AllIdsMapLock.Lock()
+			Id := AllIdsMap2[id]
+			AllIdsMapLock.Unlock()
+			if Id == nil {
+				// The id is listed under a key but has no lock object
+				// (any more); skip it instead of dereferencing nil.
+				log.Println("id not registered in AllIdsMap2:", id)
+				continue
+			}
+			Id.Lock.Lock()
+			defer Id.Lock.Unlock()
+			idArr = append(idArr, id)
+			IDArr = append(IDArr, Id)
+		}
+	}
+
+	bFindProject := false
+	findPid := ""
+	// All candidate ids are collected and locked; compare them.
+	if len(idArr) > 0 && len(idArr) == len(IDArr) {
+		res := redis.Mget(REDISIDS, idArr)
+		// Buckets of decreasing confidence.
+		comRes1 := []*ProjectInfo{}
+		comRes2 := []*ProjectInfo{}
+		comRes3 := []*ProjectInfo{}
+		if len(res) == len(idArr) {
+			for _, v := range res {
+				raw, ok := v.([]byte)
+				if !ok {
+					log.Println("从redis取id信息出错!!!", "unexpected value type")
+					continue
+				}
+				var cp ProjectInfo
+				if err := json.Unmarshal(raw, &cp); err != nil {
+					log.Println("从redis取id信息出错!!!", err)
+					continue
+				}
+				compareProject := &cp
+				// Decode first, index afterwards: an undecodable or empty
+				// entry must not panic on Publistime[len-1].
+				if len(compareProject.Publistime) == 0 {
+					continue
+				}
+				diffTime := math.Abs(float64(info.Publishtime - compareProject.Publistime[len(compareProject.Publistime)-1]))
+				if diffTime >= 360*86400 {
+					continue
+				}
+				// Letter codes: A equal, B contained, C different, D absent.
+				compareStr, score := comparePNCB(info, compareProject)
+				resVal := Select(compareStr, info, compareProject)
+				if resVal <= 0 {
+					continue
+				}
+
+				// Buyer.
+				compareBuyer := ""
+				if info.Buyer != "" {
+					if info.Buyer == compareProject.Buyer {
+						compareBuyer += "A"
+					} else if compareProject.Buyer != "" {
+						if strings.Contains(info.Buyer, compareProject.Buyer) || strings.Contains(compareProject.Buyer, info.Buyer) {
+							compareBuyer += "B"
+						} else {
+							compareBuyer += "C"
+						}
+					} else {
+						compareBuyer += "D"
+					}
+				} else {
+					compareBuyer += "D"
+				}
+
+				// Area (first letter) and city (second letter).
+				compareCity := ""
+				if info.Area != "全国" && info.Area != "" && info.Area == compareProject.Area {
+					compareCity += "A"
+					score += 2
+				} else if info.Area == "全国" || compareProject.Area == "全国" {
+					compareCity += "B"
+					score += 1
+				} else {
+					compareCity += "C"
+				}
+
+				if info.City != "" && info.City == compareProject.City {
+					compareCity += "A"
+					score += 2
+				} else {
+					if info.Area == "全国" || compareProject.Area == "全国" {
+						compareCity += "B"
+					} else {
+						compareCity += "C"
+					}
+				}
+
+				// Distance to the project's latest publish time.
+				compareTime := ""
+				if diffTime < 45*86400 {
+					compareTime += "A"
+					score += 2
+				} else if diffTime < 90*86400 {
+					compareTime += "B"
+					score += 1
+				} else {
+					compareTime += "D"
+				}
+
+				// Agency.
+				compareAgency := ""
+				if info.Agency != "" {
+					if info.Agency == compareProject.Agency {
+						compareAgency += "A"
+						score += 2
+					} else if compareProject.Agency != "" {
+						if strings.Contains(info.Agency, compareProject.Agency) || strings.Contains(compareProject.Agency, info.Agency) {
+							compareAgency += "B"
+							score += 1
+						} else {
+							compareAgency += "C"
+						}
+					} else {
+						compareAgency += "D"
+					}
+				} else {
+					compareAgency += "D"
+				}
+
+				// Order: project name, project code, title code, buyer,
+				// area, city, publish time, agency.
+				compareProject.comStr = compareStr + compareBuyer + compareCity + compareTime + compareAgency
+				// Keep the score on the candidate; the ranking below sorts
+				// on it and the local value was previously thrown away.
+				compareProject.score = score
+
+				switch resVal {
+				case 3:
+					if compareBuyer == "A" || compareBuyer == "B" {
+						comRes1 = append(comRes1, compareProject)
+					} else if compareBuyer == "D" {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							comRes1 = append(comRes1, compareProject)
+						} else {
+							comRes2 = append(comRes2, compareProject)
+						}
+					} else {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							comRes2 = append(comRes2, compareProject)
+						} else if compareCity[1:2] != "C" && compareAgency != "C" && compareTime != "D" {
+							// [1:2] is the city letter. The former [1:1] was
+							// an empty slice, so the test was always true.
+							comRes3 = append(comRes3, compareProject)
+						}
+					}
+				case 1, 2:
+					if compareBuyer == "A" {
+						comRes1 = append(comRes1, compareProject)
+					} else if compareBuyer == "B" {
+						comRes2 = append(comRes2, compareProject)
+					} else if compareBuyer == "D" {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							comRes2 = append(comRes2, compareProject)
+						} else {
+							comRes3 = append(comRes3, compareProject)
+						}
+					} else {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							comRes3 = append(comRes3, compareProject)
+						}
+					}
+				}
+			}
+			// Comparison finished: take the best candidate of the first
+			// non-empty bucket.
+			for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+				if len(resN) == 0 {
+					continue
+				}
+				if len(resN) > 1 {
+					// Rank the bucket being processed (not always comRes1).
+					sort.Slice(resN, func(i, j int) bool {
+						return resN[i].score > resN[j].score
+					})
+				}
+				best := resN[0]
+
+				bFindProject = true
+				findPid = best.Id
+				// bv is the index into pids for name / code / title code /
+				// buyer, or -1 when the record has no such key.
+				for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+					if bv <= -1 {
+						continue
+					}
+					pids[bv].Arr = append(pids[bv].Arr, findPid)
+					if k2 == 0 {
+						if best.ProjectName == "" {
+							best.ProjectName = info.ProjectName
+						} else {
+							best.MPN = append(best.MPN, info.ProjectName)
+						}
+					} else if k2 < 3 {
+						code := qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+						if best.ProjectCode == "" {
+							best.ProjectCode = code
+						} else {
+							best.MPC = append(best.MPC, code)
+						}
+					} else {
+						if best.Buyer == "" {
+							best.Buyer = info.Buyer
+						}
+					}
+				}
+				UpdateProject(tmp, info, best, kv+1, best.comStr)
+				redis.Put(REDISIDS, findPid, best, 0)
+				break
+			}
+		} else {
+			log.Println("redis记录缺失!!!")
+		}
+	}
+
+	if !bFindProject {
+		// No match: create a project and register its id under every key.
+		id := NewProject(tmp, info)
+		_, _ = strconv.ParseInt(id[0:8], 16, 64)
+		AllIdsMapLock.Lock()
+		AllIdsMap2[id] = &ID{Id: id, lastTime: info.Publishtime}
+		AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+}
+
+// UpdateProject folds the record (tmp / thisinfo) into the stored project
+// pInfo. It returns without doing anything when BinarySearch finds the
+// record id in pInfo.Ids. Otherwise it calls EqInfoUpdate to update pInfo in
+// memory and, if the project document can be loaded, writes the merged
+// fields with "$set" and appends the record to the project's "list" with
+// "$push". weight is accepted for interface compatibility and is not used
+// here; comStr is stored on the pushed list entry as "compareStr".
+func UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string) {
+	if BinarySearch(pInfo.Ids, thisinfo.Id) > -1 {
+		return
+	}
+	set := map[string]interface{}{}
+	res, bres := MQFW.FindById(projectColl, pInfo.Id, `{"list":0}`)
+	EqInfoUpdate(thisinfo, pInfo)
+	if !bres || res == nil || *res == nil {
+		return
+	}
+
+	// asMap accepts both concrete map types a decoded document can have, so
+	// a package stored as bson.M neither panics nor gets lost.
+	asMap := func(v interface{}) map[string]interface{} {
+		switch m := v.(type) {
+		case map[string]interface{}:
+			return m
+		case bson.M:
+			return m
+		}
+		return nil
+	}
+
+	set["topscopeclass"] = pInfo.Topscopeclass
+	set["subscopeclass"] = pInfo.Subscopeclass
+	// The joined forms are wrapped in commas: ",a,b,".
+	s_subscopeclass := strings.Join(pInfo.Subscopeclass, ",")
+	if len(s_subscopeclass) > 0 {
+		s_subscopeclass = "," + s_subscopeclass + ","
+	}
+	set["s_subscopeclass"] = s_subscopeclass
+	s_winner := strings.Join(pInfo.Winners, ",")
+	if len(s_winner) > 0 {
+		s_winner = "," + s_winner + ","
+	}
+	set["s_winner"] = s_winner
+	// Contact person and phone are only written as a pair.
+	if pInfo.Buyerperson != "" && pInfo.Buyertel != "" {
+		set["buyerperson"] = pInfo.Buyerperson
+		set["buyertel"] = pInfo.Buyertel
+	}
+	if pInfo.Buyerclass != "" {
+		set["buyerclass"] = pInfo.Buyerclass
+	}
+	if pInfo.District != "" {
+		set["district"] = pInfo.District
+	}
+	if pInfo.Bidopentime > 0 {
+		set["bidopentime"] = pInfo.Bidopentime
+	}
+	if len(pInfo.Winnerorder) > 0 {
+		set["winnerorder"] = pInfo.Winnerorder
+	}
+	if thisinfo.HasPackage {
+		set["multipackage"] = 1
+	} else {
+		set["multipackage"] = 0
+	}
+	if pInfo.ProjectName != "" {
+		set["projectname"] = pInfo.ProjectName
+	}
+	if pInfo.ProjectCode != "" {
+		set["projectcode"] = pInfo.ProjectCode
+	}
+	if pInfo.Buyer != "" {
+		set["buyer"] = pInfo.Buyer
+	}
+
+	// Budget and bid amount: only fill them in when the project has none.
+	if qu.Float64All(tmp["bidamount"]) > 0 {
+		if qu.Float64All((*res)["bidamount"]) == 0 {
+			set["bidamount"] = tmp["bidamount"]
+		}
+	}
+	if qu.Float64All(tmp["budget"]) > 0 {
+		if qu.Float64All((*res)["budget"]) == 0 {
+			set["budget"] = tmp["budget"]
+		}
+	}
+
+	set["mpn"] = pInfo.MPN
+	set["mpc"] = pInfo.MPC
+	set["area"] = pInfo.Area
+	set["city"] = pInfo.City
+
+	if thisinfo.dealtype == 1 {
+		// Lift the package-level values of one package (the last one
+		// iterated) into the record itself.
+		var sonpackage map[string]interface{}
+		for _, obj := range asMap(tmp["package"]) {
+			sonpackage = asMap(obj)
+		}
+		for _, v2 := range []string{"budget", "budget_w", "winner", "winner_w", "bidstatus", "bidstatus_w"} {
+			if sonpackage[v2] != nil {
+				tmp[v2] = sonpackage[v2]
+			}
+		}
+	}
+
+	if thisinfo.HasPackage { // merge the packages of record and project
+		p1 := asMap((*res)["package"])
+		p2 := asMap(tmp["package"])
+		if p2 != nil {
+			if p1 != nil {
+				for pk2, pv2 := range p2 {
+					if p1[pk2] != nil {
+						// Package known on both sides: only add the fields
+						// the stored package does not have yet.
+						item1 := asMap(p1[pk2])
+						item2 := asMap(pv2)
+						if item1 != nil && item2 != nil {
+							for ik1, iv1 := range item2 {
+								if item1[ik1] == nil {
+									item1[ik1] = iv1
+								}
+							}
+						}
+					} else {
+						p1[pk2] = pv2
+					}
+				}
+			} else {
+				p1 = p2
+			}
+		}
+		// Do not overwrite the stored value with null when neither side
+		// holds a package map.
+		if p1 != nil {
+			set["package"] = p1
+		}
+	}
+
+	// Keep the original record as a list entry.
+	push := NewPushInfo(tmp)
+	push["compareStr"] = comStr
+	update := map[string]interface{}{
+		"$set": set,
+		"$push": map[string]interface{}{
+			"list": push,
+		},
+	}
+	MQFW.Update(projectColl, map[string]interface{}{
+		"_id": qu.StringTOBsonId(pInfo.Id),
+	}, &update, false, false)
+}
+
+// EqInfoUpdate folds the announcement thisinfo into the in-memory project pInfo.
+//
+// It inserts thisinfo.Publishtime into pInfo.Publistime before the first entry
+// that is later than it (or appends it when there is none), appends
+// thisinfo.Id to pInfo.Ids, fills in or upgrades buyer, agency and project
+// code, replaces the "全国" area placeholder, takes over the contact and
+// classification fields thisinfo provides, and unions scope classes, winners
+// and winner order into pInfo's slices, which are left sorted.
+func EqInfoUpdate(thisinfo *Info, pInfo *ProjectInfo) {
+	var tk int
+	bf1 := false
+	for _k, tv := range pInfo.Publistime {
+		tk = _k
+		if tv > thisinfo.Publishtime {
+			bf1 = true
+			break
+		}
+	}
+	if bf1 {
+		// Insert at tk by growing the slice and shifting the tail. The earlier
+		// append(append(s[:tk], x), s[tk:]...) form wrote x over s[tk] in the
+		// shared backing array, losing that entry and storing x twice.
+		pInfo.Publistime = append(pInfo.Publistime, 0)
+		copy(pInfo.Publistime[tk+1:], pInfo.Publistime[tk:])
+		pInfo.Publistime[tk] = thisinfo.Publishtime
+	} else {
+		pInfo.Publistime = append(pInfo.Publistime, thisinfo.Publishtime)
+	}
+	pInfo.Ids = append(pInfo.Ids, thisinfo.Id)
+
+	// Take thisinfo's value when pInfo has none, or when pInfo's is short
+	// (under 5 runes, 6 for the code) and thisinfo's is longer than that.
+	if (pInfo.Buyer == "" && thisinfo.Buyer != "") || (len([]rune(pInfo.Buyer)) < 5 && len([]rune(thisinfo.Buyer)) > 5) {
+		pInfo.Buyer = thisinfo.Buyer
+	}
+	if (pInfo.Agency == "" && thisinfo.Agency != "") || (len([]rune(pInfo.Agency)) < 5 && len([]rune(thisinfo.Agency)) > 5) {
+		pInfo.Agency = thisinfo.Agency
+	}
+	if (pInfo.ProjectCode == "" && thisinfo.ProjectCode != "") || (len([]rune(pInfo.ProjectCode)) < 6 && len([]rune(thisinfo.ProjectCode)) > 6) {
+		pInfo.ProjectCode = thisinfo.ProjectCode
+	}
+
+	if pInfo.Area == "全国" && thisinfo.Area != "全国" {
+		pInfo.Area = thisinfo.Area
+		pInfo.City = thisinfo.City
+	}
+	// Contact person and phone are replaced only as a pair, and only when the
+	// phone number is longer than 6 runes.
+	if thisinfo.Buyerperson != "" && thisinfo.Buyertel != "" && len([]rune(thisinfo.Buyertel)) > 6 {
+		pInfo.Buyerperson = thisinfo.Buyerperson
+		pInfo.Buyertel = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		pInfo.Buyerclass = thisinfo.Buyerclass
+	}
+	if thisinfo.District != "" {
+		pInfo.District = thisinfo.District
+	}
+	if thisinfo.Bidopentime > 0 {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+	}
+	// Each union below keeps the destination sorted so BinarySearch can be used.
+	if len(thisinfo.Topscopeclass) > 0 {
+		sort.Strings(pInfo.Topscopeclass)
+		for _, k := range thisinfo.Topscopeclass {
+			if BinarySearch(pInfo.Topscopeclass, k) == -1 {
+				pInfo.Topscopeclass = append(pInfo.Topscopeclass, k)
+				sort.Strings(pInfo.Topscopeclass)
+			}
+		}
+	}
+
+	if len(thisinfo.Subscopeclass) > 0 {
+		sort.Strings(pInfo.Subscopeclass)
+		for _, k := range thisinfo.Subscopeclass {
+			if BinarySearch(pInfo.Subscopeclass, k) == -1 {
+				pInfo.Subscopeclass = append(pInfo.Subscopeclass, k)
+				sort.Strings(pInfo.Subscopeclass)
+			}
+		}
+	}
+	// winner
+	if len(thisinfo.Winners) > 0 {
+		sort.Strings(pInfo.Winners)
+		for _, k := range thisinfo.Winners {
+			if BinarySearch(pInfo.Winners, k) == -1 {
+				pInfo.Winners = append(pInfo.Winners, k)
+				sort.Strings(pInfo.Winners)
+			}
+		}
+	}
+	// winnerorder
+	if len(thisinfo.Winnerorder) > 0 {
+		sort.Strings(pInfo.Winnerorder)
+		for _, k := range thisinfo.Winnerorder {
+			if BinarySearch(pInfo.Winnerorder, k) == -1 {
+				pInfo.Winnerorder = append(pInfo.Winnerorder, k)
+				sort.Strings(pInfo.Winnerorder)
+			}
+		}
+	}
+}
+
+// BinarySearch sorts s in place and then binary-searches it for k.
+// It returns an index at which s holds k after sorting, or -1 when k is absent.
+func BinarySearch(s []string, k string) int {
+	sort.Strings(s)
+	for low, high := 0, len(s)-1; low <= high; {
+		mid := (low + high) >> 1
+		switch cur := s[mid]; {
+		case cur < k:
+			low = mid + 1
+		case cur > k:
+			high = mid - 1
+		default:
+			return mid
+		}
+	}
+	return -1
+}
+
+// FIELDS lists the keys NewProject copies verbatim from the raw announcement
+// document into a new project document when they are present.
+var FIELDS = []string{"area", "city", "district", "projectname", "projectcode", "buyer", "winner", "budget", "bidamount", "bidstatus", "agency", "projectscope"}
+
+// NewProject creates a project document from the announcement thisinfo (raw
+// document tmp), saves it to projectColl, caches the matching ProjectInfo in
+// redis under REDISIDS and returns the id reported by the save.
+func NewProject(tmp map[string]interface{}, thisinfo *Info) string {
+
+	set := map[string]interface{}{}
+	for _, f := range FIELDS {
+		if tmp[f] != nil {
+			set[f] = tmp[f]
+		}
+	}
+	set["s_projectname"] = thisinfo.ProjectName
+	set["createtime"] = time.Now().Unix()
+	set["sourceinfoid"] = qu.BsonIdToSId(tmp["_id"])
+	set["sourceinfourl"] = tmp["href"]
+	set["topscopeclass"] = thisinfo.Topscopeclass
+	set["subscopeclass"] = thisinfo.Subscopeclass
+	if thisinfo.Buyerperson != "" {
+		set["buyerperson"] = thisinfo.Buyerperson
+	}
+	if thisinfo.Buyertel != "" {
+		set["buyertel"] = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		// Stored under its own key; it used to be written to "buyertel",
+		// overwriting the phone number and never saving the class.
+		set["buyerclass"] = thisinfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > 0 {
+		set["bidopentime"] = thisinfo.Bidopentime
+	}
+	if len(thisinfo.Winnerorder) > 0 {
+		set["winnerorder"] = thisinfo.Winnerorder
+	}
+	// NOTE(review): UpdateProject wraps these joined strings in leading and
+	// trailing commas, this function does not — confirm which form readers expect.
+	s_subscopeclass := strings.Join(thisinfo.Subscopeclass, ",")
+	set["s_subscopeclass"] = s_subscopeclass
+	s_winner := strings.Join(thisinfo.Winners, ",")
+	set["s_winner"] = s_winner
+	if tmp["package"] != nil {
+		set["package"] = tmp["package"] // no field priority defined for packages
+	}
+	// The announcement itself becomes the first entry of the project's list.
+	push := NewPushInfo(tmp)
+	set["list"] = []bson.M{
+		push,
+	}
+
+	id := MQFW.Save(projectColl, set)
+	p1 := NewPinfo(id, thisinfo)
+	redis.PutCKV(REDISIDS, id, p1)
+
+	return id
+}
+
+// NewPinfo builds the ProjectInfo for project id from its first announcement
+// thisinfo; NewProject stores the result in redis.
+//
+// MPN starts empty. MPC holds thisinfo.PTC only when LenPTC is greater than 5.
+// Package receives every key of thisinfo.Package mapped to an empty string.
+func NewPinfo(id string, thisinfo *Info) ProjectInfo {
+	extraCodes := []string{}
+	if thisinfo.LenPTC > 5 {
+		extraCodes = append(extraCodes, thisinfo.PTC)
+	}
+
+	pkgNames := map[string]interface{}{}
+	for name := range thisinfo.Package {
+		pkgNames[name] = ""
+	}
+
+	return ProjectInfo{
+		Id:            id,
+		Ids:           []string{thisinfo.Id},
+		Publistime:    []int64{thisinfo.Publishtime},
+		InfoType:      [][]string{{thisinfo.TopType, thisinfo.SubType}},
+		ProjectName:   thisinfo.ProjectName,
+		ProjectCode:   thisinfo.ProjectCode,
+		MPN:           []string{},
+		MPC:           extraCodes,
+		Buyer:         thisinfo.Buyer,
+		Buyerclass:    thisinfo.Buyerclass,
+		Agency:        thisinfo.Agency,
+		Area:          thisinfo.Area,
+		City:          thisinfo.City,
+		District:      thisinfo.District,
+		Topscopeclass: thisinfo.Topscopeclass,
+		Subscopeclass: thisinfo.Subscopeclass,
+		Winners:       thisinfo.Winners,
+		Winnerorder:   thisinfo.Winnerorder,
+		Bidopentime:   thisinfo.Bidopentime,
+		HasPackage:    thisinfo.HasPackage,
+		Package:       pkgNames,
+	}
+}
+
+// IdsMapAndIndex pairs a Key pointer with an integer index.
+// NOTE(review): no use of this type is visible in this part of the file.
+type IdsMapAndIndex struct {
+	Key   *Key
+	Index int
+}
+
+// sm is a package-level sync.Map; no use of it is visible in this part of the file.
+var sm = sync.Map{}
+
+// ALock is held for the whole of getCompareIds, which reads and writes the
+// three maps below.
+var ALock = sync.Mutex{}
+
+// Lookup tables from a key string to its Key entry. getCompareIds indexes
+// mapPn by pn, mapPc by both pc and ptc, and mapPb by pb.
+var mapPn = map[string]*Key{}
+var mapPc = map[string]*Key{}
+var mapPb = map[string]*Key{}
+
+// Per-map locks, disabled in favour of the single ALock:
+//var mapPnLock, mapPcLock, mapPbLock = sync.Mutex{}, sync.Mutex{}, sync.Mutex{}
+
+// getCompareIds looks up, and creates when absent, the Key entries for project
+// name pn (mapPn), project code pc and title-extracted code ptc (both mapPc)
+// and buyer pb (mapPb). Empty arguments are skipped.
+//
+// Every returned Key is locked and appended to res in that order; the locks
+// are NOT released here, so the caller must unlock each element of res exactly
+// once. bpn, bpc, bptc and bpb are -1 unless the corresponding Key was newly
+// created by this call, in which case they hold its index in res.
+//
+// The whole lookup runs under ALock.
+func getCompareIds(pn, pc, ptc, pb string) (bpn, bpc, bptc, bpb int, res []*Key) {
+	ALock.Lock()
+	defer ALock.Unlock()
+	res = []*Key{}
+
+	// acquire fetches or creates the Key for name in m, locks it and appends it
+	// to res. It returns the Key's index in res when it was created here, else -1.
+	acquire := func(m map[string]*Key, name string) int {
+		if name == "" {
+			return -1
+		}
+		created := -1
+		ids := m[name]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			m[name] = ids
+			created = len(res)
+		}
+		// pc and ptc share mapPc, so equal values resolve to the same Key.
+		// sync.Mutex is not reentrant: locking it a second time would block this
+		// goroutine forever while it still holds ALock. A Key that is already
+		// held is therefore neither locked nor listed again.
+		for _, held := range res {
+			if held == ids {
+				return -1
+			}
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+		return created
+	}
+
+	bpn = acquire(mapPn, pn)
+	bpc = acquire(mapPc, pc)
+	bptc = acquire(mapPc, ptc)
+	bpb = acquire(mapPb, pb)
+	return
+}
+
+// extInfoTag marks the extracted document id in extractColl by setting its
+// "meger_sflag" field to sflag.
+func extInfoTag(sflag, id string) {
+	fields := map[string]interface{}{
+		"meger_sflag": sflag,
+	}
+	MQFW.UpdateById(extractColl, id, map[string]interface{}{"$set": fields})
+}

+ 321 - 0
fullproject/src_dev1/merge_select.go

@@ -0,0 +1,321 @@
+package main
+
+import (
+	"math"
+	"regexp"
+)
+
+// Select scores how strongly the announcement info matches the project
+// compareInfo, given compareStr, a three-letter code made of the letters A-D.
+//
+// The result is 0 (res is never assigned) when compareStr is listed in
+// compareNoPass, when no case matches it, or when the matching case's
+// conditions are not met. Otherwise it is 1, 2 or 3, chosen from the lengths
+// of the project name (LenPN), project code (LenPC) and title-extracted code
+// (LenPTC) of info and, in a few cases, from digit/Han-character checks.
+//
+// NOTE(review): judging by the fields each case tests, the three positions
+// appear to describe project name, project code and title-extracted code in
+// that order — confirm against the code that builds compareStr.
+func Select(compareStr string, info *Info, compareInfo *ProjectInfo) (res int) {
+	// Nothing comparable: no project name or project code to compare on.
+	if compareNoPass[compareStr] {
+
+	} else {
+		switch compareStr {
+		case "AAA":
+			res = 3
+		case "AAB":
+			res = 3
+		case "ABA":
+			res = 3
+		case "ABB":
+			if info.LenPTC > 6 || info.LenPC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BAA":
+			if info.LenPN > 10 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BAB":
+			if info.LenPN > 10 || info.LenPTC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BBA":
+			if info.LenPN > 10 || info.LenPC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BBB":
+			// Count how many of the three fields are long enough.
+			v := 0
+			if info.LenPN > 10 {
+				v++
+			}
+			if info.LenPC > 6 {
+				v++
+			}
+			if info.LenPTC > 6 {
+				v++
+			}
+			if v > 1 {
+				res = 3
+			} else if v == 1 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "AAD":
+			res = 3
+		case "ABD":
+			if info.LenPC > 12 || info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BAD":
+			if info.LenPC > 12 || info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BBD":
+			if info.LenPC > 12 && info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "ADA":
+			res = 3
+		case "ADB":
+			if info.LenPTC > 12 || info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BDA":
+			if info.LenPTC > 12 || info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BDB":
+			if info.LenPTC > 12 && info.LenPN > 16 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "ADD":
+			// A former top tier (LenPN > 18 => 3) is disabled:
+			//			if info.LenPN > 18 {
+			//				res = 3
+			//			} else
+			if info.LenPN > 14 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BDD":
+			if info.LenPN > 18 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DAA":
+			if info.LenPTC > 12 || info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DAB":
+			if info.LenPTC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DAD":
+			// NOTE(review): tests LenPTC although the third letter is D, while
+			// "DBD" tests LenPC — confirm which field is intended.
+			if info.LenPTC > 14 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBA":
+			if info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBB":
+			if info.LenPTC > 12 && info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBD":
+			if info.LenPC > 8 {
+				res = 1
+			}
+		case "DDA":
+			if info.LenPTC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DDB":
+			// NOTE(review): tests LenPC although the second letter is D, while
+			// "DDA" tests LenPTC — possibly a copy of the "DBD" case; confirm.
+			if info.LenPC > 8 {
+				res = 1
+			}
+		case "ACA":
+			if info.LenPN > 12 || info.LenPTC > 9 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "ACB":
+			if info.LenPN > 14 && info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BCA":
+			if info.LenPN > 12 && info.LenPTC > 9 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BCB":
+			// The second branch is only reached when the first condition fails.
+			if info.LenPN > 12 && info.LenPTC > 9 {
+				res = 2
+			} else if info.LenPN > 16 || info.LenPTC > 12 {
+				res = 1
+			}
+		case "AAC":
+			if info.LenPN > 12 || info.LenPC > 9 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "ABC":
+			if info.LenPN > 14 || info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BAC":
+			if info.LenPN > 14 || info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BBC":
+			// Disabled length check; both of its branches gave 1 anyway:
+			//			if info.LenPN > 12 && info.LenPTC > 9 {
+			//				res = 1
+			//			} else {
+			//				res = 1
+			//			}
+			res = 1
+		case "ACC":
+			// All logic for this case is disabled, so res stays 0:
+			//			if info.LenPC > 4 && len(compareInfo.ProjectCode) > 4 && CheckHanAndNum(info.ProjectCode) && CheckHanAndNum(compareInfo.ProjectCode) {
+			//				//未考虑MPC
+			//			} else if info.LenPN > 16 {
+			//				res = 1
+			//			}
+			//
+		case "ACD":
+			// Project codes differ: accept only when their rune lengths are
+			// more than 3 apart and the name is longer than 12.
+			if math.Abs(float64(info.LenPC-len([]rune(compareInfo.ProjectCode)))) > 3 && info.LenPN > 12 {
+				res = 1
+			}
+		case "ADC":
+			if info.LenPN > 16 {
+				res = 1
+			}
+		case "BCC":
+			// Project codes differ: never accepted (res stays 0).
+		case "BCD":
+			// Project codes differ.
+			// NOTE(review): len(compareInfo.ProjectCode) counts bytes here while
+			// the second branch counts runes — confirm the byte count is intended.
+			if info.LenPC > 4 && len(compareInfo.ProjectCode) > 4 && (!_numreg1.MatchString(info.ProjectCode) && !_numreg1.MatchString(compareInfo.ProjectCode)) {
+				// Deliberately left at 0; MPC is not taken into account.
+			} else if info.LenPN > 18 && info.LenPC-len([]rune(compareInfo.ProjectCode)) != 0 {
+				res = 1
+			}
+		case "BDC":
+			// NOTE(review): compares LenPTC with the length of
+			// compareInfo.ProjectCode — confirm this pairing is intended.
+			if info.LenPN > 18 && info.LenPTC-len([]rune(compareInfo.ProjectCode)) != 0 {
+				res = 1
+			}
+		case "CAA":
+			if info.LenPC > 10 || info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "CAB":
+			if info.LenPC > 10 && info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "CAC":
+			if info.LenPC > 14 && math.Abs(float64(info.LenPN-len([]rune(compareInfo.ProjectName)))) > 4 {
+				res = 1
+			}
+		case "CAD":
+			if info.LenPC > 14 && math.Abs(float64(info.LenPN-len([]rune(compareInfo.ProjectName)))) > 4 {
+				res = 1
+			}
+		case "CBA":
+			if info.LenPC > 10 && info.LenPTC > 10 {
+				res = 1
+			}
+		case "CBB":
+			if info.LenPC > 10 && info.LenPTC > 10 {
+				res = 1
+			}
+		case "CBC":
+			// numCheckPc is defined outside this file; a non-match is used here
+			// to relax the length requirement.
+			if info.LenPC > 14 || (info.LenPC > 9 && !numCheckPc.MatchString(info.ProjectCode)) {
+				res = 1
+			}
+		case "CBD":
+			if info.LenPC > 14 || (info.LenPC > 10 && !numCheckPc.MatchString(info.ProjectCode)) {
+				res = 1
+			}
+		case "CCA":
+			if (info.LenPTC > 12 && !numCheckPc.MatchString(info.PTC)) || CheckHanAndNum(info.PTC) {
+				res = 1
+			}
+		case "CCB":
+			// Never accepted (res stays 0).
+		case "CDA":
+			if info.LenPTC > 10 || (info.LenPTC > 8 && !numCheckPc.MatchString(info.PTC)) || CheckHanAndNum(info.PTC) {
+				res = 1
+			}
+		case "CDB":
+			if info.LenPTC > 15 || (info.LenPTC > 12 && !numCheckPc.MatchString(info.PTC)) {
+				res = 1
+			}
+		case "DAC":
+			if info.LenPC > 12 || (info.LenPC > 10 && !numCheckPc.MatchString(info.ProjectCode)) || CheckHanAndNum(info.ProjectCode) {
+				res = 1
+			}
+		case "DBC":
+			if info.LenPC > 15 || (info.LenPC > 12 && !numCheckPc.MatchString(info.ProjectCode)) {
+				res = 1
+			}
+		case "DCA":
+			if info.LenPTC > 14 || (info.LenPTC > 10 && !numCheckPc.MatchString(info.PTC)) || CheckHanAndNum(info.PTC) {
+				res = 1
+			}
+		case "DCB":
+			// Requires all three: no Han+digit mix, no numCheckPc match, length over 10.
+			if !CheckHanAndNum(info.PTC) && !numCheckPc.MatchString(info.PTC) && info.LenPTC > 10 {
+				res = 1
+			}
+		}
+
+	}
+	return
+}
+
+// Patterns shared by CheckHanAndNum and CheckZimuAndNum.
+var (
+	nreg1 = regexp.MustCompile("[0-9]{2,}")    // two or more consecutive ASCII digits
+	zreg1 = regexp.MustCompile("[a-zA-Z]{1,}") // one or more ASCII letters
+	hreg1 = regexp.MustCompile(`[\p{Han}]+`)   // one or more Han characters
+)
+
+// CheckHanAndNum reports whether str contains both a run of at least two
+// digits and at least one Han character.
+func CheckHanAndNum(str string) (b bool) {
+	if !nreg1.MatchString(str) {
+		return false
+	}
+	return hreg1.MatchString(str)
+}
+
+// CheckZimuAndNum reports whether str contains both an ASCII letter and a run
+// of at least two digits.
+func CheckZimuAndNum(str string) (b bool) {
+	if !zreg1.MatchString(str) {
+		return false
+	}
+	return nreg1.MatchString(str)
+}

+ 16 - 0
fullproject/src_dev2/config.json

@@ -0,0 +1,16 @@
+{
+    "thread": 1,
+    "mongodbServers": "192.168.3.207:27082",
+    "mongodbPoolSize": 10,
+    "mongodbName": "cesuo",
+    "extractColl": "key1_biddingall",
+    "projectColl": "projectset_0809",
+    "redisaddrs": "ids=192.168.3.207:1378,keys=192.168.3.207:1378,info=192.168.3.207:1378",
+    "redisPoolSize": 20,
+    "jkmail": {
+        "to": "zhangjinkun@topnet.net.cn",
+        "api": "http://10.171.112.160:19281/_send/_mail"
+    },
+    "udpport": ":1482",
+    "nextNode": []
+}

+ 291 - 0
fullproject/src_dev2/init.go

@@ -0,0 +1,291 @@
+package main
+
+import (
+	"log"
+	mu "mfw/util"
+	"qfw/util"
+	"qfw/util/mongodb"
+	"qfw/util/redis"
+	"regexp"
+	"sync"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+const (
+	ProjectCache = "info" // holds each project's information, keyed by project ID
+)
+
+// Process-wide configuration and shared state.
+var (
+	Sysconfig                map[string]interface{} // settings read from the config file
+	MongoTool                mongodb.MongodbSim     // MongoDB connection
+	ExtractColl, ProjectColl string                 // extraction collection, project collection
+	CurrentMegerTime         int64                  // time of the record the merge has currently reached
+	CurrentMegerCount        int                    // running count of merged records
+	MultiThread              = make(chan bool, 5)   // project-merge worker slots; init re-creates it from the "thread" setting
+	AllIdsMap2               = map[string]*ID{}     // project id (hex) -> ID entry; filled by loadData
+	AllIdsMapLock            = sync.Mutex{}         // held while AllIdsMap2 is read or written
+)
+
+// Regular expressions used when comparing announcements, and the lookup
+// tables the merge works on. The compare* sets are filled by init.
+var (
+	_datereg  = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
+	_numreg1  = regexp.MustCompile("^[0-9-]{1,8}$")      // whole string: 1-8 digits or dashes
+	_zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")   // whole string: 1-7 letters or dashes
+	_nzreg    = regexp.MustCompile("^[0-9a-zA-Z-]+$")    // whole string: letters, digits, dashes
+	// Whole string made of Han characters and the listed punctuation. In a raw
+	// string the brackets are escaped with a single backslash; the former
+	// `\\[\\]` escaped the backslash instead, which closed the character class
+	// early and left the pattern unable to match such strings.
+	_hanreg    = regexp.MustCompile(`^[\p{Han}::【】\[\]()()--、]+$`)
+	replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
+	pStr       = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
+	nreg1      = regexp.MustCompile("[0-9]{2,}")    // two or more consecutive digits
+	zreg1      = regexp.MustCompile("[a-zA-Z]{1,}") // one or more letters
+	hreg1      = regexp.MustCompile(`[\p{Han}]+`)   // one or more Han characters
+	numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
+
+	// project name -> Key
+	mapPn = map[string]*Key{}
+	// project code -> Key
+	mapPc = map[string]*Key{}
+	// buyer -> Key
+	mapPb         = map[string]*Key{}
+	compareNoPass = map[string]bool{}
+	compareAB     = map[string]bool{}
+	compareAB2D   = map[string]bool{}
+	compareABD    = map[string]bool{}
+	compareAB2CD  = map[string]bool{}
+	compareABCD   = map[string]bool{}
+)
+
+// init reads the configuration, opens the redis and MongoDB pools, starts the
+// UDP listener and fills the compare* lookup sets.
+//
+// Each set is built from three-letter keys over A-D and excludes every key
+// already placed in an earlier set, so the order of the loops below matters.
+// The string type assertions on Sysconfig are unchecked: a missing or
+// non-string "redisaddrs", "mongodbServers", "mongodbName", "extractColl" or
+// "projectColl" setting panics at start-up.
+func init() {
+	util.ReadConfig(&Sysconfig)
+	// Worker slots come from the "thread" setting, defaulting to 5.
+	MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
+	redis.InitRedisBySize(Sysconfig["redisaddrs"].(string), util.IntAllDef(Sysconfig["redisPoolSize"], 60), 10, 300)
+	MongoTool = mongodb.MongodbSim{
+		MongodbAddr: Sysconfig["mongodbServers"].(string),
+		Size:        util.IntAll(Sysconfig["mongodbPoolSize"]),
+		DbName:      Sysconfig["mongodbName"].(string),
+	}
+	MongoTool.InitPool()
+	ExtractColl = Sysconfig["extractColl"].(string)
+	ProjectColl = Sysconfig["projectColl"].(string)
+
+	udpport, _ := Sysconfig["udpport"].(string)
+	udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", udpport)
+
+	// --- cannot pass: every key made only of C and D
+	vm := []string{"C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareNoPass[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------")
+
+	// All three elements agree, [AB][AB][AB]: highest score.
+	vm = []string{"A", "B"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareAB[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB))
+	// --- at least two agree, the other may be absent
+	//[AB][AB][ABD]
+	//[AB][ABD][AB]
+	vm = []string{"A", "B"}
+	vm2 := []string{"A", "B", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2D))
+	// --- at least one agrees, the others may be absent
+	//[ABD][ABD][ABD] (DDD is excluded through compareNoPass)
+	vm = []string{"A", "B", "D"}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
+					compareABD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABD))
+
+	//[AB][ABCD][AB]
+	//[AB][AB][ABCD]
+	vm = []string{"A", "B"}
+	vm2 = []string{"A", "B", "C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2CD))
+	// Everything left over from [ABCD][ABCD][ABCD]; [CD][CD][CD] is excluded
+	// through compareNoPass. The original author flagged this group as needing
+	// focused discussion.
+	vm = []string{"A", "B", "C", "D"}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
+					compareABCD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+}
+
+// CheckHanAndNum reports whether str matches both nreg1 (a run of at least
+// two digits) and hreg1 (at least one Han character).
+func CheckHanAndNum(str string) (b bool) {
+	if !nreg1.MatchString(str) {
+		return false
+	}
+	return hreg1.MatchString(str)
+}
+// CheckZimuAndNum reports whether str matches both zreg1 (at least one ASCII
+// letter) and nreg1 (a run of at least two digits).
+func CheckZimuAndNum(str string) (b bool) {
+	if !zreg1.MatchString(str) {
+		return false
+	}
+	return nreg1.MatchString(str)
+}
+
+// KeyMap bundles a string-to-Key map with a mutex; create one with NewKeyMap.
+type KeyMap struct {
+	Lock sync.Mutex
+	Map  map[string]*Key
+}
+
+// ID is the per-project entry stored in AllIdsMap2. Because it embeds a
+// mutex by value it must be shared by pointer, never copied.
+type ID struct {
+	Id       string     // project id; loadData stores the hex form of the ObjectId
+	Lock     sync.Mutex // locked by getCompareIds for every ID it returns
+	lastTime int64      // loadData copies the project's LastTime here
+	pos      int
+}
+// Key is the value type of mapPn, mapPc and mapPb: the project ids registered
+// under one key string, plus the mutex getCompareIds locks before returning it.
+type Key struct {
+	Arr  []string // project ids (hex), appended by loadData
+	Lock sync.Mutex
+}
+// IdAndLock pairs an id string with a mutex.
+// NOTE(review): no use of this type is visible in this part of the file.
+type IdAndLock struct {
+	Id   string
+	Lock sync.Mutex
+}
+
+// NewKeyMap returns a KeyMap with an empty, ready-to-use Map and an unlocked Lock.
+func NewKeyMap() *KeyMap {
+	km := new(KeyMap) // the zero-value mutex is already unlocked
+	km.Map = map[string]*Key{}
+	return km
+}
+
+// Info is the bidding-announcement entity that gets merged into projects.
+type Info struct {
+	Id            string                 `json:"_id"`
+	Href          string                 `json:"href"` // source URL
+	Publishtime   int64                  `json:"publishtime"`
+	Title         string                 `json:"title"`
+	TopType       string                 `json:"toptype"`
+	SubType       string                 `json:"subtype"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"`
+	Buyer         string                 `json:"buyer"`
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`
+	Area          string                 `json:"area"`
+	City          string                 `json:"city"`
+	District      string                 `json:"district"`
+	HasPackage    bool                   `json:"haspackage"`
+	Package       map[string]interface{} `json:"package"`
+	PNum          string                 `json:"pnum"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Buyerclass    string                 `json:"buyerclass"`
+	Bidopentime   int64                  `json:"bidopentime"`
+	// NOTE(review): budget and bidamount are unexported, so encoding/json
+	// ignores them despite the tags — confirm they are filled some other way.
+	budget        float64                `json:"budget"`
+	bidamount     float64                `json:"bidamount"`
+	Winners       []string
+	dealtype      int
+
+	Winnerorder []string
+
+	PTC    string // project code extracted from the title
+	pnbval int    // how many of project name, code and buyer are present
+	LenPC  int    // length of the project code
+	LenPN  int    // length of the project name
+	LenPTC int    // length of the title-extracted project code
+}
+
+// ProjectInfo is the project entity. loadData decodes project documents from
+// MongoDB into this type.
+// NOTE(review): only Id carries a bson tag; the other fields have json tags
+// only — confirm the stored key names line up with how the driver maps them.
+type ProjectInfo struct {
+	Id            bson.ObjectId          `bson:"_id"`
+	FirstTime     int64                  `json:"firsttime"` // earliest time of the project
+	LastTime      int64                  `json:"lasttime"`  // latest time of the project
+	Ids           []string               `json:"ids"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"` // sub-industry classification
+	Winners       []string               `json:"winners"`       // winning bidders
+	ProjectName   string                 `json:"projectname"`   // project name
+	ProjectCode   string                 `json:"projectcode"`   // project code, unique (purely numeric codes carry less weight)
+	Buyer         string                 `json:"buyer"`         // purchasing entity, unique
+	MPN           []string               `json:"mpn"`           // additional project names left over after merging
+	MPC           []string               `json:"mpc"`           // additional project codes left over after merging
+	Buyerperson   string                 `json:"buyerperson"`   // buyer contact person
+	Buyertel      string                 `json:"buyertel"`      // buyer contact phone
+	Agency        string                 `json:"agency"`        // agency
+	Area          string                 `json:"area"`          // region
+	City          string                 `json:"city"`          // city
+	District      string                 `json:"district"`      // district or county
+	HasPackage    bool                   `json:"haspackage"`    // whether the project has sub-packages
+	Package       map[string]interface{} `json:"package"`       // sub-package comparison object
+	Buyerclass    string                 `json:"buyerclass"`    // buyer classification
+	Bidopentime   int64                  `json:"bidopentime"`   // bid opening time
+	Zbtime        int64                  `json:"zbtime"`        // tender time
+	Jgtime        int64                  `json:"jgtime"`        // result (award) time
+	Bidamount     float64                `json:"bidamount"`     // bid amount
+	Budget        float64                `json:"budget"`        // budget
+	Winnerorder   []string               `json:"winnerorder"`   // winning candidates
+	score         int
+	comStr        string
+}

+ 65 - 0
fullproject/src_dev2/load_data.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"time"
+	//"encoding/json"
+	"log"
+)
+
+// loadData reads every document of projectColl and indexes it in memory: the
+// project's hex id is appended under its name and each MPN entry in mapPn,
+// under its code and each MPC entry in mapPc, and under its buyer in mapPb,
+// and an ID entry is stored in AllIdsMap2. Empty strings are not indexed.
+//
+// AllIdsMapLock is held for the entire load. month and bCacheRedis are
+// accepted but have no effect yet: the query is unfiltered and the redis
+// branch is empty.
+func loadData(projectColl string, month int, bCacheRedis bool) {
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+
+	q := map[string]interface{}{}
+	it := sess.DB(MongoTool.DbName).C(projectColl).Find(&q).Iter()
+	AllIdsMapLock.Lock()
+	// Deferred so the lock is released even if indexing panics.
+	defer AllIdsMapLock.Unlock()
+
+	// index appends id to the Key registered for name in m, creating the Key
+	// on first use.
+	index := func(m map[string]*Key, name, id string) {
+		if name == "" {
+			return
+		}
+		k := m[name]
+		if k == nil {
+			k = &Key{Arr: []string{}}
+			m[name] = k
+		}
+		k.Arr = append(k.Arr, id)
+	}
+
+	tmp := &ProjectInfo{}
+	n := 0
+	for it.Next(tmp) {
+		n++
+		if n%1000 == 0 {
+			log.Println("current", n, "\n", tmp.Id, tmp)
+			// NOTE(review): this pause happens while AllIdsMapLock is held —
+			// confirm the throttling is intentional.
+			time.Sleep(2 * time.Second)
+		}
+		id := tmp.Id.Hex()
+		index(mapPn, tmp.ProjectName, id)
+		for _, v := range tmp.MPN {
+			index(mapPn, v, id)
+		}
+		index(mapPc, tmp.ProjectCode, id)
+		for _, v := range tmp.MPC {
+			index(mapPc, v, id)
+		}
+		index(mapPb, tmp.Buyer, id)
+
+		AllIdsMap2[id] = &ID{Id: id, lastTime: tmp.LastTime}
+
+		if bCacheRedis {
+			// TODO: store the project in redis.
+
+		}
+
+	}
+	// Next returns false on errors as well as at the end of the result set;
+	// Close reports which, so a partial load is no longer silent.
+	if err := it.Close(); err != nil {
+		log.Println("load data error:", err)
+	}
+	log.Println("load over")
+}

+ 61 - 0
fullproject/src_dev2/main.go

@@ -0,0 +1,61 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	"time"
+)
+
+var (
+	udpclient    mu.UdpClient // UDP endpoint; processUdpMsg answers through it
+	// SingleThread has capacity 1; processUdpMsg sends to it before starting a
+	// task and the task receives from it when done.
+	SingleThread = make(chan bool, 1)
+)
+
+func main() {
+
+	//udp跑增量  id段   zl
+	//udp跑全量			ql
+	//udp跑历史数据  信息id1,id2/或id段  ls
+	//udp强制合并  信息id1,id2,id3 [项目id] 不存在时新建  qzhb
+	//udp强制拆分  项目id,信息id1,id2          qzcf
+	//udp重新合并  信息id1,id2,id3             cxhb
+	//loadData("project_0809", 1, true)
+	time.Sleep(99999 * time.Hour)
+}
+
+// processUdpMsg is the UDP message handler passed to udpclient.Listen.
+//
+// For mu.OP_TYPE_DATA it decodes data as a JSON object, answers the sender
+// with the object's "key" value ("udpok" when absent) and starts the task
+// named by "stype". Only "ql" is handled; it runs taskQl in its own goroutine
+// once the SingleThread slot is free. A decode failure is reported back to
+// the sender as "err:<message>".
+//
+// For mu.OP_NOOP, the reply of the next node, a non-empty payload is logged.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA: // data from the previous node
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+			return
+		}
+		if mapInfo == nil {
+			return
+		}
+		key, _ := mapInfo["key"].(string)
+		if key == "" {
+			key = "udpok"
+		}
+		go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+		tasktype, _ := mapInfo["stype"].(string)
+		log.Println("tasktype:", tasktype)
+		switch tasktype {
+		case "ql":
+			// Take the slot only for a task that will release it. Taking it
+			// before the switch left it occupied forever after any other task
+			// type, which blocked every later message.
+			SingleThread <- true
+			go func() {
+				defer func() {
+					<-SingleThread
+				}()
+				taskQl(mapInfo)
+			}()
+		default:
+			log.Println("unsupported tasktype:", tasktype)
+		}
+	case mu.OP_NOOP: // reply from the next node
+		ok := string(data)
+		if ok != "" {
+			log.Println("ok:", ok)
+		}
+	}
+}

+ 377 - 0
fullproject/src_dev2/merge.go

@@ -0,0 +1,377 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"math"
+	qu "qfw/util"
+	"qfw/util/redis"
+	"sort"
+	//"strconv"
+	"strings"
+	"sync"
+)
+
+// findLock serializes the key lookup so that only one goroutine at a time
+// runs the search in getCompareIds.
+var findLock = sync.Mutex{}
+
+// getCompareIds collects, from the in-memory key maps, the ids of the projects
+// that the given record should be compared against.
+//
+// pn is looked up in mapPn, pc and ptc in mapPc and pb in mapPb; empty values
+// are skipped. A key that does not exist yet is created with an empty id list.
+//
+// Returns:
+//   - bpn, bpc, bptc, bpb: for a key that was newly created by this call, its
+//     position in res; -1 when the key already existed or the value was empty.
+//   - res: the Key objects for the non-empty inputs, in pn, pc, ptc, pb order.
+//   - idArr / IDArr: the de-duplicated project ids found under those keys and
+//     the matching *ID objects from AllIdsMap2 (same order, same length).
+//
+// Locking: every Key in res and every ID in IDArr is returned with its Lock
+// held; the caller is responsible for unlocking them. findLock is held for the
+// duration of the call.
+//
+// NOTE(review): if an id listed under a key is missing from AllIdsMap2 the
+// lookup yields nil and Id.Lock.Lock() would panic — confirm the maps are
+// always kept in sync.
+func getCompareIds(pn, pc, ptc, pb string) (bpn, bpc, bptc, bpb int, res []*Key, idArr []string, IDArr []*ID) {
+	findLock.Lock()
+	defer findLock.Unlock()
+	res = []*Key{}
+	// Position markers; -1 means "no new key was created for this field".
+	bpn, bpc, bptc, bpb = -1, -1, -1, -1
+	if pn != "" {
+		ids := mapPn[pn]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPn[pn] = ids
+			bpn = 0
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	if pc != "" {
+		ids := mapPc[pc]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPc[pc] = ids
+			bpc = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	// The title project code shares mapPc with the regular project code.
+	if ptc != "" {
+		ids := mapPc[ptc]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPc[ptc] = ids
+			bptc = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	if pb != "" {
+		ids := mapPb[pb]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPb[pb] = ids
+			bpb = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+	// repeatId ensures each project id is collected (and locked) only once.
+	repeatId := map[string]bool{}
+	idArr = []string{} // project ids
+	IDArr = []*ID{}    // per-project ID objects
+	for _, m := range res {
+		for _, id := range m.Arr {
+			if !repeatId[id] {
+				repeatId[id] = true
+				//_, _ = strconv.ParseInt(id[0:8], 16, 64)
+				AllIdsMapLock.Lock()
+				Id := AllIdsMap2[id]
+				AllIdsMapLock.Unlock()
+				Id.Lock.Lock()
+				idArr = append(idArr, id)
+				IDArr = append(IDArr, Id)
+			}
+		}
+	}
+	return
+}
+
+// startProjectMerge is the entry point for merging one source record into the
+// project set.
+//
+// info is the parsed record and tmp the raw document it was parsed from. The
+// candidate projects are looked up through getCompareIds, loaded from the
+// redis project cache and compared field by field. Candidates are sorted into
+// three priority groups (comRes1 highest); the best-scoring candidate of the
+// first non-empty group is updated with this record. When no candidate
+// qualifies, a new project is created and registered under all of the
+// record's keys.
+//
+// All Key and ID locks handed out by getCompareIds are released when this
+// function returns.
+func startProjectMerge(info *Info, tmp map[string]interface{}) {
+	// (original note) records that have only a buyer, or no buyer, cannot be merged.
+	// bpn, bpc, bptc, bpb: position in pids of a key newly created for this
+	// record, -1 otherwise.
+	// pids is the set of Key objects (project id arrays).
+	// IDArr is the set of per-project ID objects.
+	bpn, bpc, bptc, bpb, pids, idArr, IDArr := getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+	// map: key is the pn/ptn/pc/ptc/buyer value, value holds the id array and a lock
+
+	// The defers are intentional: every lock stays held until the merge is done.
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+	}
+	for _, id := range IDArr {
+		defer id.Lock.Unlock()
+	}
+
+	bFindProject := false
+	findPid := ""
+	// Ids collected; now compare.
+	if len(idArr) > 0 && len(idArr) == len(IDArr) {
+		res := redis.Mget(ProjectCache, idArr)
+		// Result groups by priority.
+		comRes1 := []*ProjectInfo{} // highest-priority matches
+		comRes2 := []*ProjectInfo{} // second priority
+		comRes3 := []*ProjectInfo{}
+		if len(res) == len(idArr) {
+			for _, v := range res {
+				var cp ProjectInfo
+				comStr := ""
+				var compareProject *ProjectInfo
+				// A missing or malformed cache entry must not panic the merge.
+				raw, isBytes := v.([]byte)
+				if !isBytes {
+					log.Println("redis记录缺失!!!")
+					continue
+				}
+				err := json.Unmarshal(raw, &cp)
+				compareProject = &cp
+				// NOTE (original author): the problem lies with LastTime.
+				diffTime := math.Abs(float64(info.Publishtime - compareProject.LastTime))
+				if err != nil {
+					log.Println("从redis取id信息出错!!!", err)
+				} else if diffTime < 360*86400 {
+					// Letters: A equal, B contained, C not equal, D absent, E contained.
+					compareStr, score := comparePNCB(info, compareProject)
+					//if info.ProjectCode == "ZWJLZXQ-2016-0508" {
+					//log.Println(compareStr)
+					//}
+					resVal := Select(compareStr, info, compareProject)
+					//---------------------------------------
+					if resVal > 0 {
+						compareBuyer := "D"
+						if info.Buyer != "" {
+							if info.Buyer == compareProject.Buyer {
+								compareBuyer = "A"
+								score += 3
+							} else if compareProject.Buyer != "" {
+								if strings.Contains(info.Buyer, compareProject.Buyer) || strings.Contains(compareProject.Buyer, info.Buyer) {
+									compareBuyer = "B"
+									score += 1
+								} else {
+									compareBuyer = "C"
+								}
+							}
+						}
+
+						//---------------------------------------
+
+						// compareCity is always two letters: province, then city.
+						compareCity := ""
+						if info.Area != "全国" && info.Area != "" && info.Area == compareProject.Area {
+							compareCity += "A"
+							score += 2
+						} else if info.Area == "全国" || compareProject.Area == "全国" {
+							compareCity += "B"
+							score += 1
+						} else {
+							compareCity += "C"
+						}
+						if compareCity != "C" {
+							if info.City != "" && info.City == compareProject.City {
+								compareCity += "A"
+								score += 2
+							} else {
+								if info.Area == "全国" || compareProject.Area == "全国" {
+									compareCity += "B"
+								} else if info.City == compareCity {
+									// NOTE(review): this compares the city name with the
+									// comparison letters; the intended operand is unclear
+									// and the branch is kept unchanged — confirm.
+									compareCity += "B"
+								} else {
+									compareCity += "C"
+								}
+							}
+						} else {
+							compareCity += "C"
+						}
+
+						compareTime := "D"
+						//diffTime := math.Abs(float64(info.Publishtime - compareProject.Publistime[len(compareProject.Publistime)-1]))
+						if diffTime < 45*86400 {
+							compareTime = "A"
+							score += 2
+						} else if diffTime < 90*86400 {
+							compareTime = "B"
+							score += 1
+						}
+						// score2 counts the supporting matches (agency, budget, bid amount).
+						score2 := 0
+						compareAgency := "D"
+						if info.Agency != "" {
+							if info.Agency == compareProject.Agency {
+								compareAgency = "A"
+								score += 2
+								score2 += 1
+							} else if compareProject.Agency != "" {
+								if strings.Contains(info.Agency, compareProject.Agency) || strings.Contains(compareProject.Agency, info.Agency) {
+									compareAgency = "B"
+									score += 1
+									score2 += 1
+								} else {
+									compareAgency = "C"
+								}
+							}
+						}
+						compareBudget := "C"
+						if info.budget > 0 && (info.budget == compareProject.Budget || (compareProject.Bidamount > 0 && info.budget > compareProject.Bidamount && (info.budget-compareProject.Bidamount) < (0.1*info.budget))) {
+							compareBudget = "A"
+							score += 1
+							score2 += 1
+						}
+						compareBidmount := "C"
+						if info.bidamount > 0 && (info.bidamount == compareProject.Bidamount || (compareProject.Budget > 0 && compareProject.Budget > info.bidamount && (compareProject.Budget-info.bidamount) < 0.1*compareProject.Budget)) {
+							compareBidmount = "A"
+							score += 1
+							score2 += 1
+						}
+
+						// Letter order: project name, project code, title project code,
+						// buyer, province, city, publish time, agency, budget, bid amount.
+
+						comStr = compareStr + compareBuyer + compareCity + compareTime + compareAgency + compareBudget + compareBidmount
+						compareProject.comStr = comStr
+						// Keep the score on the candidate so the groups can be
+						// ranked by it below.
+						compareProject.score = score
+						// cityLetter is the second (city) letter of compareCity.
+						cityLetter := compareCity[1:2]
+						eqV := 0
+						switch resVal {
+						case 3:
+							if compareBuyer < "C" {
+								eqV = 1
+							} else if compareBuyer == "D" {
+								if "AA" == compareCity && compareAgency != "C" && compareTime != "D" && score2 > 0 {
+									eqV = 2
+								} else if cityLetter != "C" && score2 > 0 && compareTime == "A" {
+									eqV = 3
+								} else if compareTime != "D" && compareAgency != "C" && score2 > 1 {
+									eqV = 3
+								}
+							} else {
+								if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+									eqV = 2
+								} else if cityLetter != "C" && compareTime == "A" && score2 > 1 {
+									eqV = 3
+								}
+							}
+
+						case 2:
+							if compareBuyer < "C" {
+								if compareTime != "D" && "AA" == compareCity && score2 > 0 {
+									eqV = 2
+								} else if cityLetter != "C" && compareTime == "A" && score2 > 0 {
+									eqV = 3
+								}
+							} else if compareBuyer == "D" {
+								if "AA" == compareCity && compareTime != "D" && score2 > 1 {
+									eqV = 3
+								} else if score2 > 2 && compareTime == "A" {
+									eqV = 3
+								}
+							} else {
+								if "AA" == compareCity && (compareAgency == "A" || score2 > 1) && compareTime == "A" {
+									eqV = 3
+								}
+							}
+						case 1:
+							if compareBuyer < "C" {
+								if compareTime != "D" && "AA" == compareCity && score2 > 1 {
+									eqV = 2
+								} else if cityLetter != "C" && compareTime == "A" && score2 > 1 {
+									eqV = 3
+								}
+							} else if compareBuyer == "D" {
+								if "AA" == compareCity && compareTime != "D" && score2 > 2 {
+									eqV = 3
+								} else if cityLetter != "C" && score2 > 2 && compareTime == "A" {
+									eqV = 3
+								}
+							} else {
+								if "AA" == compareCity && score2 > 2 && compareTime == "A" {
+									eqV = 3
+								}
+							}
+						}
+						if eqV == 1 {
+							comRes1 = append(comRes1, compareProject)
+						} else if eqV == 2 {
+							comRes2 = append(comRes2, compareProject)
+						} else if eqV == 3 {
+							comRes3 = append(comRes3, compareProject)
+						} else {
+							log.Println("+++++++++++", resVal, comStr, info, compareProject)
+						}
+					}
+				}
+			}
+			//-------------------------- comparison finished --------------------------
+			// Update the key arrays and the matched project.
+			for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+				if len(resN) > 0 {
+					if len(resN) > 1 {
+						sort.Slice(resN, func(i, j int) bool {
+							return resN[i].score > resN[j].score
+						})
+					}
+
+					bFindProject = true
+					findPid = resN[0].Id.Hex()
+					// For every key that was newly created for this record, register
+					// the matched project under it and record the new value on the
+					// project (k2: 0 name, 1 code, 2 title code, 3 buyer).
+					for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+						if bv > -1 {
+							pids[bv].Arr = append(pids[bv].Arr, findPid)
+							if k2 == 0 {
+								if resN[0].ProjectName == "" {
+									resN[0].ProjectName = info.ProjectName
+								} else {
+									if resN[0].MPN == nil {
+										resN[0].MPN = []string{info.ProjectName}
+									} else {
+										resN[0].MPN = append(resN[0].MPN, info.ProjectName)
+									}
+								}
+
+							} else if k2 < 3 {
+								if resN[0].ProjectCode == "" {
+									resN[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+								} else {
+									if resN[0].MPC == nil {
+										resN[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+									} else {
+										resN[0].MPC = append(resN[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+									}
+								}
+
+							} else {
+								if resN[0].Buyer == "" {
+									resN[0].Buyer = info.Buyer
+								}
+							}
+						}
+					}
+					UpdateProject(tmp, info, resN[0], kv+1, resN[0].comStr)
+					// TODO (from original): update the time in AllIdsMap.
+					redis.Put(ProjectCache, findPid, resN[0], 0)
+					break
+				}
+			}
+		} else {
+			log.Println("redis记录缺失!!!")
+		}
+	}
+
+	if !bFindProject {
+		// No matching project: create one and register it under every key.
+		id := NewProject(tmp, info)
+		//_, _ = strconv.ParseInt(id[0:8], 16, 64)
+		AllIdsMapLock.Lock()
+		//AllIdsMap[int(t)%idsMapSize][id] = &ID{Id: id, lastTime: info.Publishtime}
+		AllIdsMap2[id] = &ID{Id: id, lastTime: info.Publishtime}
+		AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+
+}
+
+// BinarySearch reports the position of k in s, or -1 when k is absent.
+//
+// The slice is sorted in place first, so the caller's slice is reordered and
+// the returned index refers to the sorted order.
+func BinarySearch(s []string, k string) int {
+	sort.Strings(s)
+	left, right := 0, len(s)-1
+	for left <= right {
+		mid := (left + right) >> 1
+		switch {
+		case s[mid] < k:
+			left = mid + 1
+		case s[mid] > k:
+			right = mid - 1
+		default:
+			return mid
+		}
+	}
+	return -1
+}
+
+// FIELDS lists the document keys that are copied as-is from a source record.
+var FIELDS = []string{"area", "city", "district", "projectname", "projectcode", "buyer", "winner", "budget", "bidamount", "bidstatus", "agency", "projectscope"}

+ 160 - 0
fullproject/src_dev2/merge_comparepncb.go

@@ -0,0 +1,160 @@
+package main
+
+import (
+	"strings"
+)
+
+// comparePNCB compares the project name, project code and title project code
+// of info against the candidate project.
+//
+// compareStr is a three-letter string, one letter per field in that order:
+// "A" equal, "B" contained, "C" different, "D" nothing to compare (the value
+// is empty on the record, or the project has no value for it).
+// score is increased by 4 for each "A" and by 2 for each "B", plus a bonus
+// (2 resp. 1) when the compared value is longer than 18 runes.
+//
+// The project's primary value and its alternates (MPN / MPC) are all tried;
+// an exact match stops the search, otherwise "contained" wins over
+// "different".
+func comparePNCB(info *Info, compareProject *ProjectInfo) (compareStr string, score int) {
+	if info.ProjectName != "" {
+		pns := []string{}
+		if compareProject.ProjectName != "" {
+			pns = append(pns, compareProject.ProjectName)
+		}
+		if len(compareProject.MPN) > 0 {
+			pns = append(pns, compareProject.MPN...)
+		}
+		// ifind: 0 nothing compared, 1 equal, 2 contained, 3 different.
+		ifind := 0
+		for _, v := range pns {
+			if info.ProjectName == v {
+				ifind = 1
+				break
+			} else {
+				//if strings.Contains(info.ProjectName, v) || strings.Contains(v, info.ProjectName) ||
+				retv := CheckContain(info.ProjectName, v, info.Buyer)
+				if retv == 1 {
+					ifind = 1
+					break
+				} else if retv == 2 {
+					ifind = 2
+				} else if ifind == 0 {
+					// Only downgrade to "different" when nothing better was seen yet.
+					ifind = 3
+				}
+			}
+		}
+		switch ifind {
+		case 0:
+			compareStr = "D"
+		case 1:
+			compareStr = "A"
+			score += 4
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 2
+			}
+		case 2:
+			compareStr = "B"
+			score += 2
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 1
+			}
+		case 3:
+			compareStr = "C"
+		}
+	} else {
+		compareStr = "D"
+	}
+
+	/*
+			Project codes: watch out for - - ( ) ( )
+			init_text = ["号","(重)","(第二次)","(重)"]
+	all_clean_mark = ["[","(","【","(","〖","]",")","】",")","〗","-","〔","〕","《","[","]","{","}","{","—"," ","-","﹝","﹞","–"]
+	*/
+	// Same procedure for the project code, then the title project code; both
+	// are compared against the project's code and its alternates (MPC).
+	for _, pc := range []string{info.ProjectCode, info.PTC} {
+		if pc != "" {
+			pcs := []string{}
+			if compareProject.ProjectCode != "" {
+				pcs = append(pcs, compareProject.ProjectCode)
+			}
+			if len(compareProject.MPC) > 0 {
+				pcs = append(pcs, compareProject.MPC...)
+			}
+			ifind := 0
+			for _, v := range pcs {
+				if pc == v {
+					ifind = 1
+					break
+				} else {
+					// math.Abs(float64(len([]rune(pc))-len([]rune(v)))) < 6
+					//if !_numreg1.MatchString(pc) && !_zimureg1.MatchString(pc) && !_numreg1.MatchString(v) && !_zimureg1.MatchString(v)
+					if strings.Contains(pc, v) || strings.Contains(v, pc) {
+						// t1 is the longer code, t2 the shorter one.
+						t1 := pc
+						t2 := v
+						if len(v) > len(pc) {
+							t1 = v
+							t2 = pc
+						}
+						// If what remains after removing the shorter code (and
+						// whatever _datereg matches) is empty, treat them as equal.
+						t3 := strings.Replace(t1, t2, "", -1)
+						t3 = _datereg.ReplaceAllString(t3, "")
+						if t3 == "" {
+							ifind = 1
+							break
+						} else {
+							ifind = 2
+						}
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+			switch ifind {
+			case 0:
+				compareStr += "D"
+			case 1:
+				compareStr += "A"
+				score += 4
+				if len([]rune(pc)) > 18 {
+					score += 2
+				}
+			case 2:
+				compareStr += "B"
+				score += 2
+				if len([]rune(pc)) > 18 {
+					score += 1
+				}
+			case 3:
+				compareStr += "C"
+			}
+
+		} else {
+			compareStr += "D"
+		}
+	}
+	return
+}
+
+// CheckContain compares two strings after normalisation and reports
+// 1 (equal), 2 (one contains the other) or 3 (different).
+//
+// Both strings first have everything matched by replaceStr removed and the
+// first occurrence of infoBuyer removed. If they are not equal at that point,
+// runs of matching runes are accumulated and stripped from both strings;
+// afterwards equal remainders give 1, an empty remainder on either side gives
+// 2, anything else 3.
+func CheckContain(b1, b2, infoBuyer string) (res int) {
+	b1 = replaceStr.ReplaceAllString(b1, "")
+	b2 = replaceStr.ReplaceAllString(b2, "")
+	b1 = strings.Replace(b1, infoBuyer, "", 1)
+	b2 = strings.Replace(b2, infoBuyer, "", 1)
+
+	if b1 == b2 {
+		res = 1 // equal
+		return
+	}
+	// bs1/bs2 are snapshots taken before any stripping below; the loops walk
+	// these snapshots while b1/b2 themselves are being shortened.
+	bs1 := []rune(b1)
+	bs2 := []rune(b2)
+	// tmp accumulates matching runes until a mismatch flushes it.
+	tmp := ""
+	for i := 0; i < len(bs1); i++ {
+		for j := 0; j < len(bs2); j++ {
+			if bs1[i] == bs2[j] {
+				tmp += string(bs1[i])
+			} else if tmp != "" {
+				b1 = strings.Replace(b1, tmp, "", -1)
+				b2 = strings.Replace(b2, tmp, "", -1)
+				tmp = ""
+			}
+		}
+	}
+	// Flush whatever is still accumulated after the last comparison.
+	if tmp != "" {
+		b1 = strings.Replace(b1, tmp, "", -1)
+		b2 = strings.Replace(b2, tmp, "", -1)
+	}
+	if b1 == b2 {
+		res = 1 // equal
+	} else if b1 == "" || b2 == "" {
+		res = 2 // contained
+	} else {
+		res = 3 // not equal
+	}
+	return
+}

+ 276 - 0
fullproject/src_dev2/merge_select.go

@@ -0,0 +1,276 @@
+package main
+
+// Select maps a three-letter comparison string to a match level.
+//
+// compareStr holds one letter each for project name, project code and title
+// project code (A equal, B contained, C different, D absent). The result is
+// 0 (no match) up to 3 (strongest); several combinations are refined by the
+// rune lengths stored on info (LenPN, LenPC, LenPTC). Strings listed in
+// compareNoPass, and strings not handled below, yield 0.
+func Select(compareStr string, info *Info, compareInfo *ProjectInfo) (res int) {
+	// No comparable project name or project code.
+	if compareNoPass[compareStr] {
+
+	} else {
+		switch compareStr {
+		case "AAA":
+			res = 3
+		case "AAB":
+			res = 3
+		case "ABA":
+			res = 3
+		case "ABB":
+			if info.LenPTC > 6 || info.LenPC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BAA":
+			if info.LenPN > 10 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BAB":
+			if info.LenPN > 10 || info.LenPTC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BBA":
+			if info.LenPN > 10 || info.LenPC > 6 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "BBB":
+			// v counts how many of the three values are long enough.
+			v := 0
+			if info.LenPN > 12 {
+				v++
+			}
+			if info.LenPC > 8 {
+				v++
+			}
+			if info.LenPTC > 8 {
+				v++
+			}
+			if v > 1 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "AAD":
+			res = 3
+		case "ABD":
+			//			if info.LenPC > 12 || info.LenPN > 16 {
+			//				res = 3
+			//			} else {
+			//				res = 2
+			//			}
+			res = 2
+		case "BAD":
+			//			if info.LenPC > 12 || info.LenPN > 16 {
+			//				res = 3
+			//			} else {
+			//				res = 2
+			//			}
+			res = 2
+		case "BBD":
+			if info.LenPC > 12 && info.LenPN > 16 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "ADA":
+			res = 3
+		case "ADB":
+			//			if info.LenPTC > 12 || info.LenPN > 16 {
+			//				res = 3
+			//			} else {
+			//				res = 2
+			//			}
+			res = 2
+		case "BDA":
+			//			if info.LenPTC > 12 || info.LenPN > 16 {
+			//				res = 3
+			//			} else {
+			//				res = 2
+			//			}
+			res = 2
+		case "BDB":
+			if info.LenPTC > 12 && info.LenPN > 16 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "ADD":
+			if info.LenPN > 14 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BDD":
+			//			if info.LenPN > 18 {
+			//				res = 2
+			//			} else {
+			//				res = 1
+			//			}
+			res = 1
+		case "DAA":
+			if info.LenPTC > 12 || info.LenPC > 12 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "DAB":
+			if info.LenPTC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DAD":
+			if info.LenPTC > 14 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBA":
+			if info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBB":
+			if info.LenPTC > 12 && info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DBD":
+			if info.LenPC > 8 {
+				res = 1
+			}
+		case "DDA":
+			if info.LenPTC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "DDB":
+			// NOTE(review): only the title project code matched here, yet the
+			// length checked is LenPC — possibly LenPTC was intended; confirm.
+			if info.LenPC > 8 {
+				res = 1
+			}
+		case "ACA":
+			if info.LenPN > 12 || info.LenPTC > 9 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "ACB":
+			if info.LenPN > 14 && info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BCA":
+			if info.LenPN > 12 && info.LenPTC > 9 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BCB":
+			if info.LenPN > 12 && info.LenPTC > 9 {
+				res = 2
+			} else if info.LenPN > 16 || info.LenPTC > 12 {
+				res = 1
+			}
+		case "AAC":
+			if info.LenPN > 12 || info.LenPC > 9 {
+				res = 3
+			} else {
+				res = 2
+			}
+		case "ABC":
+			if info.LenPN > 14 || info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BAC":
+			if info.LenPN > 14 || info.LenPC > 12 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "BBC":
+			//			if info.LenPN > 12 && info.LenPTC > 9 {
+			//				res = 1
+			//			} else {
+			//				res = 1
+			//			}
+			res = 1
+		case "ACC":
+			//			if info.LenPC > 4 && len(compareInfo.ProjectCode) > 4 && CheckHanAndNum(info.ProjectCode) && CheckHanAndNum(compareInfo.ProjectCode) {
+			//				// MPC is not considered
+			//			} else if info.LenPN > 16 {
+			//				res = 1
+			//			}
+			//
+			res = 1
+		case "ACD":
+			// Project codes do not match.
+			res = 1
+		case "ADC":
+			res = 1
+		case "BCC":
+			// Project codes do not match.
+			res = 1
+		case "BCD":
+			// Project codes do not match.
+			if info.LenPC > 4 && len(compareInfo.ProjectCode) > 4 && (!_numreg1.MatchString(info.ProjectCode) && !_numreg1.MatchString(compareInfo.ProjectCode)) {
+				// MPC is not considered; res stays 0.
+			} else if info.LenPN > 18 && info.LenPC-len([]rune(compareInfo.ProjectCode)) != 0 {
+				res = 1
+			}
+		case "BDC":
+			if info.LenPN > 18 && info.LenPTC-len([]rune(compareInfo.ProjectCode)) != 0 {
+				res = 1
+			}
+		case "CAA":
+			if info.LenPC > 10 || info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "CAB":
+			if info.LenPC > 10 && info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+		case "CAC":
+			res = 1
+		case "CAD":
+			res = 1
+		case "CBA":
+			res = 1
+		case "CBB":
+			res = 1
+		case "CBC":
+			res = 1
+		case "CBD":
+			res = 1
+		case "CCA":
+			res = 1
+		case "CCB":
+			// Deliberately left at 0.
+		case "CDA":
+			res = 1
+		case "CDB":
+			res = 1
+		case "DAC":
+			res = 1
+		case "DBC":
+			res = 1
+		case "DCA":
+			res = 1
+		case "DCB":
+			res = 1
+		}
+
+	}
+	return
+}

+ 116 - 0
fullproject/src_dev2/new_project.go

@@ -0,0 +1,116 @@
+package main
+
+import (
+	qu "qfw/util"
+	"qfw/util/redis"
+	"strings"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+// NewProject creates a project document from a single source record, saves it
+// to the project collection and caches its summary in redis.
+//
+// tmp is the raw source document and thisinfo its parsed form. The keys
+// listed in FIELDS are copied from tmp when present; the remaining attributes
+// come from thisinfo. The record itself becomes the first entry of the
+// project's "list". Returns the id of the saved project.
+//
+// NOTE(review): UpdateProject wraps s_subscopeclass / s_winner in leading and
+// trailing commas while this function stores the plain join — confirm which
+// form readers of those fields expect.
+func NewProject(tmp map[string]interface{}, thisinfo *Info) string {
+
+	set := map[string]interface{}{}
+	for _, f := range FIELDS {
+		if tmp[f] != nil {
+			set[f] = tmp[f]
+		}
+	}
+	set["s_projectname"] = thisinfo.ProjectName
+	set["createtime"] = time.Now().Unix()
+	set["sourceinfoid"] = qu.BsonIdToSId(tmp["_id"])
+	set["sourceinfourl"] = tmp["href"]
+	set["topscopeclass"] = thisinfo.Topscopeclass
+	set["subscopeclass"] = thisinfo.Subscopeclass
+	if thisinfo.Buyerperson != "" {
+		set["buyerperson"] = thisinfo.Buyerperson
+	}
+	if thisinfo.Buyertel != "" {
+		set["buyertel"] = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		// Stored under its own key; writing it to "buyertel" would overwrite
+		// the phone number set just above.
+		set["buyerclass"] = thisinfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > 0 {
+		set["bidopentime"] = thisinfo.Bidopentime
+	}
+	if len(thisinfo.Winnerorder) > 0 {
+		set["winnerorder"] = thisinfo.Winnerorder
+	}
+	s_subscopeclass := strings.Join(thisinfo.Subscopeclass, ",")
+	set["s_subscopeclass"] = s_subscopeclass
+	s_winner := strings.Join(thisinfo.Winners, ",")
+	set["s_winner"] = s_winner
+	if tmp["package"] != nil {
+		set["package"] = tmp["package"] // no priority defined for packages
+	}
+	push := NewPushInfo(tmp)
+	set["list"] = []bson.M{
+		push,
+	}
+
+	id := MongoTool.Save(ProjectColl, set)
+	p1 := NewPinfo(id, thisinfo)
+	redis.PutCKV(ProjectCache, id, p1)
+
+	return id
+}
+
+// NewPushInfo builds the per-record entry that is stored in a project's
+// "list" array.
+//
+// The fixed keys are always present in the result (nil when tmp lacks them);
+// "winner", "budget" and "bidamount" are only included when tmp has a
+// non-nil value for them. "infoid" is the string form of tmp["_id"].
+func NewPushInfo(tmp map[string]interface{}) bson.M {
+	entry := bson.M{"infoid": qu.BsonIdToSId(tmp["_id"])}
+	// Keys copied unconditionally.
+	always := []string{
+		"comeintime", "publishtime", "title", "toptype", "subtype", "infoformat",
+		"area", "city", "projectname", "projectcode", "buyer", "href",
+	}
+	for _, key := range always {
+		entry[key] = tmp[key]
+	}
+	// Keys copied only when a value is present.
+	for _, key := range []string{"winner", "budget", "bidamount"} {
+		if val := tmp[key]; val != nil {
+			entry[key] = val
+		}
+	}
+	return entry
+}
+
+// NewPinfo builds the ProjectInfo summary that is stored in redis for a
+// newly created project.
+//
+// id is the hex id of the saved project; all other fields are taken from
+// thisinfo. The title project code is recorded as an alternate code only when
+// it is longer than 5 runes, and the package map keeps the package keys with
+// empty-string values.
+func NewPinfo(id string, thisinfo *Info) ProjectInfo {
+	pinfo := ProjectInfo{
+		Id:            bson.ObjectIdHex(id),
+		Ids:           []string{thisinfo.Id},
+		Topscopeclass: thisinfo.Topscopeclass,
+		Subscopeclass: thisinfo.Subscopeclass,
+		Winners:       thisinfo.Winners,
+		ProjectName:   thisinfo.ProjectName,
+		ProjectCode:   thisinfo.ProjectCode,
+		Buyer:         thisinfo.Buyer,
+		Agency:        thisinfo.Agency,
+		Area:          thisinfo.Area,
+		City:          thisinfo.City,
+		District:      thisinfo.District,
+		HasPackage:    thisinfo.HasPackage,
+		Buyerclass:    thisinfo.Buyerclass,
+		Bidopentime:   thisinfo.Bidopentime,
+		Winnerorder:   thisinfo.Winnerorder,
+		FirstTime:     thisinfo.Publishtime,
+		LastTime:      thisinfo.Publishtime,
+	}
+	// Alternate names start empty; alternate codes may hold the title code.
+	pinfo.MPN = []string{}
+	pinfo.MPC = []string{}
+	if thisinfo.LenPTC > 5 {
+		pinfo.MPC = append(pinfo.MPC, thisinfo.PTC)
+	}
+	// Only the package keys are kept.
+	pinfo.Package = map[string]interface{}{}
+	for pkgKey := range thisinfo.Package {
+		pinfo.Package[pkgKey] = ""
+	}
+	return pinfo
+}

+ 200 - 0
fullproject/src_dev2/task.go

@@ -0,0 +1,200 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"qfw/util"
+	"regexp"
+	//"strings"
+	"time"
+)
+
+const (
+	InitMinTime = int64(1325347200) //最小时间位置2012
+)
+
+// taskQl runs the full ("ql") merge over the source collection.
+//
+// udpInfo is the decoded task message; "db" and "coll" select the source
+// (defaulting to MongoTool.DbName and ExtractColl) and "thread" the number of
+// concurrent merge workers (default 1). The function makes sure a publishtime
+// index exists, determines the smallest and largest publishtime, and then
+// walks that range in windows of 50 days, handing each window to Mql.
+// Everything before InitMinTime is processed as one window.
+//
+// NOTE(review): the time range is read from ExtractColl through MongoTool
+// while the data is read from db/coll — these differ when the message
+// overrides either; confirm this is intended.
+func taskQl(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	// 1. Make sure the publishtime index exists.
+	db, _ := udpInfo["db"].(string)
+	if db == "" {
+		db = MongoTool.DbName
+	}
+	coll, _ := udpInfo["coll"].(string)
+	if coll == "" {
+		coll = ExtractColl
+	}
+	sess := MongoTool.GetMgoConn()
+	bcon := false
+	// EnsureIndexKey takes field names ("-field" for descending), not index
+	// names; a single ascending index serves both sort directions.
+	if sess.DB(db).C(coll).EnsureIndexKey("publishtime") == nil {
+		bcon = true
+	} else {
+		log.Println("publishtime_1索引不存在")
+	}
+	MongoTool.DestoryMongoConn(sess)
+	thread := util.IntAllDef(udpInfo["thread"], 1)
+	if bcon {
+		// Determine the first and last publishtime.
+		startTime, END := int64(0), int64(0)
+
+		sts, bres := MongoTool.Find(ExtractColl, `{}`, "publishtime", `{"publishtime":1}`, true, 0, 1)
+		if bres && sts != nil && len(*sts) == 1 {
+			startTime = util.Int64All((*sts)[0]["publishtime"])
+			sts, bres = MongoTool.Find(ExtractColl, `{}`, "-publishtime", `{"publishtime":1}`, true, 0, 1)
+			if bres && sts != nil && len(*sts) == 1 {
+				END = util.Int64All((*sts)[0]["publishtime"])
+			}
+			log.Println("查询到的起始时间", startTime, END)
+		} else {
+			return
+		}
+		// Windows are (start, end], so step back one second to include the
+		// earliest record.
+		startTime -= 1
+		sum := 0
+		if startTime < InitMinTime {
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": InitMinTime,
+				},
+			}
+			sum = Mql(q, thread, db, coll, sum)
+			startTime = InitMinTime
+		}
+		for {
+			if startTime >= END {
+				break
+			}
+			et := startTime + 50*86400
+			if et >= END {
+				et = END
+			}
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": et,
+				},
+			}
+			sum = Mql(q, thread, db, coll, sum)
+			startTime = et
+			time.Sleep(1 * time.Second)
+		}
+	}
+	log.Println("task over!!!")
+}
+
+// Mql processes every document of db.coll that matches q, in publishtime
+// order, and merges each usable record with up to thread concurrent workers.
+//
+// sum is the running count of documents seen so far across calls; the updated
+// count is returned. A record is merged only when ParseInfo succeeds and its
+// pnbval is neither 0 nor "1 with a buyer". The call returns after all
+// workers started by it have finished.
+func Mql(q map[string]interface{}, thread int, db, coll string, sum int) int {
+	defer util.Catch()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	query := sess.DB(db).C(coll).Find(q).Sort("publishtime").Iter()
+	// pool bounds the number of merges running at the same time.
+	pool := make(chan bool, thread)
+	count := 0
+	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
+		info := ParseInfo(tmp)
+		if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
+			pool <- true
+			go func(info *Info, tmp map[string]interface{}) {
+				defer func() {
+					<-pool
+				}()
+				startProjectMerge(info, tmp)
+			}(info, tmp)
+		} else {
+			//log.Println("info err:", tmp["_id"], tmp["title"], tmp["buyer"])
+		}
+		if sum%1000 == 0 {
+			log.Println("current", sum)
+		}
+		sum++
+		// Each document gets its own map because the worker keeps a reference.
+		tmp = make(map[string]interface{})
+	}
+	// Next returning false can also mean the cursor failed; Close reports
+	// that, so a partial scan is not mistaken for a complete one.
+	if err := query.Close(); err != nil {
+		log.Println("iter err:", err, q)
+	}
+	// Barrier: filling the pool only succeeds once every worker has released
+	// its slot.
+	for n := 0; n < thread; n++ {
+		pool <- true
+	}
+	// Done.
+	log.Println("sontask over:", count, sum, q)
+	return sum
+}
+
+var (
+	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	pcReplace   = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$")
+)
+
+// ParseInfo converts a raw source document into an *Info and normalises the
+// fields used for merging.
+//
+// tmp is round-tripped through JSON into Info; nil is returned when that
+// yields no object. The function then:
+//   - extracts a title project code (PTC) from Title with titleGetPc,
+//     titleGetPc1 and titleGetPc2, tried in that order;
+//   - strips decorations from ProjectName and the project code via pcReplace;
+//   - clears PTC when it equals ProjectCode, and clears Buyer when it is not
+//     longer than 2 runes;
+//   - stores the rune lengths in LenPC, LenPTC and LenPN.
+//
+// pnbval is incremented once each for: a PTC found in the title, a non-empty
+// cleaned ProjectName, a non-empty ProjectCode or PTC, and a usable Buyer.
+//
+// NOTE(review): the errors from json.Marshal / json.Unmarshal are ignored, so
+// a partially decoded Info may be returned — confirm this is acceptable.
+func ParseInfo(tmp map[string]interface{}) (info *Info) {
+	bys, _ := json.Marshal(tmp)
+	var thisinfo *Info
+	json.Unmarshal(bys, &thisinfo)
+	if thisinfo == nil {
+		return nil
+	}
+	// Normalise missing class lists to empty (non-nil) slices.
+	if len(thisinfo.Topscopeclass) == 0 {
+		thisinfo.Topscopeclass = []string{}
+	}
+	if len(thisinfo.Subscopeclass) == 0 {
+		thisinfo.Subscopeclass = []string{}
+	}
+
+	// A title code is accepted when it is longer than 6 bytes, differs from
+	// ProjectCode and is rejected by neither numCheckPc nor _zimureg1.
+	res := titleGetPc.FindStringSubmatch(thisinfo.Title)
+	if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) {
+		thisinfo.PTC = res[1]
+		thisinfo.pnbval++
+	} else {
+		res = titleGetPc1.FindStringSubmatch(thisinfo.Title)
+		if len(res) > 3 && len(res[3]) > 6 && thisinfo.ProjectCode != res[3] && !numCheckPc.MatchString(res[3]) && !_zimureg1.MatchString(res[3]) {
+			thisinfo.PTC = res[3]
+			thisinfo.pnbval++
+		} else {
+			res = titleGetPc2.FindStringSubmatch(thisinfo.Title)
+			if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) {
+				thisinfo.PTC = res[1]
+				thisinfo.pnbval++
+			}
+		}
+	}
+
+	if thisinfo.ProjectName != "" && len([]rune(thisinfo.ProjectName)) > 0 {
+		//		thisinfo.ProjectName = strings.Replace(thisinfo.ProjectName, "(", "(", -1)
+		//		thisinfo.ProjectName = strings.Replace(thisinfo.ProjectName, ")", ")", -1)
+		//		thisinfo.ProjectName = strings.Replace(thisinfo.ProjectName, "-", "", -1)
+		thisinfo.ProjectName = pcReplace.ReplaceAllString(thisinfo.ProjectName, "")
+		if thisinfo.ProjectName != "" {
+			thisinfo.pnbval++
+		}
+	}
+
+	if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+		// NOTE(review): PTC is only cleaned when ProjectCode is empty; when
+		// both are set PTC keeps its decorations — confirm this is intended.
+		if thisinfo.ProjectCode != "" {
+			thisinfo.ProjectCode = pcReplace.ReplaceAllString(thisinfo.ProjectCode, "")
+			//			thisinfo.ProjectCode = strings.Replace(thisinfo.ProjectCode, "(", "(", -1)
+			//			thisinfo.ProjectCode = strings.Replace(thisinfo.ProjectCode, ")", ")", -1)
+			//			thisinfo.ProjectCode = strings.Replace(thisinfo.ProjectCode, "-", "", -1)
+		} else {
+			thisinfo.PTC = pcReplace.ReplaceAllString(thisinfo.PTC, "")
+			//			thisinfo.PTC = strings.Replace(thisinfo.PTC, "(", "(", -1)
+			//			thisinfo.PTC = strings.Replace(thisinfo.PTC, ")", ")", -1)
+			//			thisinfo.PTC = strings.Replace(thisinfo.PTC, "-", "", -1)
+		}
+		if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+			thisinfo.pnbval++
+		}
+	}
+	// A title code identical to the project code carries no extra information.
+	if thisinfo.ProjectCode == thisinfo.PTC {
+		thisinfo.PTC = ""
+	}
+
+	// Buyers of 2 runes or fewer are discarded.
+	if thisinfo.Buyer != "" && len([]rune(thisinfo.Buyer)) > 2 {
+		thisinfo.pnbval++
+	} else {
+		thisinfo.Buyer = ""
+	}
+
+	thisinfo.LenPC = len([]rune(thisinfo.ProjectCode))
+	thisinfo.LenPTC = len([]rune(thisinfo.PTC))
+	thisinfo.LenPN = len([]rune(thisinfo.ProjectName))
+	return thisinfo
+}

+ 221 - 0
fullproject/src_dev2/update_project.go

@@ -0,0 +1,221 @@
+package main
+
+import (
+	qu "qfw/util"
+	"sort"
+	"strings"
+)
+
+// UpdateProject merges one source record into an existing project.
+//
+// tmp is the raw source document, thisinfo its parsed form, pInfo the cached
+// summary of the target project and comStr the comparison string that led to
+// the match (stored with the pushed list entry). weight is not used by this
+// function.
+//
+// Nothing happens when the record id is already among pInfo.Ids. Otherwise
+// pInfo is updated in memory through EqInfoUpdate and, if the project
+// document can be loaded, its fields are $set from pInfo and the record is
+// $push-ed onto the project's "list".
+//
+// Note that BinarySearch sorts pInfo.Ids in place.
+func UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string) {
+	//log.Println(thisinfo.ProjectName, pInfo.ProjectName, comStr)
+	//	updateid := pInfo.Id
+	if BinarySearch(pInfo.Ids, thisinfo.Id) > -1 {
+		return //updateid
+	}
+	set := map[string]interface{}{}
+	res, bres := MongoTool.FindById(ProjectColl, pInfo.Id.Hex(), `{"list":0}`)
+	EqInfoUpdate(thisinfo, pInfo)
+	if bres && res != nil && *res != nil {
+		set["topscopeclass"] = pInfo.Topscopeclass
+		set["subscopeclass"] = pInfo.Subscopeclass
+		// The joined forms are wrapped in commas when non-empty.
+		s_subscopeclass := strings.Join(pInfo.Subscopeclass, ",")
+		if len(s_subscopeclass) > 0 {
+			s_subscopeclass = "," + s_subscopeclass + ","
+		}
+		set["s_subscopeclass"] = s_subscopeclass
+		s_winner := strings.Join(pInfo.Winners, ",")
+		if len(s_winner) > 0 {
+			s_winner = "," + s_winner + ","
+		}
+		set["s_winner"] = s_winner
+		if pInfo.Buyerperson != "" && pInfo.Buyertel != "" {
+			set["buyerperson"] = pInfo.Buyerperson
+			set["buyertel"] = pInfo.Buyertel
+		}
+		if pInfo.Buyerclass != "" {
+			set["buyerclass"] = pInfo.Buyerclass
+		}
+		if pInfo.District != "" {
+			set["district"] = pInfo.District
+		}
+		if pInfo.Bidopentime > 0 {
+			set["bidopentime"] = pInfo.Bidopentime
+		}
+		if len(pInfo.Winnerorder) > 0 {
+			set["winnerorder"] = pInfo.Winnerorder
+		}
+		if thisinfo.HasPackage {
+			set["multipackage"] = 1
+		} else {
+			set["multipackage"] = 0
+		}
+
+		if pInfo.ProjectName != "" {
+			set["projectname"] = pInfo.ProjectName
+		}
+
+		if pInfo.ProjectCode != "" {
+			set["projectcode"] = pInfo.ProjectCode
+		}
+
+		if pInfo.Buyer != "" {
+			set["buyer"] = pInfo.Buyer
+		}
+
+		// Budget and bid amount: only filled in when the project has none yet.
+		if qu.Float64All(tmp["bidamount"]) > 0 {
+			if qu.Float64All((*res)["bidamount"]) == 0 {
+				set["bidamount"] = tmp["bidamount"]
+			}
+		}
+
+		if qu.Float64All(tmp["budget"]) > 0 {
+			if qu.Float64All((*res)["budget"]) == 0 {
+				set["budget"] = tmp["budget"]
+			}
+		}
+
+		set["mpn"] = pInfo.MPN
+		set["mpc"] = pInfo.MPC
+		set["area"] = pInfo.Area
+		set["city"] = pInfo.City
+
+		//e := InitEL(util.ObjToString((*res)["extractpos"]))
+		if thisinfo.dealtype == 1 {
+			// Lift the sub-package's values to the top level of the record.
+			// The comma-ok assertion keeps a missing or malformed "package"
+			// from panicking; ranging over a nil map is a no-op.
+			packages, _ := tmp["package"].(map[string]interface{})
+			var sonpackage map[string]interface{}
+			for _, obj := range packages {
+				sonpackage, _ = obj.(map[string]interface{})
+			}
+			for _, v2 := range []string{"budget", "budget_w", "winner", "winner_w", "bidstatus", "bidstatus_w"} {
+				if sonpackage[v2] != nil {
+					tmp[v2] = sonpackage[v2]
+				}
+			}
+		}
+		//e.fieldpriority(&tmp, res, &set)
+		//set["extractpos"] = e.GetVal()
+		if thisinfo.HasPackage { // multi-package handling
+			p1, _ := (*res)["package"].(map[string]interface{})
+			p2, _ := tmp["package"].(map[string]interface{})
+			if p2 != nil {
+				if p1 != nil {
+					for pk2, pv2 := range p2 {
+						if p1[pk2] != nil { // merge into the existing package
+							item1, _ := p1[pk2].(map[string]interface{})
+							item2, _ := pv2.(map[string]interface{})
+							if item1 != nil && item2 != nil { // existing values win
+								for ik1, iv1 := range item2 {
+									if item1[ik1] == nil {
+										item1[ik1] = iv1
+									}
+								}
+							}
+						} else {
+							p1[pk2] = pv2
+						}
+					}
+				} else {
+					p1 = p2
+				}
+			}
+			set["package"] = p1
+		}
+		// (original note) merging of winning candidates — not implemented here.
+
+		update := map[string]interface{}{}
+		if len(set) > 0 {
+			update["$set"] = set
+		}
+		// Keep the original record data in the list.
+		push := NewPushInfo(tmp)
+		push["compareStr"] = comStr
+		update["$push"] = map[string]interface{}{
+			"list": push,
+		}
+		if len(update) > 0 {
+			MongoTool.Update(ProjectColl, map[string]interface{}{
+				"_id": qu.StringTOBsonId(pInfo.Id.Hex()),
+			}, &update, false, false)
+		}
+	}
+	// (original notes) then put the index into redis
+	// add the time to the queue -------------->start
+}
+
+// EqInfoUpdate folds the attributes of a matched record into the cached
+// project summary pInfo.
+//
+// It widens the project's first/last publish time, records the record id,
+// fills in or upgrades buyer, agency and project code, replaces a nationwide
+// area with a specific one, takes over contact, class, district and
+// bid-opening data when the record has them, and adds the record's scope
+// classes, winners and winner order to the project's sorted lists.
+func EqInfoUpdate(thisinfo *Info, pInfo *ProjectInfo) {
+	published := thisinfo.Publishtime
+	if published > pInfo.LastTime {
+		pInfo.LastTime = published
+	}
+	if pInfo.FirstTime == 0 || (published < pInfo.FirstTime && published > 0) {
+		pInfo.FirstTime = published
+	}
+	pInfo.Ids = append(pInfo.Ids, thisinfo.Id)
+	// (original note) publish-time bookkeeping ends here ----------------->end
+
+	runes := func(s string) int { return len([]rune(s)) }
+
+	// A value is taken when the project has none, or when the project's value
+	// is short and the record's value is long.
+	if (pInfo.Buyer == "" && thisinfo.Buyer != "") || (runes(pInfo.Buyer) < 5 && runes(thisinfo.Buyer) > 5) {
+		pInfo.Buyer = thisinfo.Buyer
+	}
+	if (pInfo.Agency == "" && thisinfo.Agency != "") || (runes(pInfo.Agency) < 5 && runes(thisinfo.Agency) > 5) {
+		pInfo.Agency = thisinfo.Agency
+	}
+	if (pInfo.ProjectCode == "" && thisinfo.ProjectCode != "") || (runes(pInfo.ProjectCode) < 6 && runes(thisinfo.ProjectCode) > 6) {
+		pInfo.ProjectCode = thisinfo.ProjectCode
+	}
+
+	if pInfo.Area == "全国" && thisinfo.Area != "全国" {
+		pInfo.Area, pInfo.City = thisinfo.Area, thisinfo.City
+	}
+	if thisinfo.Buyerperson != "" && thisinfo.Buyertel != "" && runes(thisinfo.Buyertel) > 6 {
+		pInfo.Buyerperson, pInfo.Buyertel = thisinfo.Buyerperson, thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		pInfo.Buyerclass = thisinfo.Buyerclass
+	}
+	if thisinfo.District != "" {
+		pInfo.District = thisinfo.District
+	}
+	if thisinfo.Bidopentime > 0 {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+	}
+
+	// addMissing sorts have and appends every element of extra it does not
+	// contain yet, re-sorting after each insertion. An empty extra leaves
+	// have untouched.
+	addMissing := func(have, extra []string) []string {
+		if len(extra) == 0 {
+			return have
+		}
+		sort.Strings(have)
+		for _, item := range extra {
+			if BinarySearch(have, item) == -1 {
+				have = append(have, item)
+				sort.Strings(have)
+			}
+		}
+		return have
+	}
+
+	pInfo.Topscopeclass = addMissing(pInfo.Topscopeclass, thisinfo.Topscopeclass)
+	pInfo.Subscopeclass = addMissing(pInfo.Subscopeclass, thisinfo.Subscopeclass)
+	// winner
+	pInfo.Winners = addMissing(pInfo.Winners, thisinfo.Winners)
+	// winnerorder
+	pInfo.Winnerorder = addMissing(pInfo.Winnerorder, thisinfo.Winnerorder)
+}

+ 87 - 0
fullproject/src_dev3/clearmem.go

@@ -0,0 +1,87 @@
+package main
+
+import (
+	"log"
+
+	"github.com/robfig/cron"
+)
+
+// Periodic in-memory cleanup.
+// currentTime is the time of the current position; an entry older than it by more than validTime (6 months, counted as 6*30 days) is evicted.
+var currentTime = int64(0)
+var validTime = int64(6 * 30 * 86400)
+
+// clearMem registers a cron job that evicts stale projects from the in-memory
+// indexes, starts the scheduler, and then blocks forever.
+//
+// On each run, every entry of AllIdsMap2 whose lastTime lies more than validTime
+// behind currentTime is deleted, and its id is removed from the buyer (mapPb),
+// project-name (mapPn) and project-code (mapPc) indexes; index keys left with no
+// ids are deleted as well.
+func clearMem() {
+	c := cron.New()
+	// NOTE(review): six-field spec — presumably "second 50 of every minute"; confirm against the cron library version in use.
+	c.AddFunc("50 0/1 * * * *", func() {
+		// Hold findLock so no new lookup can start, then wait on wg
+		// (incremented in getCompareIds, released in startProjectMerge).
+		findLock.Lock()
+		defer findLock.Unlock()
+		wg.Wait()
+		// Walk every project id.
+		AllIdsMapLock.Lock()
+		defer AllIdsMapLock.Unlock()
+		clearNum := 0
+		for k, v := range AllIdsMap2 {
+			if currentTime-v.lastTime > validTime {
+				clearNum++
+				// Remove the id from the id map.
+				delete(AllIdsMap2, k)
+				// Remove the id from the buyer index.
+				if v.P.Buyer != "" {
+					ids := mapPb[v.P.Buyer]
+					if ids != nil {
+						ids.Lock.Lock()
+						ids.Arr = deleteSlice(ids.Arr, k)
+						if len(ids.Arr) == 0 {
+							delete(mapPb, v.P.Buyer)
+						}
+						ids.Lock.Unlock()
+					}
+				}
+				// Remove the id from mapPn (primary project name plus merged names).
+				for _, vn := range append([]string{v.P.ProjectName}, v.P.MPN...) {
+					if vn != "" {
+						ids := mapPn[vn]
+						if ids != nil {
+							ids.Lock.Lock()
+							ids.Arr = deleteSlice(ids.Arr, k)
+							if len(ids.Arr) == 0 {
+								delete(mapPn, vn)
+							}
+							ids.Lock.Unlock()
+						}
+					}
+				}
+				// Remove the id from mapPc (primary project code plus merged codes).
+				for _, vn := range append([]string{v.P.ProjectCode}, v.P.MPC...) {
+					if vn != "" {
+						ids := mapPc[vn]
+						if ids != nil {
+							ids.Lock.Lock()
+							ids.Arr = deleteSlice(ids.Arr, k)
+							if len(ids.Arr) == 0 {
+								delete(mapPc, vn)
+							}
+							ids.Lock.Unlock()
+						}
+					}
+				}
+				// Only clears the loop variable; the map entry was already deleted above.
+				v = nil
+			}
+		}
+		log.Println("清除完成:", clearNum, len(AllIdsMap2))
+	})
+	c.Start()
+	// select {} below blocks forever, so this deferred Stop never runs.
+	defer c.Stop()
+	select {}
+}
+
+// deleteSlice removes every occurrence of v from arr and returns the shortened
+// slice. It filters in place, so the result shares arr's backing array and the
+// caller must use the returned slice instead of arr.
+func deleteSlice(arr []string, v string) []string {
+	j := 0
+	for _, v1 := range arr {
+		if v1 != v {
+			arr[j] = v1
+			// Advance the write cursor; without this every kept element was
+			// overwritten at index 0 and the result was always empty.
+			j++
+		}
+	}
+	return arr[:j]
+}

+ 16 - 0
fullproject/src_dev3/config.json

@@ -0,0 +1,16 @@
+{
+    "thread": 1,
+    "mongodbServers": "192.168.3.207:27082",
+    "mongodbPoolSize": 10,
+    "mongodbName": "cesuo",
+    "extractColl": "key1_biddingall",
+    "projectColl": "projectset_0809",
+    "redisaddrs": "ids=192.168.3.207:1378,keys=192.168.3.207:1378,info=192.168.3.207:1378",
+    "redisPoolSize": 20,
+    "jkmail": {
+        "to": "zhangjinkun@topnet.net.cn",
+        "api": "http://10.171.112.160:19281/_send/_mail"
+    },
+    "udpport": ":1482",
+    "nextNode": []
+}

+ 290 - 0
fullproject/src_dev3/init.go

@@ -0,0 +1,290 @@
+package main
+
+import (
+	"log"
+	mu "mfw/util"
+	"qfw/util"
+	"qfw/util/mongodb"
+	"regexp"
+	"sync"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+const (
+	ProjectCache = "info" // holds each project's info, keyed by project ID
+)
+
+var (
+	Sysconfig                map[string]interface{} // configuration read from the config file
+	MongoTool                mongodb.MongodbSim     // mongodb connection
+	ExtractColl, ProjectColl string                 // extraction collection, project collection
+	CurrentMegerTime         int64                  // time of the info currently being merged
+	CurrentMegerCount        int                    // running merge counter
+	MultiThread              = make(chan bool, 5)   // project-merge worker slots
+	AllIdsMap2               = map[string]*ID{}
+	AllIdsMapLock            = sync.Mutex{}
+)
+
+var (
+	_datereg   = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
+	_numreg1   = regexp.MustCompile("^[0-9-]{1,8}$")
+	_zimureg1  = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
+	_nzreg     = regexp.MustCompile("^[0-9a-zA-Z-]+$")
+	_hanreg    = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
+	replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
+	pStr       = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
+	nreg1      = regexp.MustCompile("[0-9]{2,}")
+	zreg1      = regexp.MustCompile("[a-zA-Z]{1,}")
+	hreg1      = regexp.MustCompile(`[\p{Han}]+`)
+	numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
+
+	// index: project name -> project ids
+	mapPn = map[string]*Key{}
+	// index: project code -> project ids
+	mapPc = map[string]*Key{}
+	// index: buyer -> project ids
+	mapPb         = map[string]*Key{}
+	compareNoPass = map[string]bool{}
+	compareAB     = map[string]bool{}
+	compareAB2D   = map[string]bool{}
+	compareABD    = map[string]bool{}
+	compareAB2CD  = map[string]bool{}
+	compareABCD   = map[string]bool{}
+)
+
+// init reads the configuration, sets up the MongoDB pool and the UDP listener,
+// and precomputes the lookup tables of three-letter comparison codes
+// (compareNoPass, compareAB, compareAB2D, compareABD, compareAB2CD, compareABCD).
+// Each table only receives keys that no earlier table already holds.
+func init() {
+	util.ReadConfig(&Sysconfig)
+	MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
+	// The unchecked .(string) assertions below panic when a key is missing or not a string.
+	MongoTool = mongodb.MongodbSim{
+		MongodbAddr: Sysconfig["mongodbServers"].(string),
+		Size:        util.IntAll(Sysconfig["mongodbPoolSize"]),
+		DbName:      Sysconfig["mongodbName"].(string),
+	}
+	MongoTool.InitPool()
+	ExtractColl = Sysconfig["extractColl"].(string)
+	ProjectColl = Sysconfig["projectColl"].(string)
+
+	udpport, _ := Sysconfig["udpport"].(string)
+	udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", udpport)
+
+	// --- combinations that never pass: every code built only from C and D
+	vm := []string{"C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareNoPass[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------")
+
+	// all three elements match: [AB][AB][AB], the highest-scoring group
+	vm = []string{"A", "B"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareAB[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB))
+	// --- at least two match, the remaining one may be absent
+	//[AB][AB][ABD]
+	//[AB][ABD][AB]
+	vm = []string{"A", "B"}
+	vm2 := []string{"A", "B", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2D))
+	// --- at least one matches, the others may be absent
+	//[ABD][ABD][ABD] // DDD is already excluded (it is in compareNoPass)
+	vm = []string{"A", "B", "D"}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
+					compareABD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABD))
+
+	//[AB][ABCD][AB]
+	//[AB][AB][ABCD]
+	vm = []string{"A", "B"}
+	vm2 = []string{"A", "B", "C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2CD))
+	//[ABECD][ABECD][ABECD]  // [CD][CD][CD] already excluded   // this group needs focused discussion
+	vm = []string{"A", "B", "C", "D"}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
+					compareABCD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+}
+
+// CheckHanAndNum reports whether str contains both a run of two or more
+// digits (nreg1) and at least one Han character (hreg1).
+func CheckHanAndNum(str string) (b bool) {
+	if !nreg1.MatchString(str) {
+		return false
+	}
+	return hreg1.MatchString(str)
+}
+// CheckZimuAndNum reports whether str contains both an ASCII letter (zreg1)
+// and a run of two or more digits (nreg1).
+func CheckZimuAndNum(str string) (b bool) {
+	if !zreg1.MatchString(str) {
+		return false
+	}
+	return nreg1.MatchString(str)
+}
+
+// KeyMap pairs a map from string key to *Key with a mutex.
+type KeyMap struct {
+	Lock sync.Mutex
+	Map  map[string]*Key
+}
+
+// ID is the in-memory entry kept per project in AllIdsMap2.
+type ID struct {
+	Id       string       // project id (hex form of the project's ObjectId)
+	Lock     sync.Mutex   // taken by getCompareIds while the project is a merge candidate
+	lastTime int64        // compared against currentTime by the cleanup job
+	pos      int
+	P        *ProjectInfo // the cached project
+}
+// Key is one index bucket: the project ids filed under a single
+// project name, project code or buyer, plus the mutex guarding them.
+type Key struct {
+	Arr  []string
+	Lock sync.Mutex
+}
+// IdAndLock bundles an id string with its own mutex.
+type IdAndLock struct {
+	Id   string
+	Lock sync.Mutex
+}
+
+// NewKeyMap returns a KeyMap with an empty, ready-to-use map and an unlocked mutex.
+func NewKeyMap() *KeyMap {
+	km := new(KeyMap) // the zero-value Lock is an unlocked mutex
+	km.Map = make(map[string]*Key)
+	return km
+}
+
+// Info is the bidding-announcement entity.
+type Info struct {
+	Id            string                 `json:"_id"`
+	Href          string                 `json:"href"` // source URL
+	Publishtime   int64                  `json:"publishtime"`
+	Title         string                 `json:"title"`
+	TopType       string                 `json:"toptype"`
+	SubType       string                 `json:"subtype"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"`
+	Buyer         string                 `json:"buyer"`
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`
+	Area          string                 `json:"area"`
+	City          string                 `json:"city"`
+	District      string                 `json:"district"`
+	HasPackage    bool                   `json:"haspackage"`
+	Package       map[string]interface{} `json:"package"`
+	PNum          string                 `json:"pnum"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Buyerclass    string                 `json:"buyerclass"`
+	Bidopentime   int64                  `json:"bidopentime"`
+	Budget        float64                `json:"budget"`
+	Bidamount     float64                `json:"bidamount"`
+	Winners       []string
+	dealtype      int
+
+	Winnerorder []string
+
+	PTC    string // project code extracted from the title
+	pnbval int    // how many of project name, project code and buyer are present
+	LenPC  int    // length of the project code
+	LenPN  int    // length of the project name
+	LenPTC int    // length of the title-extracted project code
+}
+
+// ProjectInfo is the project entity.
+type ProjectInfo struct {
+	Id            bson.ObjectId          `bson:"_id"`
+	FirstTime     int64                  `json:"firsttime"` // earliest time of the project
+	LastTime      int64                  `json:"lasttime"`  // latest time of the project
+	Ids           []string               `json:"ids"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"` // sub-industry classification
+	Winners       []string               `json:"winners"`       // winning bidders
+	ProjectName   string                 `json:"projectname"`   // project name
+	ProjectCode   string                 `json:"projectcode"`   // project code, unique (purely numeric codes carry less weight)
+	Buyer         string                 `json:"buyer"`         // buyer, unique
+	MPN           []string               `json:"mpn"`           // additional project names gathered by merging
+	MPC           []string               `json:"mpc"`           // additional project codes gathered by merging
+	Buyerperson   string                 `json:"buyerperson"`   // buyer contact person
+	Buyertel      string                 `json:"buyertel"`      // buyer contact phone
+	Agency        string                 `json:"agency"`        // agency
+	Area          string                 `json:"area"`          // area
+	City          string                 `json:"city"`          // city
+	District      string                 `json:"district"`      // district / county
+	HasPackage    bool                   `json:"haspackage"`    // whether the project has sub-packages
+	Package       map[string]interface{} `json:"package"`       // sub-package comparison object
+	Buyerclass    string                 `json:"buyerclass"`    // buyer classification
+	Bidopentime   int64                  `json:"bidopentime"`   // bid opening time
+	Zbtime        int64                  `json:"zbtime"`        // tender time
+	Jgtime        int64                  `json:"jgtime"`        // award-result time
+	Bidamount     float64                `json:"bidamount"`     // winning bid amount
+	Budget        float64                `json:"budget"`        // budget
+	Winnerorder   []string               `json:"winnerorder"`   // winning-bid candidates
+	score         int
+	comStr        string
+}

+ 65 - 0
fullproject/src_dev3/load_data.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"time"
+	//"encoding/json"
+	"log"
+)
+
+// loadData performs the initial load: it reads every document of projectColl
+// and files each project in the in-memory indexes (mapPn by project name,
+// mapPc by project code, mapPb by buyer) and in AllIdsMap2 by id.
+//
+// month and bCacheRedis are accepted but not used yet: the query is empty, so
+// all documents are loaded (the original note said "the last 6 months by
+// default"), and caching to redis is not implemented.
+func loadData(projectColl string, month int, bCacheRedis bool) {
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+
+	q := map[string]interface{}{}
+	it := sess.DB(MongoTool.DbName).C(projectColl).Find(&q).Iter()
+
+	// index appends id to the bucket stored under key in m, creating the bucket on first use.
+	index := func(m map[string]*Key, key, id string) {
+		if key == "" {
+			return
+		}
+		k := m[key]
+		if k == nil {
+			k = &Key{Arr: []string{}}
+			m[key] = k
+		}
+		k.Arr = append(k.Arr, id)
+	}
+
+	AllIdsMapLock.Lock()
+	defer AllIdsMapLock.Unlock()
+	n := 0
+	for {
+		// Allocate a fresh struct per document: the pointer is stored in
+		// AllIdsMap2, so reusing one struct would make every cached project
+		// alias the last document read.
+		tmp := &ProjectInfo{}
+		if !it.Next(tmp) {
+			break
+		}
+		n++
+		if n%1000 == 0 {
+			log.Println("current", n, "\n", tmp.Id, tmp)
+			time.Sleep(2 * time.Second)
+		}
+		id := tmp.Id.Hex()
+		index(mapPn, tmp.ProjectName, id)
+		for _, v := range tmp.MPN {
+			index(mapPn, v, id)
+		}
+		index(mapPc, tmp.ProjectCode, id)
+		for _, v := range tmp.MPC {
+			index(mapPc, v, id)
+		}
+		index(mapPb, tmp.Buyer, id)
+
+		AllIdsMap2[id] = &ID{Id: id, lastTime: tmp.LastTime, P: tmp}
+	}
+	// Close releases the cursor and reports any error that ended the iteration early.
+	if err := it.Close(); err != nil {
+		log.Println("load error:", err)
+	}
+	log.Println("load over")
+}

+ 61 - 0
fullproject/src_dev3/main.go

@@ -0,0 +1,61 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	"time"
+)
+
+var (
+	udpclient    mu.UdpClient              // UDP client object
+	SingleThread = make(chan bool, 1) // capacity-1 channel: processUdpMsg sends before starting a task, the task receives when done
+)
+
+// main only keeps the process alive; work arrives through the UDP listener
+// registered in init.
+func main() {
+
+	// Planned UDP task types (stype values):
+	// incremental run over an id range                              zl
+	// full run                                                      ql
+	// historical data: info id1,id2 / or an id range                ls
+	// forced merge: info id1,id2,id3 [project id], created if absent  qzhb
+	// forced split: project id, info id1,id2                        qzcf
+	// re-merge: info id1,id2,id3                                    cxhb
+	//loadData("project_0809", 1, true)
+	time.Sleep(99999 * time.Hour)
+}
+
+// processUdpMsg handles one incoming UDP message.
+//
+// For OP_TYPE_DATA (data from the previous node) the payload is decoded as a
+// JSON object, acknowledged with its "key" value ("udpok" when absent), and
+// dispatched on its "stype" value; only "ql" (full run) is handled.
+// For OP_NOOP (reply from the next node) a non-empty payload is logged.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA: // data from the previous node
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+		} else if mapInfo != nil {
+			key, _ := mapInfo["key"].(string)
+			if key == "" {
+				key = "udpok"
+			}
+			go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+			tasktype, _ := mapInfo["stype"].(string)
+			log.Println("tasktype:", tasktype)
+			switch tasktype {
+			case "ql":
+				// Take the single task slot only for a task that will release it.
+				// Taking it before the switch leaked the slot on any other stype
+				// and blocked every later message forever.
+				SingleThread <- true
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+					taskQl(mapInfo)
+				}()
+			default:
+				log.Println("unsupported tasktype:", tasktype)
+			}
+		}
+	case mu.OP_NOOP: // reply from the next node
+		ok := string(data)
+		if ok != "" {
+			log.Println("ok:", ok)
+		}
+	}
+}

+ 486 - 0
fullproject/src_dev3/merge.go

@@ -0,0 +1,486 @@
+package main
+
+import (
+	"math"
+	qu "qfw/util"
+	"sort"
+	//"strconv"
+	"strings"
+	"sync"
+)
+
+// findLock serializes candidate lookups so that only one runs at a time.
+var findLock = sync.Mutex{}
+
+// getCompareIds collects, from the index maps, the projects to compare one announcement against.
+//
+// For each non-empty key (pn in mapPn, pc and ptc in mapPc, pb in mapPb) it
+// fetches the bucket, creating and registering an empty one when absent,
+// locks it and appends it to res. bpn/bpc/bptc/bpb stay -1 when the bucket
+// already existed and hold the bucket's index in res when it was just created.
+// idArr and IDArr hold the de-duplicated project ids found in those buckets and
+// their *ID entries; each returned *ID is locked.
+//
+// The function calls wg.Add(1) and returns with every bucket in res and every
+// *ID in IDArr still locked; releasing them is left to the caller.
+//
+// NOTE(review): when pc and ptc are equal and non-empty, both lookups return the
+// same bucket and the second Lock blocks on the first — confirm callers never
+// pass equal values.
+func getCompareIds(pn, pc, ptc, pb string) (bpn, bpc, bptc, bpb int, res []*Key, idArr []string, IDArr []*ID) {
+	findLock.Lock()
+	defer findLock.Unlock()
+	wg.Add(1)
+	res = []*Key{}
+	// Position flags: -1 unless the bucket had to be created, then its index in res.
+	bpn, bpc, bptc, bpb = -1, -1, -1, -1
+	if pn != "" {
+		ids := mapPn[pn]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPn[pn] = ids
+			bpn = 0
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	if pc != "" {
+		ids := mapPc[pc]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPc[pc] = ids
+			bpc = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	if ptc != "" {
+		ids := mapPc[ptc]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPc[ptc] = ids
+			bptc = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+
+	if pb != "" {
+		ids := mapPb[pb]
+		if ids == nil {
+			ids = &Key{Arr: []string{}}
+			mapPb[pb] = ids
+			bpb = len(res)
+		}
+		ids.Lock.Lock()
+		res = append(res, ids)
+	}
+	repeatId := map[string]bool{}
+	idArr = []string{} // project ids
+	IDArr = []*ID{}    // project entries
+	for _, m := range res {
+		for _, id := range m.Arr {
+			if !repeatId[id] {
+				repeatId[id] = true
+				//_, _ = strconv.ParseInt(id[0:8], 16, 64)
+				AllIdsMapLock.Lock()
+				Id := AllIdsMap2[id]
+				AllIdsMapLock.Unlock()
+				if Id != nil {
+					Id.Lock.Lock()
+					idArr = append(idArr, id)
+					IDArr = append(IDArr, Id)
+				}
+			}
+		}
+	}
+	return
+}
+
+// startProjectMerge is the entry point of project merging for one announcement.
+//
+// It fetches the candidate projects via getCompareIds, compares info against each
+// candidate whose LastTime is within 360 days of info.Publishtime, and sorts the
+// acceptable candidates into three priority groups (comRes1 highest). The best
+// candidate of the first non-empty group is updated through UpdateProject; when
+// no group has a candidate, a new project is created through NewProject and
+// registered in AllIdsMap2 and in every looked-up index bucket.
+//
+// tmp is passed through unchanged to UpdateProject / NewProject.
+func startProjectMerge(info *Info, tmp map[string]interface{}) {
+	// Announcements carrying only a buyer, or none of the keys, cannot be merged.
+	// bpn, bpc, bptc, bpb: index in pids of a bucket that was newly created for this
+	// announcement's name / code / title code / buyer; -1 when the bucket already existed.
+	// pids: the locked index buckets.
+	// IDArr: the locked candidate project entries.
+	defer wg.Done()
+	bpn, bpc, bptc, bpb, pids, _, IDArr := getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+
+	// Everything locked by getCompareIds is released when this function returns.
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+	}
+	for _, id := range IDArr {
+		defer id.Lock.Unlock()
+	}
+
+	bFindProject := false
+	findPid := ""
+	// Candidate groups, filled by the comparison below.
+	comRes1 := []*ProjectInfo{} // highest priority
+	comRes2 := []*ProjectInfo{} // second priority
+	comRes3 := []*ProjectInfo{}
+
+	for _, v := range IDArr {
+		comStr := ""
+		compareProject := v.P
+		// NOTE (original author): the problem lies in LastTime!
+		diffTime := math.Abs(float64(info.Publishtime - compareProject.LastTime))
+		if diffTime < 360*86400 {
+			// Code letters: A equal, B contained, C not equal, D absent.
+			compareStr, score := comparePNCB(info, compareProject)
+
+			resVal := Select(compareStr, info, compareProject)
+			//---------------------------------------
+
+			if resVal > 0 {
+
+				compareBuyer := "D"
+				if info.Buyer != "" {
+					if info.Buyer == compareProject.Buyer {
+						compareBuyer = "A"
+						score += 3
+					} else if compareProject.Buyer != "" {
+						if strings.Contains(info.Buyer, compareProject.Buyer) || strings.Contains(compareProject.Buyer, info.Buyer) {
+							compareBuyer = "B"
+							score += 1
+						} else {
+							compareBuyer = "C"
+						}
+					}
+				}
+
+				//---------------------------------------
+
+				// compareCity is two letters: area result followed by city result.
+				compareCity := ""
+				if info.Area != "全国" && info.Area != "" && info.Area == compareProject.Area {
+					compareCity += "A"
+					score += 2
+				} else if info.Area == "全国" || compareProject.Area == "全国" {
+					compareCity += "B"
+					score += 1
+				} else {
+					compareCity += "C"
+				}
+				if compareCity != "C" {
+					if info.City != "" && info.City == compareProject.City {
+						compareCity += "A"
+						score += 2
+					} else {
+						if info.Area == "全国" || compareProject.Area == "全国" {
+							compareCity += "B"
+						} else if info.City == compareCity {
+							// NOTE(review): compares a city name with the one-letter area code,
+							// so this branch is effectively dead — confirm the intended condition.
+							compareCity += "B"
+						} else {
+							compareCity += "C"
+						}
+					}
+				} else {
+					compareCity += "C"
+				}
+				score2 := 0
+				if compareCity == "AA" {
+					if info.District != "" && info.District == compareProject.District {
+						score2 = 1
+					}
+				}
+
+				compareTime := "D"
+				//diffTime := math.Abs(float64(info.Publishtime - compareProject.Publistime[len(compareProject.Publistime)-1]))
+				if diffTime < 45*86400 {
+					compareTime = "A"
+					score += 2
+				} else if diffTime < 90*86400 {
+					compareTime = "B"
+					score += 1
+				}
+
+				compareAgency := "D"
+				if info.Agency != "" {
+					if info.Agency == compareProject.Agency {
+						compareAgency = "A"
+						score += 2
+						score2 += 1
+					} else if compareProject.Agency != "" {
+						if strings.Contains(info.Agency, compareProject.Agency) || strings.Contains(compareProject.Agency, info.Agency) {
+							compareAgency = "B"
+							score += 1
+							score2 += 1
+						} else {
+							compareAgency = "C"
+						}
+					}
+				}
+				compareBudget := "C"
+				if info.Budget > 0 && (info.Budget == compareProject.Budget || (compareProject.Bidamount > 0 && info.Budget > compareProject.Bidamount && (info.Budget-compareProject.Bidamount) < (0.1*info.Budget))) {
+					compareBudget = "A"
+					score += 1
+					score2 += 1
+				}
+				compareBidmount := "C"
+				if info.Bidamount > 0 && (info.Bidamount == compareProject.Bidamount || (compareProject.Budget > 0 && compareProject.Budget > info.Bidamount && (compareProject.Budget-info.Bidamount) < 0.1*compareProject.Budget)) {
+					compareBidmount = "A"
+					score += 1
+					score2 += 1
+				}
+
+				// Code order: project name, project code, title project code, buyer,
+				// area, city, publish time, agency, budget, bid amount.
+				comStr = compareStr + compareBuyer + compareCity + compareTime + compareAgency + compareBudget + compareBidmount
+				compareProject.comStr = comStr
+				// Store the score so the sort below orders candidates by this
+				// comparison; previously it was computed but never saved.
+				compareProject.score = score
+				// cityCode is the city letter. The original sliced compareCity[1:1],
+				// which is always "", so every "city not conflicting" guard was always true.
+				cityCode := compareCity[1:2]
+				eqV := 0
+				switch resVal {
+				case 3:
+					if compareBuyer < "C" {
+						eqV = 1
+					} else if compareBuyer == "D" {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							eqV = 2
+						} else if cityCode != "C" && score2 > 0 && compareTime == "A" {
+							eqV = 3
+						} else if compareTime != "D" && compareAgency != "C" && score2 > 1 {
+							eqV = 3
+						}
+					} else {
+						if "AA" == compareCity && compareAgency != "C" && compareTime != "D" {
+							eqV = 2
+						} else if cityCode != "C" && compareTime == "A" && score2 > 1 {
+							eqV = 3
+						}
+					}
+
+				case 2:
+					if compareBuyer < "C" {
+						if compareTime != "D" && "AA" == compareCity && compareAgency != "C" {
+							eqV = 2
+						} else if cityCode != "C" && compareTime == "A" && score2 > 0 {
+							eqV = 3
+						}
+					} else if compareBuyer == "D" {
+						if "AA" == compareCity && compareTime != "D" && score2 > 1 {
+							eqV = 3
+						} else if score2 > 2 && compareTime == "A" {
+							eqV = 3
+						}
+					} else {
+						if "AA" == compareCity && (compareAgency == "A" || score2 > 1) && compareTime == "A" {
+							eqV = 3
+						}
+					}
+				case 1:
+					if compareBuyer < "C" {
+						if compareTime != "D" && "AA" == compareCity && score2 > 0 && compareAgency != "C" {
+							eqV = 2
+						} else if cityCode != "C" && compareTime == "A" && score2 > 1 {
+							eqV = 3
+						}
+					} else if compareBuyer == "D" {
+						if "AA" == compareCity && compareTime != "D" && score2 > 1 && compareAgency != "C" {
+							eqV = 3
+						} else if cityCode != "C" && score2 > 1 && compareTime == "A" && compareAgency != "C" {
+							eqV = 3
+						}
+					} else {
+						if "AA" == compareCity && score2 > 2 && compareTime == "A" {
+							eqV = 3
+						}
+					}
+				}
+				switch eqV {
+				case 1:
+					comRes1 = append(comRes1, compareProject)
+				case 2:
+					comRes2 = append(comRes2, compareProject)
+				case 3:
+					comRes3 = append(comRes3, compareProject)
+				}
+			}
+		}
+	}
+	//-------------------------------- comparison finished -----------------------
+	// Update the index buckets and the matched project.
+	for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+		if len(resN) > 0 {
+			if len(resN) > 1 {
+				sort.Slice(resN, func(i, j int) bool {
+					return resN[i].score > resN[j].score
+				})
+			}
+
+			bFindProject = true
+			findPid = resN[0].Id.Hex()
+			// For every key of this announcement that was new to the indexes, file the
+			// matched project under it and record the new name / code / buyer on the project.
+			for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+				if bv > -1 {
+					pids[bv].Arr = append(pids[bv].Arr, findPid)
+					if k2 == 0 {
+						if resN[0].ProjectName == "" {
+							resN[0].ProjectName = info.ProjectName
+						} else {
+							if resN[0].MPN == nil {
+								resN[0].MPN = []string{info.ProjectName}
+							} else {
+								resN[0].MPN = append(resN[0].MPN, info.ProjectName)
+							}
+						}
+
+					} else if k2 < 3 {
+						if resN[0].ProjectCode == "" {
+							resN[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+						} else {
+							if resN[0].MPC == nil {
+								resN[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+							} else {
+								resN[0].MPC = append(resN[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+							}
+						}
+
+					} else {
+						if resN[0].Buyer == "" {
+							resN[0].Buyer = info.Buyer
+						}
+					}
+				}
+			}
+			UpdateProject(tmp, info, resN[0], kv+1, resN[0].comStr)
+			// TODO (original author): update the time kept in AllIdsMap.
+			break
+		}
+	}
+
+	if !bFindProject {
+		// No matching project: create one and register it everywhere.
+		id, p := NewProject(tmp, info)
+		//_, _ = strconv.ParseInt(id[0:8], 16, 64)
+		AllIdsMapLock.Lock()
+		//AllIdsMap[int(t)%idsMapSize][id] = &ID{Id: id, lastTime: info.Publishtime}
+		AllIdsMap2[id] = &ID{Id: id, lastTime: info.Publishtime, P: p}
+		AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+
+}
+
+// BinarySearch sorts s in place and then binary-searches it for k.
+// It returns the index at which k was found in the sorted slice, or -1 when k is absent.
+func BinarySearch(s []string, k string) int {
+	sort.Strings(s)
+	for left, right := 0, len(s)-1; left <= right; {
+		mid := (left + right) / 2
+		switch probe := s[mid]; {
+		case probe == k:
+			return mid
+		case probe < k:
+			left = mid + 1
+		default:
+			right = mid - 1
+		}
+	}
+	return -1
+}

+ 160 - 0
fullproject/src_dev3/merge_comparepncb.go

@@ -0,0 +1,160 @@
+package main
+
+import (
+	"strings"
+)
+
+// comparePNCB compares an announcement with a candidate project on project name,
+// project code and title-extracted project code.
+//
+// compareStr is three letters, one per field in that order: A equal, B one
+// contains the other, C different, D nothing to compare (the announcement's
+// value is empty or the project has no value). score gains 4 for each A and 2
+// for each B, plus 2 (A) or 1 (B) more when the announcement's value is longer
+// than 18 runes.
+func comparePNCB(info *Info, compareProject *ProjectInfo) (compareStr string, score int) {
+	if info.ProjectName != "" {
+		// Candidate names: the project's primary name plus its merged names.
+		pns := []string{}
+		if compareProject.ProjectName != "" {
+			pns = append(pns, compareProject.ProjectName)
+		}
+		if len(compareProject.MPN) > 0 {
+			pns = append(pns, compareProject.MPN...)
+		}
+		// ifind: 0 nothing compared, 1 equal, 2 contained, 3 different.
+		ifind := 0
+		for _, v := range pns {
+			if info.ProjectName == v {
+				ifind = 1
+				break
+			} else {
+				//if strings.Contains(info.ProjectName, v) || strings.Contains(v, info.ProjectName) ||
+				retv := CheckContain(info.ProjectName, v, info.Buyer)
+				if retv == 1 {
+					ifind = 1
+					break
+				} else if retv == 2 {
+					ifind = 2
+				} else if ifind == 0 {
+					ifind = 3
+				}
+			}
+		}
+		switch ifind {
+		case 0:
+			compareStr = "D"
+		case 1:
+			compareStr = "A"
+			score += 4
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 2
+			}
+		case 2:
+			compareStr = "B"
+			score += 2
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 1
+			}
+		case 3:
+			compareStr = "C"
+		}
+	} else {
+		compareStr = "D"
+	}
+
+	/*
+			Project codes: watch out for the - - ( ) ( ) characters.
+			init_text = ["号","(重)","(第二次)","(重)"]
+	all_clean_mark = ["[","(","【","(","〖","]",")","】",")","〗","-","〔","〕","《","[","]","{","}","{","—"," ","-","﹝","﹞","–"]
+	*/
+	// Second and third letters: the announcement's project code, then its title-extracted code.
+	for _, pc := range []string{info.ProjectCode, info.PTC} {
+		if pc != "" {
+			// Candidate codes: the project's primary code plus its merged codes.
+			pcs := []string{}
+			if compareProject.ProjectCode != "" {
+				pcs = append(pcs, compareProject.ProjectCode)
+			}
+			if len(compareProject.MPC) > 0 {
+				pcs = append(pcs, compareProject.MPC...)
+			}
+			ifind := 0
+			for _, v := range pcs {
+				if pc == v {
+					ifind = 1
+					break
+				} else {
+					// math.Abs(float64(len([]rune(pc))-len([]rune(v)))) < 6
+					//if !_numreg1.MatchString(pc) && !_zimureg1.MatchString(pc) && !_numreg1.MatchString(v) && !_zimureg1.MatchString(v)
+					if strings.Contains(pc, v) || strings.Contains(v, pc) {
+						// t1 is the longer code, t2 the shorter one.
+						t1 := pc
+						t2 := v
+						if len(v) > len(pc) {
+							t1 = v
+							t2 = pc
+						}
+						// If the longer code differs only by text matched by _datereg, treat the codes as equal.
+						t3 := strings.Replace(t1, t2, "", -1)
+						t3 = _datereg.ReplaceAllString(t3, "")
+						if t3 == "" {
+							ifind = 1
+							break
+						} else {
+							ifind = 2
+						}
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+			switch ifind {
+			case 0:
+				compareStr += "D"
+			case 1:
+				compareStr += "A"
+				score += 4
+				if len([]rune(pc)) > 18 {
+					score += 2
+				}
+			case 2:
+				compareStr += "B"
+				score += 2
+				if len([]rune(pc)) > 18 {
+					score += 1
+				}
+			case 3:
+				compareStr += "C"
+			}
+
+		} else {
+			compareStr += "D"
+		}
+	}
+	return
+}
+
+// CheckContain compares two project names after normalization.
+//
+// Both names first lose everything matched by replaceStr and the first
+// occurrence of infoBuyer. If they are not yet equal, runs of characters the
+// two names share are stripped from both. The result is 1 when the remainders
+// are equal, 2 when exactly one remainder is empty (one name contains the
+// other), and 3 otherwise.
+func CheckContain(b1, b2, infoBuyer string) (res int) {
+	b1 = strings.Replace(replaceStr.ReplaceAllString(b1, ""), infoBuyer, "", 1)
+	b2 = strings.Replace(replaceStr.ReplaceAllString(b2, ""), infoBuyer, "", 1)
+	if b1 == b2 {
+		return 1 // equal
+	}
+
+	// The rune snapshots are taken once; b1 and b2 keep shrinking while they are scanned.
+	left, right := []rune(b1), []rune(b2)
+	run := ""
+	// strip removes the currently collected shared run from both names and resets it.
+	strip := func() {
+		b1 = strings.Replace(b1, run, "", -1)
+		b2 = strings.Replace(b2, run, "", -1)
+		run = ""
+	}
+	for _, lr := range left {
+		for _, rr := range right {
+			if lr == rr {
+				run += string(lr)
+			} else if run != "" {
+				strip()
+			}
+		}
+	}
+	if run != "" {
+		strip()
+	}
+
+	switch {
+	case b1 == b2:
+		return 1 // equal
+	case b1 == "" || b2 == "":
+		return 2 // contained
+	default:
+		return 3 // not equal
+	}
+}

+ 276 - 0
fullproject/src_dev3/merge_select.go

@@ -0,0 +1,276 @@
+package main
+
+// Select turns a three-letter comparison string into a result level.
+//
+// compareStr is built from the letters A-D, one per compared element.
+// Combinations listed in compareNoPass, and combinations that match no case
+// below, yield 0. For the rest the level (1-3) depends on the combination
+// and, for many of them, on how long the record's project name (LenPN),
+// project code (LenPC) and title-derived code (LenPTC) are.
+// NOTE(review): the meaning callers attach to levels 1/2/3 is not visible
+// here - presumably a higher level is a stronger match; confirm at call sites.
+func Select(compareStr string, info *Info, compareInfo *ProjectInfo) (res int) {
+	// Nothing usable to compare (no matching name or code).
+	if compareNoPass[compareStr] {
+		return
+	}
+	// pick returns yes when cond holds and no otherwise.
+	pick := func(cond bool, yes, no int) int {
+		if cond {
+			return yes
+		}
+		return no
+	}
+	switch compareStr {
+	// Fixed levels.
+	case "AAA", "AAB", "ABA", "AAD", "ADA":
+		res = 3
+	case "ABD", "BAD", "ADB", "BDA":
+		res = 2
+	case "BDD", "BBC", "ACC", "ACD", "ADC", "BCC",
+		"CAC", "CAD", "CBA", "CBB", "CBC", "CBD", "CCA",
+		"CDA", "CDB", "DAC", "DBC", "DCA", "DCB":
+		res = 1
+	case "CCB":
+		// deliberately left at 0
+
+	// Level 3 or 2.
+	case "ABB":
+		res = pick(info.LenPTC > 6 || info.LenPC > 6, 3, 2)
+	case "BAA":
+		res = pick(info.LenPN > 10, 3, 2)
+	case "BAB":
+		res = pick(info.LenPN > 10 || info.LenPTC > 6, 3, 2)
+	case "BBA":
+		res = pick(info.LenPN > 10 || info.LenPC > 6, 3, 2)
+	case "DAA":
+		res = pick(info.LenPTC > 12 || info.LenPC > 12, 3, 2)
+	case "AAC":
+		res = pick(info.LenPN > 12 || info.LenPC > 9, 3, 2)
+
+	// Level 2 or 1.
+	case "BBB":
+		// At least two of the three elements must be long enough.
+		long := 0
+		for _, ok := range []bool{info.LenPN > 12, info.LenPC > 8, info.LenPTC > 8} {
+			if ok {
+				long++
+			}
+		}
+		res = pick(long > 1, 2, 1)
+	case "BBD":
+		res = pick(info.LenPC > 12 && info.LenPN > 16, 2, 1)
+	case "BDB":
+		res = pick(info.LenPTC > 12 && info.LenPN > 16, 2, 1)
+	case "ADD":
+		res = pick(info.LenPN > 14, 2, 1)
+	case "DAB", "DDA":
+		res = pick(info.LenPTC > 12, 2, 1)
+	case "DAD":
+		res = pick(info.LenPTC > 14, 2, 1)
+	case "DBA":
+		res = pick(info.LenPC > 12, 2, 1)
+	case "DBB":
+		res = pick(info.LenPTC > 12 && info.LenPC > 12, 2, 1)
+	case "ACA":
+		res = pick(info.LenPN > 12 || info.LenPTC > 9, 2, 1)
+	case "ACB":
+		res = pick(info.LenPN > 14 && info.LenPTC > 10, 2, 1)
+	case "BCA":
+		res = pick(info.LenPN > 12 && info.LenPTC > 9, 2, 1)
+	case "ABC", "BAC":
+		res = pick(info.LenPN > 14 || info.LenPC > 12, 2, 1)
+	case "CAA":
+		res = pick(info.LenPC > 10 || info.LenPTC > 10, 2, 1)
+	case "CAB":
+		res = pick(info.LenPC > 10 && info.LenPTC > 10, 2, 1)
+
+	// Level 1 or 0.
+	case "DBD", "DDB":
+		res = pick(info.LenPC > 8, 1, 0)
+
+	// Level 2, 1 or 0.
+	case "BCB":
+		if info.LenPN > 12 && info.LenPTC > 9 {
+			res = 2
+		} else if info.LenPN > 16 || info.LenPTC > 12 {
+			res = 1
+		}
+
+	// Project codes disagree.
+	case "BCD":
+		// Two longer codes that are both not short digit-only strings are
+		// treated as really different (MPC is not taken into account here).
+		bothRealCodes := info.LenPC > 4 && len(compareInfo.ProjectCode) > 4 &&
+			!_numreg1.MatchString(info.ProjectCode) && !_numreg1.MatchString(compareInfo.ProjectCode)
+		if !bothRealCodes && info.LenPN > 18 && info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+			res = 1
+		}
+	case "BDC":
+		// NOTE(review): this compares the title-code length with the length of
+		// compareInfo.ProjectCode - confirm that is the intended pair.
+		if info.LenPN > 18 && info.LenPTC != len([]rune(compareInfo.ProjectCode)) {
+			res = 1
+		}
+	}
+	return
+}

+ 181 - 0
fullproject/src_dev3/new_project.go

@@ -0,0 +1,181 @@
+package main
+
+import (
+	qu "qfw/util"
+	"strings"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+var FIELDS = []string{"area", "city", "district", "projectname", "projectcode", "buyer", "winner", "budget", "bidamount", "bidstatus", "agency", "projectscope"}
+
+// NewProject assembles the document of a brand-new project from a single
+// bidding record and passes it to NewPinfo together with a fresh object id.
+//
+// tmp is the raw record and thisinfo its parsed form. The return values are
+// the hex form of the new project id and the in-memory project built by
+// NewPinfo.
+func NewProject(tmp map[string]interface{}, thisinfo *Info) (string, *ProjectInfo) {
+	doc := make(map[string]interface{})
+	// Copy the whitelisted raw fields that exist on the record.
+	for _, name := range FIELDS {
+		if val := tmp[name]; val != nil {
+			doc[name] = val
+		}
+	}
+
+	doc["s_projectname"] = thisinfo.ProjectName
+	doc["createtime"] = time.Now().Unix()
+	doc["sourceinfoid"] = qu.BsonIdToSId(tmp["_id"])
+	doc["sourceinfourl"] = tmp["href"]
+	doc["topscopeclass"] = thisinfo.Topscopeclass
+	doc["subscopeclass"] = thisinfo.Subscopeclass
+	doc["s_subscopeclass"] = strings.Join(thisinfo.Subscopeclass, ",")
+	doc["s_winner"] = strings.Join(thisinfo.Winners, ",")
+
+	// Optional text fields are written only when they carry a value.
+	optional := [][2]string{
+		{"buyerperson", thisinfo.Buyerperson},
+		{"buyertel", thisinfo.Buyertel},
+		{"buyerclass", thisinfo.Buyerclass},
+	}
+	for _, kv := range optional {
+		if kv[1] != "" {
+			doc[kv[0]] = kv[1]
+		}
+	}
+	if thisinfo.Bidopentime > 0 {
+		doc["bidopentime"] = thisinfo.Bidopentime
+	}
+	if len(thisinfo.Winnerorder) > 0 {
+		doc["winnerorder"] = thisinfo.Winnerorder
+	}
+	if pkg := tmp["package"]; pkg != nil {
+		doc["package"] = pkg // no priority rule defined for packages
+	}
+
+	// The record itself becomes the first entry of the project's list.
+	doc["list"] = []bson.M{NewPushInfo(tmp)}
+	projectID := bson.NewObjectId()
+	doc["_id"] = projectID
+
+	project := NewPinfo(projectID, thisinfo, doc)
+	return projectID.Hex(), &project
+}
+
+// NewPushInfo extracts from a raw record the summary entry that is stored in
+// a project's "list" array.
+//
+// The base keys are always present in the result (with a nil value when the
+// record lacks them); winner, budget and bidamount are copied only when set.
+func NewPushInfo(tmp map[string]interface{}) bson.M {
+	entry := bson.M{"infoid": qu.BsonIdToSId(tmp["_id"])}
+	always := []string{
+		"comeintime", "publishtime", "title", "toptype", "subtype", "infoformat",
+		"area", "city", "projectname", "projectcode", "buyer", "href",
+	}
+	for _, key := range always {
+		entry[key] = tmp[key]
+	}
+	for _, key := range []string{"winner", "budget", "bidamount"} {
+		if val := tmp[key]; val != nil {
+			entry[key] = val
+		}
+	}
+	return entry
+}
+
+//生成存放在redis数组中的对象
+// NewPinfo builds the in-memory ProjectInfo for a new project and, as a side
+// effect, stores the prepared document set in ProjectColl through
+// MongoTool.SaveByOriID.
+//
+// A title-derived code longer than five runes is recorded as an additional
+// project code (MPC).
+func NewPinfo(id bson.ObjectId, thisinfo *Info, set map[string]interface{}) ProjectInfo {
+	extraCodes := []string{}
+	if thisinfo.LenPTC > 5 {
+		extraCodes = append(extraCodes, thisinfo.PTC)
+	}
+	project := ProjectInfo{
+		Id:  id,
+		Ids: []string{thisinfo.Id},
+
+		ProjectName: thisinfo.ProjectName,
+		ProjectCode: thisinfo.ProjectCode,
+		MPN:         []string{},
+		MPC:         extraCodes,
+
+		Buyer:      thisinfo.Buyer,
+		Buyerclass: thisinfo.Buyerclass,
+		Agency:     thisinfo.Agency,
+
+		Area:     thisinfo.Area,
+		City:     thisinfo.City,
+		District: thisinfo.District,
+
+		Topscopeclass: thisinfo.Topscopeclass,
+		Subscopeclass: thisinfo.Subscopeclass,
+		Winners:       thisinfo.Winners,
+		Winnerorder:   thisinfo.Winnerorder,
+
+		HasPackage: thisinfo.HasPackage,
+		Package:    map[string]interface{}{},
+
+		Bidopentime: thisinfo.Bidopentime,
+		FirstTime:   thisinfo.Publishtime,
+		LastTime:    thisinfo.Publishtime,
+		Budget:      thisinfo.Budget,
+		Bidamount:   thisinfo.Bidamount,
+	}
+	// Saved synchronously; the queued variant (savePool) is disabled.
+	MongoTool.SaveByOriID(ProjectColl, set)
+	return project
+}
+
+var updatePool = make(chan []map[string]interface{}, 30)
+
+//var savePool = make(chan map[string]interface{}, 6)
+//func SaveQueue() {
+//	arr := []map[string]interface{}{}
+//	sp := make(chan bool, 3)
+//	for {
+//		select {
+//		case <-beforUpdate:
+//			if len(arr) > 0 {
+//				MongoTool.SaveBulk(ProjectColl, arr...)
+//				arr = []map[string]interface{}{}
+//			}
+//		case v := <-savePool:
+//			arr = append(arr, v)
+//			if len(arr) > 50 {
+//				sp <- true
+//				go func(arr []map[string]interface{}) {
+//					MongoTool.SaveBulk(ProjectColl, arr...)
+//					<-sp
+//				}(arr)
+//				arr = []map[string]interface{}{}
+//			}
+//		case <-time.After(80 * time.Millisecond):
+//			if len(arr) > 0 {
+//				sp <- true
+//				go func(arr []map[string]interface{}) {
+//					MongoTool.SaveBulk(ProjectColl, arr...)
+//					<-sp
+//				}(arr)
+//				arr = []map[string]interface{}{}
+//			}
+//		}
+//	}
+//}
+
+// updateQueue drains updatePool forever and writes the collected updates to
+// ProjectColl in bulk.
+//
+// A batch is flushed once it holds more than 200 updates, or when no new
+// update has arrived for 500ms and at least one is pending. At most three
+// bulk writes run concurrently; a fourth flush blocks until a slot is free.
+func updateQueue() {
+	pending := [][]map[string]interface{}{}
+	slots := make(chan bool, 3)
+	// flush hands the current batch to a background bulk write and starts a
+	// new, empty batch.
+	flush := func() {
+		slots <- true
+		go func(batch [][]map[string]interface{}) {
+			MongoTool.UpdateBulk(ProjectColl, batch...)
+			<-slots
+		}(pending)
+		pending = [][]map[string]interface{}{}
+	}
+	for {
+		select {
+		case item := <-updatePool:
+			pending = append(pending, item)
+			if len(pending) > 200 {
+				flush()
+			}
+		case <-time.After(500 * time.Millisecond):
+			if len(pending) > 0 {
+				flush()
+			}
+		}
+	}
+}

+ 207 - 0
fullproject/src_dev3/task.go

@@ -0,0 +1,207 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"qfw/util"
+	"regexp"
+	"sync"
+	//"strings"
+	"time"
+)
+
+const (
+	InitMinTime = int64(1325347200) //最小时间位置2012
+)
+
+//全量合并
+// taskQl runs a full merge over the extraction collection.
+//
+// udpInfo may carry "db", "coll" and "thread"; missing values fall back to
+// MongoTool.DbName, ExtractColl and 1. After the index call succeeds, the
+// update queue and the memory cleaner are started, the smallest and largest
+// publishtime are looked up, and the records are processed through Mql:
+// first everything up to InitMinTime in one pass, then in windows of 50 days
+// with a one second pause between windows.
+func taskQl(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	db, _ := udpInfo["db"].(string)
+	if db == "" {
+		db = MongoTool.DbName
+	}
+	coll, _ := udpInfo["coll"].(string)
+	if coll == "" {
+		coll = ExtractColl
+	}
+
+	// 1. publishtime index.
+	// NOTE(review): mgo's EnsureIndexKey takes field names ("publishtime",
+	// "-publishtime"); the names passed here look like index names - verify.
+	sess := MongoTool.GetMgoConn()
+	indexErr := sess.DB(db).C(coll).EnsureIndexKey("publishtime_1", "publishtime_-1")
+	if indexErr != nil {
+		log.Println("publishtime_1索引不存在")
+	}
+	MongoTool.DestoryMongoConn(sess)
+	thread := util.IntAllDef(udpInfo["thread"], 1)
+	if indexErr != nil {
+		log.Println("task over!!!")
+		return
+	}
+
+	go updateQueue()
+	go clearMem()
+
+	// 2. Time range of the data.
+	// NOTE(review): both lookups read ExtractColl through MongoTool, not the
+	// db/coll chosen above - confirm this is intended.
+	earliest, ok := MongoTool.Find(ExtractColl, `{}`, "publishtime", `{"publishtime":1}`, true, 0, 1)
+	if !ok || earliest == nil || len(*earliest) != 1 {
+		return
+	}
+	startTime := util.Int64All((*earliest)[0]["publishtime"])
+	endTime := int64(0)
+	latest, ok := MongoTool.Find(ExtractColl, `{}`, "-publishtime", `{"publishtime":1}`, true, 0, 1)
+	if ok && latest != nil && len(*latest) == 1 {
+		endTime = util.Int64All((*latest)[0]["publishtime"])
+	}
+	log.Println("查询到的起始时间", startTime, endTime)
+
+	// The queries use $gt, so step back one second to include the first record.
+	startTime -= 1
+	sum := 0
+	// 3. Everything older than InitMinTime goes through in a single pass.
+	if startTime < InitMinTime {
+		sum = Mql(map[string]interface{}{
+			"publishtime": map[string]interface{}{
+				"$gt":  startTime,
+				"$lte": InitMinTime,
+			},
+		}, thread, db, coll, sum)
+		startTime = InitMinTime
+	}
+	// 4. The rest is walked in 50-day windows.
+	for startTime < endTime {
+		windowEnd := startTime + 50*86400
+		if windowEnd >= endTime {
+			windowEnd = endTime
+		}
+		sum = Mql(map[string]interface{}{
+			"publishtime": map[string]interface{}{
+				"$gt":  startTime,
+				"$lte": windowEnd,
+			},
+		}, thread, db, coll, sum)
+		startTime = windowEnd
+		time.Sleep(1 * time.Second)
+	}
+	log.Println("task over!!!")
+}
+
+var wg = sync.WaitGroup{}
+
+// Mql processes every record matched by q in publishtime order, merging each
+// usable record on a worker goroutine.
+//
+// thread bounds the number of concurrent merges; values below 1 are treated
+// as 1, because an unbuffered pool would block forever on the first record.
+// sum is the running record counter across calls; the updated value is
+// returned. A record is skipped when it cannot be parsed, when pnbval is 0,
+// or when pnbval is 1 and the buyer is set.
+//
+// NOTE(review): currentTime is written by the workers without
+// synchronisation while other goroutines read it - consider atomics.
+func Mql(q map[string]interface{}, thread int, db, coll string, sum int) int {
+	defer util.Catch()
+	if thread < 1 {
+		thread = 1
+	}
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	query := sess.DB(db).C(coll).Find(q).Sort("publishtime").Iter()
+	pool := make(chan bool, thread)
+	count := 0
+	for {
+		// A fresh map per record: the worker keeps using it after the loop
+		// has moved on.
+		tmp := make(map[string]interface{})
+		if !query.Next(tmp) {
+			break
+		}
+		info := ParseInfo(tmp)
+		if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
+			pool <- true
+			go func(info *Info, tmp map[string]interface{}) {
+				defer func() {
+					currentTime = info.Publishtime
+					<-pool
+				}()
+				startProjectMerge(info, tmp)
+			}(info, tmp)
+		}
+		if sum%1000 == 0 {
+			log.Println("current", sum)
+		}
+		sum++
+		count++
+	}
+	// Release the cursor and surface an iteration error; without this a
+	// failed query is indistinguishable from an exhausted one.
+	if err := query.Close(); err != nil {
+		log.Println("sontask iter error:", err, q)
+	}
+	// Filling the pool blocks until every worker has given its slot back.
+	for n := 0; n < thread; n++ {
+		pool <- true
+	}
+	log.Println("sontask over:", count, sum, q)
+	return sum
+}
+
+var (
+	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	pcReplace   = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$")
+)
+
+// ParseInfo converts a raw record into an *Info (via a JSON round trip) and
+// derives the values used for matching.
+//
+// It extracts a project code from the title (PTC), strips decorations from
+// the project name and code with pcReplace, drops buyers of two runes or
+// fewer, fills LenPC/LenPTC/LenPN and counts in pnbval how many identifying
+// elements are present. Returns nil when the record cannot be decoded.
+// NOTE(review): a code taken from the title raises pnbval once when found and
+// again in the code check below - confirm the double count is intended.
+func ParseInfo(tmp map[string]interface{}) (info *Info) {
+	raw, _ := json.Marshal(tmp)
+	var parsed *Info
+	json.Unmarshal(raw, &parsed)
+	if parsed == nil {
+		return nil
+	}
+	if len(parsed.Topscopeclass) == 0 {
+		parsed.Topscopeclass = []string{}
+	}
+	if len(parsed.Subscopeclass) == 0 {
+		parsed.Subscopeclass = []string{}
+	}
+
+	// Try the title patterns in order; the first acceptable capture wins.
+	attempts := []struct {
+		re    *regexp.Regexp
+		group int
+	}{
+		{titleGetPc, 1},
+		{titleGetPc1, 3},
+		{titleGetPc2, 1},
+	}
+	for _, a := range attempts {
+		m := a.re.FindStringSubmatch(parsed.Title)
+		if len(m) <= a.group {
+			continue
+		}
+		code := m[a.group]
+		// Must be longer than six bytes, differ from the extracted project
+		// code and be neither a short digit-only nor a short letter-only string.
+		if len(code) > 6 && parsed.ProjectCode != code && !numCheckPc.MatchString(code) && !_zimureg1.MatchString(code) {
+			parsed.PTC = code
+			parsed.pnbval++
+			break
+		}
+	}
+
+	if parsed.ProjectName != "" {
+		parsed.ProjectName = pcReplace.ReplaceAllString(parsed.ProjectName, "")
+		if parsed.ProjectName != "" {
+			parsed.pnbval++
+		}
+	}
+
+	// Only one of the two codes is cleaned: the project code when present,
+	// otherwise the title code.
+	switch {
+	case parsed.ProjectCode != "":
+		parsed.ProjectCode = pcReplace.ReplaceAllString(parsed.ProjectCode, "")
+	case parsed.PTC != "":
+		parsed.PTC = pcReplace.ReplaceAllString(parsed.PTC, "")
+	}
+	if parsed.ProjectCode != "" || parsed.PTC != "" {
+		parsed.pnbval++
+	}
+	if parsed.ProjectCode == parsed.PTC {
+		parsed.PTC = ""
+	}
+
+	if parsed.Buyer != "" && len([]rune(parsed.Buyer)) > 2 {
+		parsed.pnbval++
+	} else {
+		parsed.Buyer = ""
+	}
+
+	parsed.LenPC = len([]rune(parsed.ProjectCode))
+	parsed.LenPTC = len([]rune(parsed.PTC))
+	parsed.LenPN = len([]rune(parsed.ProjectName))
+	return parsed
+}

+ 231 - 0
fullproject/src_dev3/update_project.go

@@ -0,0 +1,231 @@
+package main
+
+import (
+	qu "qfw/util"
+	"sort"
+	"strings"
+)
+
+// UpdateProject merges one bidding record into an existing project and queues
+// the resulting MongoDB update ($set of the merged fields plus $push of the
+// record summary onto "list") on updatePool.
+//
+// tmp is the raw record, thisinfo its parsed form, pInfo the in-memory
+// project (updated in place through EqInfoUpdate) and comStr the comparison
+// string stored with the pushed entry. weight is currently unused.
+func UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string) {
+	// EqInfoUpdate copies the record's amounts into pInfo. Whether the
+	// project was still missing them therefore has to be captured first;
+	// checked afterwards, pInfo is never zero and the stored document would
+	// never receive the value.
+	missingBidamount := pInfo.Bidamount == 0
+	missingBudget := pInfo.Budget == 0
+	EqInfoUpdate(thisinfo, pInfo)
+
+	set := map[string]interface{}{}
+	set["topscopeclass"] = pInfo.Topscopeclass
+	set["subscopeclass"] = pInfo.Subscopeclass
+	// The joined forms are wrapped in commas when non-empty.
+	s_subscopeclass := strings.Join(pInfo.Subscopeclass, ",")
+	if len(s_subscopeclass) > 0 {
+		s_subscopeclass = "," + s_subscopeclass + ","
+	}
+	set["s_subscopeclass"] = s_subscopeclass
+	s_winner := strings.Join(pInfo.Winners, ",")
+	if len(s_winner) > 0 {
+		s_winner = "," + s_winner + ","
+	}
+	set["s_winner"] = s_winner
+	if pInfo.Buyerperson != "" && pInfo.Buyertel != "" {
+		set["buyerperson"] = pInfo.Buyerperson
+		set["buyertel"] = pInfo.Buyertel
+	}
+	if pInfo.Buyerclass != "" {
+		set["buyerclass"] = pInfo.Buyerclass
+	}
+	if pInfo.District != "" {
+		set["district"] = pInfo.District
+	}
+	if pInfo.Bidopentime > 0 {
+		set["bidopentime"] = pInfo.Bidopentime
+	}
+	if len(pInfo.Winnerorder) > 0 {
+		set["winnerorder"] = pInfo.Winnerorder
+	}
+	if thisinfo.HasPackage {
+		set["multipackage"] = 1
+	} else {
+		set["multipackage"] = 0
+	}
+	if pInfo.ProjectName != "" {
+		set["projectname"] = pInfo.ProjectName
+	}
+	if pInfo.ProjectCode != "" {
+		set["projectcode"] = pInfo.ProjectCode
+	}
+	if pInfo.Buyer != "" {
+		set["buyer"] = pInfo.Buyer
+	}
+
+	// Budget and winning amount: written only when the project had none yet.
+	if qu.Float64All(tmp["bidamount"]) > 0 && missingBidamount {
+		set["bidamount"] = tmp["bidamount"]
+	}
+	if qu.Float64All(tmp["budget"]) > 0 && missingBudget {
+		set["budget"] = tmp["budget"]
+	}
+
+	set["mpn"] = pInfo.MPN
+	set["mpc"] = pInfo.MPC
+	set["area"] = pInfo.Area
+	set["city"] = pInfo.City
+
+	if thisinfo.dealtype == 1 {
+		// Lift selected fields of a sub-package onto the record before it is
+		// summarised below. The comma-ok assertion keeps a missing or
+		// differently typed "package" from panicking. When several packages
+		// exist, the one iterated last (map order) wins, as before.
+		packages, _ := tmp["package"].(map[string]interface{})
+		var sonpackage map[string]interface{}
+		for _, obj := range packages {
+			sonpackage, _ = obj.(map[string]interface{})
+		}
+		for _, v2 := range []string{"budget", "budget_w", "winner", "winner_w", "bidstatus", "bidstatus_w"} {
+			if sonpackage[v2] != nil {
+				tmp[v2] = sonpackage[v2]
+			}
+		}
+	}
+	// Field-priority handling (extractpos) and multi-package merging against
+	// the stored document are disabled in this version.
+
+	// Keep the original record data as a list entry.
+	push := NewPushInfo(tmp)
+	push["compareStr"] = comStr
+	updatePool <- []map[string]interface{}{
+		{"_id": pInfo.Id},
+		{
+			"$set":  set,
+			"$push": map[string]interface{}{"list": push},
+		},
+	}
+}
+
+// EqInfoUpdate folds the values of one record (thisinfo) into the in-memory
+// project pInfo.
+//
+// It widens the first/last publish time, appends the record id, fills or
+// replaces buyer, agency and project code when the project's value is empty
+// or short, takes over location, contact, class, open time and amounts, and
+// merges the class, winner and winner-order lists. The merged lists are left
+// sorted.
+func EqInfoUpdate(thisinfo *Info, pInfo *ProjectInfo) {
+	// Publish time range and record ids.
+	if thisinfo.Publishtime > pInfo.LastTime {
+		pInfo.LastTime = thisinfo.Publishtime
+	}
+	if pInfo.FirstTime == 0 || (thisinfo.Publishtime < pInfo.FirstTime && thisinfo.Publishtime > 0) {
+		pInfo.FirstTime = thisinfo.Publishtime
+	}
+	pInfo.Ids = append(pInfo.Ids, thisinfo.Id)
+
+	// prefer returns candidate when current is empty (and candidate is not),
+	// or when current is shorter than limit runes and candidate longer.
+	prefer := func(current, candidate string, limit int) string {
+		fillsGap := current == "" && candidate != ""
+		isLonger := len([]rune(current)) < limit && len([]rune(candidate)) > limit
+		if fillsGap || isLonger {
+			return candidate
+		}
+		return current
+	}
+	pInfo.Buyer = prefer(pInfo.Buyer, thisinfo.Buyer, 5)
+	pInfo.Agency = prefer(pInfo.Agency, thisinfo.Agency, 5)
+	pInfo.ProjectCode = prefer(pInfo.ProjectCode, thisinfo.ProjectCode, 6)
+
+	// A concrete area replaces the nationwide placeholder.
+	if pInfo.Area == "全国" && thisinfo.Area != "全国" {
+		pInfo.Area = thisinfo.Area
+		pInfo.City = thisinfo.City
+	}
+	if thisinfo.District != "" {
+		pInfo.District = thisinfo.District
+	}
+	if thisinfo.Buyerperson != "" && thisinfo.Buyertel != "" {
+		pInfo.Buyerperson = thisinfo.Buyerperson
+		pInfo.Buyertel = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		pInfo.Buyerclass = thisinfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > 0 {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+	}
+	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
+		pInfo.Bidamount = thisinfo.Bidamount
+	}
+	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
+		pInfo.Budget = thisinfo.Budget
+	}
+
+	// union adds the entries of extra that are missing from base. base is
+	// sorted first and re-sorted after every insertion so that BinarySearch
+	// stays valid; nothing happens when extra is empty.
+	union := func(base, extra []string) []string {
+		if len(extra) == 0 {
+			return base
+		}
+		sort.Strings(base)
+		for _, item := range extra {
+			if BinarySearch(base, item) == -1 {
+				base = append(base, item)
+				sort.Strings(base)
+			}
+		}
+		return base
+	}
+	pInfo.Topscopeclass = union(pInfo.Topscopeclass, thisinfo.Topscopeclass)
+	pInfo.Subscopeclass = union(pInfo.Subscopeclass, thisinfo.Subscopeclass)
+	pInfo.Winners = union(pInfo.Winners, thisinfo.Winners)
+	pInfo.Winnerorder = union(pInfo.Winnerorder, thisinfo.Winnerorder)
+}

+ 87 - 0
fullproject/src_dev4/clearmem.go

@@ -0,0 +1,87 @@
+package main
+
+import (
+	"log"
+
+	"github.com/robfig/cron"
+)
+
+//定时清理内存
+//当前位置时间,小于此时间超过6个月,则清理
+var currentTime = int64(0)
+var validTime = int64(6 * 30 * 86400)
+
+// clearMem schedules the periodic eviction of stale projects from the
+// in-memory indexes and then blocks forever.
+//
+// The job runs on the cron spec "50 0/3 * * * *". Holding findLock and
+// AllIdsMapLock, and after wg.Wait(), it removes every AllIdsMap2 entry whose
+// lastTime is more than validTime behind currentTime, together with its id in
+// the buyer, project-name and project-code indexes.
+func clearMem() {
+	c := cron.New()
+	c.AddFunc("50 0/3 * * * *", func() {
+		findLock.Lock()
+		defer findLock.Unlock()
+		wg.Wait()
+		AllIdsMapLock.Lock()
+		defer AllIdsMapLock.Unlock()
+
+		// unlink removes id from index[name] and deletes the key once its
+		// id list is empty. Empty names and unknown keys are ignored.
+		unlink := func(index map[string]*Key, name, id string) {
+			if name == "" {
+				return
+			}
+			entry := index[name]
+			if entry == nil {
+				return
+			}
+			entry.Lock.Lock()
+			entry.Arr = deleteSlice(entry.Arr, id)
+			if len(entry.Arr) == 0 {
+				delete(index, name)
+			}
+			entry.Lock.Unlock()
+		}
+
+		clearNum := 0
+		for id, rec := range AllIdsMap2 {
+			if currentTime-rec.lastTime <= validTime {
+				continue
+			}
+			clearNum++
+			delete(AllIdsMap2, id)
+			unlink(mapPb, rec.P.Buyer, id)
+			for _, name := range append([]string{rec.P.ProjectName}, rec.P.MPN...) {
+				unlink(mapPn, name, id)
+			}
+			for _, code := range append([]string{rec.P.ProjectCode}, rec.P.MPC...) {
+				unlink(mapPc, code, id)
+			}
+		}
+		log.Println("清除完成:", clearNum, len(AllIdsMap2))
+	})
+	c.Start()
+	defer c.Stop()
+	select {}
+}
+
+// deleteSlice removes every occurrence of v from arr and returns the
+// shortened slice. The filtering is done in place, so the result shares (and
+// overwrites) arr's backing array.
+func deleteSlice(arr []string, v string) []string {
+	j := 0
+	for _, v1 := range arr {
+		if v1 != v {
+			arr[j] = v1
+			// Advance the write position; without this every kept element
+			// landed on index 0 and the result was always empty.
+			j++
+		}
+	}
+	return arr[:j]
+}

+ 16 - 0
fullproject/src_dev4/config.json

@@ -0,0 +1,16 @@
+{
+    "thread": 1,
+    "mongodbServers": "192.168.3.207:27082",
+    "mongodbPoolSize": 10,
+    "mongodbName": "cesuo",
+    "extractColl": "key1_biddingall",
+    "projectColl": "projectset_0809",
+    "redisaddrs": "ids=192.168.3.207:1378,keys=192.168.3.207:1378,info=192.168.3.207:1378",
+    "redisPoolSize": 20,
+    "jkmail": {
+        "to": "zhangjinkun@topnet.net.cn",
+        "api": "http://10.171.112.160:19281/_send/_mail"
+    },
+    "udpport": ":1482",
+    "nextNode": []
+}

+ 301 - 0
fullproject/src_dev4/init.go

@@ -0,0 +1,301 @@
+package main
+
+import (
+	"log"
+	mu "mfw/util"
+	"qfw/util"
+	"qfw/util/mongodb"
+	"regexp"
+	"sync"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+const (
+	ProjectCache = "info" //存放每条项目信息,key为项目ID
+)
+
+var (
+	Sysconfig                map[string]interface{} //读取配置文件
+	MongoTool                mongodb.MongodbSim     //mongodb连接
+	ExtractColl, ProjectColl string                 //抽取表、项目表
+	CurrentMegerTime         int64                  //当前合并到的信息的时间
+	CurrentMegerCount        int                    //当前合并计数
+	MultiThread              = make(chan bool, 5)   //项目合并线程
+	AllIdsMap2               = map[string]*ID{}
+	AllIdsMapLock            = sync.Mutex{}
+)
+
+var (
+	//判断是日期
+	_datereg   = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
+	_numreg1   = regexp.MustCompile("^[0-9-]{1,8}$")
+	_zimureg1  = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
+	_nzreg     = regexp.MustCompile("^[0-9a-zA-Z-]+$")
+	_hanreg    = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
+	replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
+	//判断带有分包、等特定词的
+	pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
+	//判断包含数值
+	nreg1 = regexp.MustCompile("[0-9]{2,}")
+	//判断包含字母
+	zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
+	//判断包含汉字
+	hreg1 = regexp.MustCompile(`[\p{Han}]+`)
+	//判断项目编号是在10以内的纯数字结构
+	numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
+
+	//存放项目名称
+	mapPn = map[string]*Key{}
+	//存放项目编号
+	mapPc = map[string]*Key{}
+	//存放采购单位
+	mapPb = map[string]*Key{}
+	//仅初始化使用
+	compareNoPass = map[string]bool{}
+	compareAB     = map[string]bool{}
+	compareAB2D   = map[string]bool{}
+	compareABD    = map[string]bool{}
+	compareAB2CD  = map[string]bool{}
+	compareABCD   = map[string]bool{}
+)
+
+func init() {
+	util.ReadConfig(&Sysconfig)
+	MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
+	MongoTool = mongodb.MongodbSim{
+		MongodbAddr: Sysconfig["mongodbServers"].(string),
+		Size:        util.IntAll(Sysconfig["mongodbPoolSize"]),
+		DbName:      Sysconfig["mongodbName"].(string),
+	}
+	MongoTool.InitPool()
+	ExtractColl = Sysconfig["extractColl"].(string)
+	ProjectColl = Sysconfig["projectColl"].(string)
+
+	udpport, _ := Sysconfig["udpport"].(string)
+	udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", udpport)
+
+	//---不能通过
+	vm := []string{"C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareNoPass[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------")
+
+	//三个元素一致 [AB][AB][AB],分值最高
+	vm = []string{"A", "B"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareAB[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB))
+	//---至少两个一致,其他可能不存在
+	//[AB][AB][ABD]
+	//[AB][ABD][AB]
+	vm = []string{"A", "B"}
+	vm2 := []string{"A", "B", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2D))
+	//---至少一个一致,其他可能不存在
+	//[ABD][ABD][ABD] //已经删除DDD
+	vm = []string{"A", "B", "D"}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
+					compareABD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABD))
+
+	//[AB][ABCD][AB]
+	//[AB][AB][ABCD]
+	vm = []string{"A", "B"}
+	vm2 = []string{"A", "B", "C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2CD))
+	//[ABECD][ABECD][ABECD]  //已经删除[CD][CD][CD]   //这个要重点讨论
+	vm = []string{"A", "B", "C", "D"}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
+					compareABCD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+}
+
+// CheckHanAndNum reports whether str matches both nreg1 (a run of at least
+// two digits) and hreg1 (at least one Han character).
+func CheckHanAndNum(str string) (b bool) {
+	if !nreg1.MatchString(str) {
+		return false
+	}
+	return hreg1.MatchString(str)
+}
+// CheckZimuAndNum reports whether str matches both zreg1 (at least one ASCII
+// letter) and nreg1 (a run of at least two digits).
+func CheckZimuAndNum(str string) (b bool) {
+	if !zreg1.MatchString(str) {
+		return false
+	}
+	return nreg1.MatchString(str)
+}
+
+type KeyMap struct { // KeyMap bundles a string-keyed map of *Key with a mutex.
+	Lock sync.Mutex // presumably guards Map - confirm at the call sites
+	Map  map[string]*Key
+}
+
+type ID struct { // ID is the entry kept in AllIdsMap2 for one project.
+	Id       string // hex form of the project id (loadData stores tmp.Id.Hex())
+	Lock     sync.Mutex
+	lastTime int64 // project's last time; clearMem compares it with currentTime to evict the entry
+	pos      int
+	P        *ProjectInfo // the cached project
+}
+type Key struct { // Key is the value type of the mapPn/mapPc/mapPb indexes.
+	Arr  []string // hex project ids filed under the key
+	Lock sync.Mutex // locked by clearMem while it edits Arr
+}
+type IdAndLock struct { // IdAndLock pairs an id string with its own mutex.
+	Id   string
+	Lock sync.Mutex
+}
+
+// NewKeyMap returns a KeyMap with an empty, ready-to-use map and an unlocked
+// mutex.
+func NewKeyMap() *KeyMap {
+	km := new(KeyMap) // the zero-value mutex is already unlocked
+	km.Map = map[string]*Key{}
+	return km
+}
+
+// Info is the entity for one bidding record.
+type Info struct {
+	Id            string                 `json:"_id"`
+	Href          string                 `json:"href"` // source URL
+	Publishtime   int64                  `json:"publishtime"`
+	Title         string                 `json:"title"`
+	TopType       string                 `json:"toptype"`
+	SubType       string                 `json:"subtype"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"`
+	Buyer         string                 `json:"buyer"`
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`
+	Area          string                 `json:"area"`
+	City          string                 `json:"city"`
+	District      string                 `json:"district"`
+	HasPackage    bool                   `json:"haspackage"`
+	Package       map[string]interface{} `json:"package"`
+	PNum          string                 `json:"pnum"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Buyerclass    string                 `json:"buyerclass"`
+	Bidopentime   int64                  `json:"bidopentime"`
+	Budget        float64                `json:"budget"`
+	Bidamount     float64                `json:"bidamount"`
+	Winners       []string
+	dealtype      int
+
+	Winnerorder []string
+
+	PTC    string // project code extracted from the title
+	pnbval int    // how many of project name, code and buyer are present
+	LenPC  int    // length of the project code
+	LenPN  int    // length of the project name
+	LenPTC int    // length of the project code extracted from the title
+	// The next three fields are used while comparing, when computing containment.
+	PNBH  int // 0 initial, + contains, - is contained
+	PCBH  int
+	PTCBH int
+}
+
+// ProjectInfo is the entity for one merged project.
+type ProjectInfo struct {
+	Id            bson.ObjectId          `bson:"_id"`
+	FirstTime     int64                  `json:"firsttime"` // earliest time of the project
+	LastTime      int64                  `json:"lasttime"`  // latest time of the project
+	Ids           []string               `json:"ids"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"` // sub-industry classes
+	Winners       []string               `json:"winners"`       // winning bidders
+	ProjectName   string                 `json:"projectname"`   // project name
+	ProjectCode   string                 `json:"projectcode"`   // project code, unique (digit-only codes weigh less)
+	Buyer         string                 `json:"buyer"`         // purchasing unit, unique
+	MPN           []string               `json:"mpn"`           // additional project names collected by merging
+	MPC           []string               `json:"mpc"`           // additional project codes collected by merging
+	Buyerperson   string                 `json:"buyerperson"`   // buyer contact person
+	Buyertel      string                 `json:"buyertel"`      // buyer contact phone
+	Agency        string                 `json:"agency"`        // agency
+	Area          string                 `json:"area"`          // region
+	City          string                 `json:"city"`          // city
+	District      string                 `json:"district"`      // district/county
+	HasPackage    bool                   `json:"haspackage"`    // whether the project has sub-packages
+	Package       map[string]interface{} `json:"package"`       // sub-package objects used for comparison
+	Buyerclass    string                 `json:"buyerclass"`    // buyer classification
+	Bidopentime   int64                  `json:"bidopentime"`   // bid opening time
+	Zbtime        int64                  `json:"zbtime"`        // tender time
+	Jgtime        int64                  `json:"jgtime"`        // result (award) time
+	Bidamount     float64                `json:"bidamount"`     // winning bid amount
+	Budget        float64                `json:"budget"`        // budget
+	Winnerorder   []string               `json:"winnerorder"`   // winning candidates
+	score         int
+	comStr        string
+}

+ 65 - 0
fullproject/src_dev4/load_data.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"time"
+	//"encoding/json"
+	"log"
+)
+
+// loadData performs the initial load: it iterates every document of
+// projectColl and registers each project in the in-memory indexes mapPn
+// (project names), mapPc (project codes), mapPb (buyers) and in AllIdsMap2.
+//
+// The original comment says the default is to load the most recent six
+// months, but the query below is unfiltered and month is not used.
+// bCacheRedis only guards an empty placeholder branch.
+//
+// AllIdsMapLock is held for the whole load.
+func loadData(projectColl string, month int, bCacheRedis bool) {
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+
+	q := map[string]interface{}{}
+	it := sess.DB(MongoTool.DbName).C(projectColl).Find(&q).Iter()
+	AllIdsMapLock.Lock()
+	defer AllIdsMapLock.Unlock()
+
+	// register appends id to the bucket stored under key, creating the bucket
+	// on first use; empty keys are ignored.
+	register := func(index map[string]*Key, key, id string) {
+		if key == "" {
+			return
+		}
+		bucket := index[key]
+		if bucket == nil {
+			bucket = &Key{Arr: []string{}}
+			index[key] = bucket
+		}
+		bucket.Arr = append(bucket.Arr, id)
+	}
+
+	n := 0
+	for {
+		// A fresh ProjectInfo per document: the pointer is stored in
+		// AllIdsMap2, so reusing one value would make every entry alias the
+		// same (last decoded) project.
+		tmp := &ProjectInfo{}
+		if !it.Next(tmp) {
+			break
+		}
+		n++
+		if n%1000 == 0 {
+			log.Println("current", n, "\n", tmp.Id, tmp)
+			time.Sleep(2 * time.Second)
+		}
+		id := tmp.Id.Hex()
+		for _, v := range append([]string{tmp.ProjectName}, tmp.MPN...) {
+			register(mapPn, v, id)
+		}
+		for _, v := range append([]string{tmp.ProjectCode}, tmp.MPC...) {
+			register(mapPc, v, id)
+		}
+		register(mapPb, tmp.Buyer, id)
+
+		AllIdsMap2[id] = &ID{Id: id, lastTime: tmp.LastTime, P: tmp}
+
+		if bCacheRedis {
+			// store into redis (not implemented)
+
+		}
+	}
+	// Close releases the cursor and reports any error that ended the iteration.
+	if err := it.Close(); err != nil {
+		log.Println("load err:", err)
+	}
+	log.Println("load over")
+}

+ 68 - 0
fullproject/src_dev4/main.go

@@ -0,0 +1,68 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	"time"
+)
+
+var (
+	udpclient    mu.UdpClient // UDP client object
+	// SingleThread is a capacity-1 channel: processUdpMsg sends on it before
+	// starting a task and the task goroutine receives from it when it finishes.
+	SingleThread = make(chan bool, 1)
+)
+
+// main, as written here, only blocks in a very long sleep; the loadData call
+// is commented out. The list below records the planned UDP task types.
+func main() {
+
+	// udp: run incremental data, id range                                   zl
+	// udp: run full data                                                    ql
+	// udp: run historical data, info id1,id2 / or an id range               ls
+	// udp: force merge, info id1,id2,id3 [project id], created if missing   qzhb
+	// udp: force split, project id, info id1,id2                            qzcf
+	// udp: re-merge, info id1,id2,id3                                       cxhb
+	//loadData("project_0809", 1, true)
+	time.Sleep(99999 * time.Hour)
+}
+
+// processUdpMsg handles one incoming UDP message.
+//
+// For mu.OP_TYPE_DATA (data from the previous node) the payload is decoded as
+// a JSON object, acknowledged with its "key" value ("udpok" when absent) and,
+// depending on "stype", dispatched to taskQl ("ql") or Ids ("ids") in a new
+// goroutine. SingleThread limits this to one running task at a time; the call
+// blocks until the slot is free.
+//
+// For mu.OP_NOOP (reply from the next node) a non-empty payload is logged.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA: // data from the previous node
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+			return
+		}
+		if mapInfo == nil {
+			return
+		}
+		key, _ := mapInfo["key"].(string)
+		if key == "" {
+			key = "udpok"
+		}
+		go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+
+		tasktype, _ := mapInfo["stype"].(string)
+		log.Println("tasktype:", tasktype)
+		var task func()
+		switch tasktype {
+		case "ql":
+			task = func() { taskQl(mapInfo) }
+		case "ids":
+			task = func() { Ids(mapInfo) }
+		default:
+			// Unknown task type: do not take the SingleThread slot, otherwise
+			// nothing would ever release it and every later task would block.
+			log.Println("unknown tasktype:", tasktype)
+			return
+		}
+		SingleThread <- true
+		go func() {
+			defer func() {
+				<-SingleThread
+			}()
+			task()
+		}()
+	case mu.OP_NOOP: // reply from the next node
+		ok := string(data)
+		if ok != "" {
+			log.Println("ok:", ok)
+		}
+	}
+}

+ 406 - 0
fullproject/src_dev4/merge.go

@@ -0,0 +1,406 @@
+package main
+
+import (
+	"log"
+	"math"
+	qu "qfw/util"
+	"sort"
+	//"strconv"
+	"strings"
+	"sync"
+)
+
+// findLock makes the lookup of comparable projects in the maps single-threaded.
+var findLock = sync.Mutex{}
+
+// getCompareIds looks up, in the name/code/buyer indexes, the projects that
+// the given project name (pn), project code (pc), title project code (ptc)
+// and buyer (pb) should be compared against.
+//
+// Results:
+//   - res: the index buckets for every non-empty key, in the order pn, pc,
+//     ptc, pb. Each bucket is returned LOCKED.
+//   - bpn, bpc, bptc, bpb: position in res of the bucket when it had to be
+//     newly created for that key, otherwise -1.
+//   - idArr / IDArr: the de-duplicated project ids found in those buckets and
+//     their ID entries from AllIdsMap2. Each ID is returned LOCKED.
+//
+// The caller (startProjectMerge) is responsible for unlocking every bucket
+// and ID and for calling wg.Done to balance the wg.Add(1) done here.
+func getCompareIds(pn, pc, ptc, pb string) (bpn, bpc, bptc, bpb int, res []*Key, idArr []string, IDArr []*ID) {
+	findLock.Lock()
+	defer findLock.Unlock()
+	wg.Add(1)
+	res = []*Key{}
+	// -1 means "bucket already existed" (or key empty); otherwise the position in res.
+	bpn, bpc, bptc, bpb = -1, -1, -1, -1
+
+	// lockBucket locks the bucket of key in index (creating it when missing),
+	// appends it to res and returns its position when it was created, else -1.
+	lockBucket := func(index map[string]*Key, key string) int {
+		if key == "" {
+			return -1
+		}
+		created := -1
+		bucket := index[key]
+		if bucket == nil {
+			bucket = &Key{Arr: []string{}}
+			index[key] = bucket
+			created = len(res)
+		}
+		bucket.Lock.Lock()
+		res = append(res, bucket)
+		return created
+	}
+
+	bpn = lockBucket(mapPn, pn)
+	bpc = lockBucket(mapPc, pc)
+	// pc and ptc share mapPc. When both are the same string they resolve to
+	// the same bucket, which is already locked above; locking it a second
+	// time would block forever (assuming Key.Lock is a plain mutex).
+	if ptc != pc {
+		bptc = lockBucket(mapPc, ptc)
+	}
+	bpb = lockBucket(mapPb, pb)
+
+	repeatId := map[string]bool{}
+	idArr = []string{} // project ids
+	IDArr = []*ID{}    // project entries
+	for _, m := range res {
+		for _, id := range m.Arr {
+			if repeatId[id] {
+				continue
+			}
+			repeatId[id] = true
+			AllIdsMapLock.Lock()
+			Id := AllIdsMap2[id]
+			AllIdsMapLock.Unlock()
+			if Id != nil {
+				Id.Lock.Lock()
+				idArr = append(idArr, id)
+				IDArr = append(IDArr, Id)
+			}
+		}
+	}
+	return
+}
+
+// startProjectMerge is the project-merge entry point. It compares info with
+// every candidate project returned by getCompareIds, sorts the candidates
+// into three priority groups, and either merges info into the best candidate
+// (UpdateProject) or creates a new project (NewProject).
+//
+// Locking: the index buckets and ID entries returned by getCompareIds are
+// locked; they are unlocked here by deferred calls when the function returns,
+// and wg.Done balances the wg.Add done in getCompareIds.
+//
+// The comparison string comStr is built from letters:
+// name, code, title code (from comparePNCB), buyer, region, city, publish
+// time, agency, budget, bid amount - ten characters in total.
+func startProjectMerge(info *Info, tmp map[string]interface{}) {
+	// bpn, bpc, bptc, bpb: position in pids of a newly created bucket, -1 otherwise.
+	// pids: the locked index buckets. IDArr: the locked candidate entries.
+	bpn, bpc, bptc, bpb, pids, _, IDArr := getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+	defer wg.Done()
+
+	// Deferred on purpose: every lock is held until the merge has finished.
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+	}
+	for _, id := range IDArr {
+		defer id.Lock.Unlock()
+	}
+
+	comRes1 := []*ProjectInfo{} // highest-priority matches
+	comRes2 := []*ProjectInfo{} // second priority
+	comRes3 := []*ProjectInfo{} // third priority
+	resVal, pjVal := 0, 0
+	for _, v := range IDArr {
+		compareProject := v.P
+		// Original author's note: problems were traced to LastTime here.
+		diffTime := math.Abs(float64(info.Publishtime - compareProject.LastTime))
+		if diffTime >= 185*86400 {
+			continue
+		}
+		// Letters: A equal, B contained, C not equal, D absent.
+		info.PNBH = 0
+		info.PCBH = 0
+		info.PTCBH = 0
+		compareStr, score := comparePNCB(info, compareProject)
+
+		resVal, pjVal = Select(compareStr, info, compareProject)
+		if resVal <= 0 {
+			continue
+		}
+
+		// ---- buyer ----
+		compareBuyer := "D"
+		if info.Buyer != "" {
+			if info.Buyer == compareProject.Buyer {
+				compareBuyer = "A"
+				score += 3
+			} else if compareProject.Buyer != "" {
+				if strings.Contains(info.Buyer, compareProject.Buyer) || strings.Contains(compareProject.Buyer, info.Buyer) {
+					compareBuyer = "B"
+					score += 1
+				} else {
+					compareBuyer = "C"
+				}
+			}
+		}
+
+		// ---- region (first letter) and city (second letter) ----
+		compareCity := ""
+		if info.Area != "全国" && info.Area != "" && info.Area == compareProject.Area {
+			compareCity += "A"
+			score += 2
+		} else if info.Area == "全国" || compareProject.Area == "全国" {
+			compareCity += "B"
+			score += 1
+		} else {
+			compareCity += "C"
+		}
+		if compareCity != "C" {
+			if info.City != "" && info.City == compareProject.City {
+				compareCity += "A"
+				score += 2
+			} else {
+				if info.Area == "全国" || compareProject.Area == "全国" {
+					compareCity += "B"
+				} else if info.City == compareCity {
+					// NOTE(review): this compares the city name with the
+					// comparison letter and is practically never true; it
+					// looks like a check for a missing city was intended.
+					// Left unchanged - confirm the intent.
+					compareCity += "B"
+				} else {
+					compareCity += "C"
+				}
+			}
+		} else {
+			compareCity += "C"
+		}
+		// compareCity always has exactly two letters here. The original code
+		// tested compareCity[1:1], which is the empty string, so its
+		// `!= "C"` city checks were always true; test the real second letter.
+		cityOK := compareCity[1:2] != "C"
+
+		score2 := 0
+		if compareCity == "AA" {
+			if info.District != "" && info.District == compareProject.District {
+				score2 = 1
+			}
+		}
+
+		// ---- publish-time distance ----
+		compareTime := "D"
+		if diffTime < 45*86400 {
+			compareTime = "A"
+			score += 2
+		} else if diffTime < 90*86400 {
+			compareTime = "B"
+			score += 1
+		}
+
+		// ---- agency ----
+		compareAgency := "D"
+		if info.Agency != "" {
+			if info.Agency == compareProject.Agency {
+				compareAgency = "A"
+				score += 2
+				score2 += 1
+			} else if compareProject.Agency != "" {
+				if strings.Contains(info.Agency, compareProject.Agency) || strings.Contains(compareProject.Agency, info.Agency) {
+					compareAgency = "B"
+					score += 1
+					score2 += 1
+				} else {
+					compareAgency = "C"
+				}
+			}
+		}
+
+		// ---- budget / bid amount (equal, or within 10% of each other) ----
+		compareBudget := "C"
+		if info.Budget > 0 && (info.Budget == compareProject.Budget || (compareProject.Bidamount > 0 && info.Budget > compareProject.Bidamount && (info.Budget-compareProject.Bidamount) < (0.1*info.Budget))) {
+			compareBudget = "A"
+			score += 1
+			score2 += 1
+		} else if info.Budget == 0 && compareProject.Budget == 0 {
+			compareBudget = "B"
+		}
+		compareBidmount := "C"
+		if info.Bidamount > 0 && (info.Bidamount == compareProject.Bidamount || (compareProject.Budget > 0 && compareProject.Budget > info.Bidamount && (compareProject.Budget-info.Bidamount) < 0.1*compareProject.Budget)) {
+			compareBidmount = "A"
+			score += 1
+			score2 += 1
+		} else if info.Bidamount == 0 && compareProject.Bidamount == 0 {
+			compareBidmount = "B"
+		}
+
+		// project name, project code, title project code, buyer, region,
+		// city, publish time, agency, budget, bid amount
+		comStr := compareStr + compareBuyer + compareCity + compareTime + compareAgency + compareBudget + compareBidmount
+		compareProject.comStr = comStr
+		// Store the score: it is the sort key used below to pick the best
+		// candidate and was never assigned before.
+		compareProject.score = score
+
+		// eqV is the priority group: 1 best, 3 weakest, 0 no match.
+		eqV := 0
+		switch resVal {
+		case 3:
+			if pjVal == 3 && comStr[3:] != "CCCDCCC" {
+				eqV = 1
+			} else if compareBuyer < "C" {
+				if pjVal > 1 {
+					eqV = 1
+				} else {
+					eqV = 2
+				}
+			} else if compareBuyer == "D" {
+				if pjVal > 1 && (cityOK || score2 > 0) {
+					eqV = 2
+				} else if cityOK && compareTime == "A" && score2 > 0 {
+					eqV = 3
+				}
+			} else {
+				if pjVal == 3 && (score2 > 0 || cityOK) {
+					eqV = 2
+				} else if pjVal == 2 && cityOK && compareTime == "A" && score2 > 0 {
+					eqV = 3
+				} else if compareCity == "AA" && compareTime == "A" && score2 > 0 {
+					eqV = 3
+				}
+			}
+		case 2:
+			if compareBuyer < "C" {
+				if pjVal > 1 {
+					eqV = 2
+				} else if (cityOK && compareTime == "A") || score2 > 0 {
+					eqV = 3
+				}
+			} else if compareBuyer == "D" {
+				if pjVal > 1 && (score2 > 0 || cityOK) {
+					eqV = 2
+				} else if cityOK && compareTime == "A" && score2 > 0 {
+					eqV = 3
+				}
+			} else {
+				if pjVal > 1 && compareTime == "A" && (score2 > 0 || cityOK) {
+					eqV = 2
+				} else if cityOK && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+					eqV = 3
+				}
+			}
+		case 1:
+			if compareBuyer < "C" {
+				if pjVal > 1 && (score2 > 0 || cityOK) {
+					eqV = 2
+				} else if cityOK && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+					eqV = 3
+				}
+			} else if compareBuyer == "D" {
+				if pjVal > 1 && compareTime == "A" && (score2 > 0 || cityOK) {
+					eqV = 2
+				} else if cityOK && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+					eqV = 3
+				}
+			} else {
+				if pjVal > 1 && compareTime == "A" && score2 > 1 && cityOK {
+					eqV = 3
+				}
+			}
+		}
+
+		switch eqV {
+		case 1:
+			comRes1 = append(comRes1, compareProject)
+		case 2:
+			comRes2 = append(comRes2, compareProject)
+		case 3:
+			comRes3 = append(comRes3, compareProject)
+		default:
+			// Strong name/code match that was still rejected: log it for inspection.
+			if resVal == 3 || pjVal > 1 {
+				log.Println("===", resVal, pjVal, comStr, info.ProjectCode, compareProject.ProjectCode,
+					info.ProjectName, compareProject.ProjectName, info.Buyer, compareProject.Buyer, info.Id, compareProject.Id.Hex())
+			}
+		}
+	}
+
+	// -------------------------------- comparison finished -----------------------
+	// Update the index buckets and the matched project, using the first
+	// non-empty priority group.
+	bFindProject := false
+	for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+		if len(resN) == 0 {
+			continue
+		}
+		if len(resN) > 1 {
+			sort.Slice(resN, func(i, j int) bool {
+				return resN[i].score > resN[j].score
+			})
+		}
+
+		bFindProject = true
+		best := resN[0]
+		findPid := best.Id.Hex()
+		for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+			if bv < 0 {
+				continue
+			}
+			// The bucket was newly created for this key: register the
+			// project under it and record the new key on the project.
+			pids[bv].Arr = append(pids[bv].Arr, findPid)
+			switch {
+			case k2 == 0:
+				if best.ProjectName == "" {
+					best.ProjectName = info.ProjectName
+				} else {
+					best.MPN = append(best.MPN, info.ProjectName)
+				}
+			case k2 < 3:
+				code := qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+				if best.ProjectCode == "" {
+					best.ProjectCode = code
+				} else {
+					best.MPC = append(best.MPC, code)
+				}
+			default:
+				if best.Buyer == "" {
+					best.Buyer = info.Buyer
+				}
+			}
+		}
+		// NOTE(review): resVal and pjVal hold the values of the last candidate
+		// evaluated in the loop above, not necessarily those of best - confirm
+		// whether UpdateProject expects the per-candidate values.
+		UpdateProject(tmp, info, best, kv+1, best.comStr, resVal, pjVal)
+		break
+	}
+
+	if !bFindProject {
+		// No match: create a new project and register it under every bucket.
+		id, p := NewProject(tmp, info)
+		AllIdsMapLock.Lock()
+		AllIdsMap2[id] = &ID{Id: id, lastTime: info.Publishtime, P: p}
+		AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+
+}
+
+// BinarySearch sorts s in place in ascending order and then binary-searches
+// it for k. It returns an index at which k sits in the (now sorted) slice,
+// or -1 when k is not present.
+func BinarySearch(s []string, k string) int {
+	sort.Strings(s)
+	for left, right := 0, len(s)-1; left <= right; {
+		mid := (left + right) >> 1
+		switch cur := s[mid]; {
+		case cur < k:
+			left = mid + 1
+		case cur > k:
+			right = mid - 1
+		default:
+			return mid
+		}
+	}
+	return -1
+}

+ 182 - 0
fullproject/src_dev4/merge_comparepncb.go

@@ -0,0 +1,182 @@
+package main
+
+import (
+	"strings"
+)
+
+// comparePNCB compares the project name, project code and title project code
+// of info with compareProject (its primary name/code plus the merged extras
+// MPN/MPC).
+//
+// compareStr is three letters, one per field in that order:
+// A equal, B contained, C not equal, D nothing to compare.
+// score is increased by 4 for an A (plus 2 when the value is longer than 18
+// runes) and by 2 for a B (plus 1 when the contained length is above 12).
+//
+// Side effect: for a B result the contained length (capped at info.LenPN /
+// LenPC / LenPTC) is written to info.PNBH / PCBH / PTCBH.
+func comparePNCB(info *Info, compareProject *ProjectInfo) (compareStr string, score int) {
+	if info.ProjectName != "" {
+		pns := []string{}
+		if compareProject.ProjectName != "" {
+			pns = append(pns, compareProject.ProjectName)
+		}
+		if len(compareProject.MPN) > 0 {
+			pns = append(pns, compareProject.MPN...)
+		}
+		// ifind: 0 no candidate, 1 equal, 2 contained, 3 not equal.
+		ifind := 0
+		templen := 0
+		for _, v := range pns {
+			if info.ProjectName == v {
+				ifind = 1
+				break
+			} else {
+				//if strings.Contains(info.ProjectName, v) || strings.Contains(v, info.ProjectName) ||
+				retv := CheckContain(info.ProjectName, v)
+				if retv == 1 {
+					ifind = 1
+					break
+				} else if retv == 2 {
+					// remember the rune length of the contained candidate
+					templen = len([]rune(v))
+					ifind = 2
+				} else if ifind == 0 {
+					// only downgrade to "not equal" when nothing better was seen
+					ifind = 3
+				}
+			}
+		}
+		switch ifind {
+		case 0:
+			compareStr = "D"
+		case 1:
+			compareStr = "A"
+			score += 4
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 2
+			}
+		case 2:
+			compareStr = "B"
+			score += 2
+			if templen > info.LenPN {
+				templen = info.LenPN
+			}
+			info.PNBH = templen
+			if templen > 12 {
+				score += 1
+			}
+		case 3:
+			compareStr = "C"
+		}
+	} else {
+		compareStr = "D"
+	}
+
+	/*
+		Project codes: pay attention to - - ( ) ( ) variants.
+		init_text = ["号","(重)","(第二次)","(重)"]
+		all_clean_mark = ["[","(","【","(","〖","]",")","】",")","〗","-","〔","〕","《","[","]","{","}","{","—"," ","-","﹝","﹞","–"]
+	*/
+	// index 0 is the project code, index 1 the title project code; both are
+	// compared against the same candidate code list.
+	for index, pc := range []string{info.ProjectCode, info.PTC} {
+		if pc != "" {
+			pcs := []string{}
+			if compareProject.ProjectCode != "" {
+				pcs = append(pcs, compareProject.ProjectCode)
+			}
+			if len(compareProject.MPC) > 0 {
+				pcs = append(pcs, compareProject.MPC...)
+			}
+			ifind := 0
+			templen := 0
+			for _, v := range pcs {
+				if pc == v {
+					ifind = 1
+					break
+				} else {
+					// math.Abs(float64(len([]rune(pc))-len([]rune(v)))) < 6
+					//if !_numreg1.MatchString(pc) && !_zimureg1.MatchString(pc) && !_numreg1.MatchString(v) && !_zimureg1.MatchString(v)
+					if strings.Contains(pc, v) || strings.Contains(v, pc) {
+						// t1 is the longer code, t2 the shorter one
+						t1 := pc
+						t2 := v
+						if len(v) > len(pc) {
+							t1 = v
+							t2 = pc
+						}
+						// what remains of the longer code once the shorter
+						// one and everything matched by _datereg are removed
+						t3 := strings.Replace(t1, t2, "", -1)
+						t3 = _datereg.ReplaceAllString(t3, "")
+						if t3 == "" {
+							// nothing left: treat as equal
+							ifind = 1
+							break
+						} else {
+							ifind = 2
+							templen = len([]rune(v))
+						}
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+			switch ifind {
+			case 0:
+				compareStr += "D"
+			case 1:
+				compareStr += "A"
+				score += 4
+				if len([]rune(pc)) > 18 {
+					score += 2
+				}
+			case 2:
+				compareStr += "B"
+				score += 2
+				if index == 0 {
+					if templen > info.LenPC {
+						templen = info.LenPC
+					}
+					info.PCBH = templen
+					if templen > 12 {
+						score += 1
+					}
+
+				} else {
+					if templen > info.LenPTC {
+						templen = info.LenPTC
+					}
+					info.PTCBH = templen
+					if templen > 12 {
+						score += 1
+					}
+				}
+
+			case 3:
+				compareStr += "C"
+			}
+
+		} else {
+			compareStr += "D"
+		}
+	}
+	return
+}
+
+// CheckContain compares two strings after removing everything matched by
+// replaceStr and returns 1 when they are considered equal, 2 when one is
+// considered contained in the other, and 3 otherwise.
+//
+// Beyond the direct equality check it strips runs of matching runes from both
+// strings and then compares what is left.
+// NOTE(review): the run detection walks every (i, j) rune pair and carries
+// tmp across iterations of the outer loop, so it is a heuristic rather than
+// a true common-substring search - confirm this is intended.
+func CheckContain(b1, b2 string) (res int) {
+	b1 = replaceStr.ReplaceAllString(b1, "")
+	b2 = replaceStr.ReplaceAllString(b2, "")
+
+	if b1 == b2 {
+		res = 1 // equal
+		return
+	}
+	bs1 := []rune(b1)
+	bs2 := []rune(b2)
+	// tmp accumulates consecutively matching runes
+	tmp := ""
+	for i := 0; i < len(bs1); i++ {
+		for j := 0; j < len(bs2); j++ {
+			if bs1[i] == bs2[j] {
+				tmp += string(bs1[i])
+			} else if tmp != "" {
+				// the run ended: remove it from both strings
+				b1 = strings.Replace(b1, tmp, "", -1)
+				b2 = strings.Replace(b2, tmp, "", -1)
+				tmp = ""
+			}
+		}
+	}
+	// remove a run that was still open when the loops finished
+	if tmp != "" {
+		b1 = strings.Replace(b1, tmp, "", -1)
+		b2 = strings.Replace(b2, tmp, "", -1)
+	}
+	if b1 == b2 {
+		res = 1 // equal
+	} else if b1 == "" || b2 == "" {
+		res = 2 // contained
+	} else {
+		res = 3 // not equal
+	}
+	return
+}

+ 508 - 0
fullproject/src_dev4/merge_select.go

@@ -0,0 +1,508 @@
+package main
+
+// Select maps the three-letter comparison string produced by comparePNCB
+// (project name, project code, title project code; A equal, B contained,
+// C not equal, D absent) to two ratings, using the lengths recorded on info
+// (LenPN/LenPC/LenPTC and the contained lengths PNBH/PCBH/PTCBH).
+//
+// Both results stay 0 when compareStr is listed in compareNoPass, is not one
+// of the cases below, or the case's conditions are not met. The caller
+// (startProjectMerge) only continues with a candidate when res > 0.
+func Select(compareStr string, info *Info, compareInfo *ProjectInfo) (res, pj int) {
+	// no comparable project name or project code // rating
+	if compareNoPass[compareStr] {
+
+	} else {
+		switch compareStr {
+		case "AAA":
+			res = 3
+			pj = 3
+		case "AAB":
+			res = 3
+			pj = 3
+		case "ABA":
+			res = 3
+			pj = 3
+		case "ABB":
+			if info.LenPN > 10 || info.PCBH > 8 || info.PTCBH > 8 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 3
+		case "BAA":
+			if info.PNBH > 10 || info.LenPC > 8 || info.LenPTC > 8 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 3
+		case "BAB":
+			if info.PNBH > 10 || info.LenPTC > 8 || info.PTCBH > 8 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 3
+		case "BBA":
+			if info.PNBH > 10 || info.PCBH > 8 || info.LenPC > 8 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 3
+		case "BBB":
+			// count how many of the three contained lengths are long enough
+			v := 0
+			if info.PNBH > 10 {
+				v++
+			}
+			if info.PCBH > 8 {
+				v++
+			}
+			if info.PTCBH > 8 {
+				v++
+			}
+			if v > 1 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 2
+		case "AAD":
+			if info.LenPC > 8 || info.LenPN > 12 {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 3
+		case "ABD":
+			if info.LenPN > 10 && info.PCBH > 8 {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 || info.PCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BAD":
+			if info.LenPC > 13 || (info.PNBH > 10 && info.LenPC > 8) {
+				res = 3
+				pj = 3
+			} else if info.PNBH > 10 || info.LenPC > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BBD":
+			if info.PNBH > 12 && info.PCBH > 10 {
+				res = 3
+				pj = 1
+			} else if info.PNBH > 10 && info.PCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "ADA":
+			if info.LenPN > 12 || (info.LenPTC > 8 && !StrOrNum2.MatchString(info.PTC)) {
+				res = 3
+			} else {
+				res = 2
+			}
+			pj = 2
+		case "ADB":
+			if info.LenPN > 10 && info.PTCBH > 8 && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 || info.PTCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BDA":
+			if info.PNBH > 10 && info.LenPTC > 8 && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.PNBH > 10 || info.LenPTC > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BDB":
+			if info.PNBH > 12 && info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+			} else if info.PNBH > 10 && info.PTCBH > 8 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 2
+		case "ADD":
+			if info.LenPN > 18 {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "BDD":
+			if info.PNBH > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "DAA":
+			if info.LenPTC > 8 || info.LenPC > 8 {
+				res = 3
+				pj = 2
+			} else {
+				res = 2
+				pj = 3
+			}
+		case "DAB":
+			if info.LenPC > 8 && info.PTCBH > 8 {
+				res = 3
+				pj = 2
+			} else if info.LenPC > 8 || info.PTCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "DAD":
+			if info.LenPC > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+				pj = 2
+			} else if info.LenPC > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "DBA":
+			if info.PCBH > 8 && info.LenPC > 8 && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.PCBH > 8 || info.LenPC > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "DBB":
+			if info.PCBH > 10 && info.PTCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+				pj = 1
+			} else if info.PCBH > 8 && info.PTCBH > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "DBD":
+			if info.PCBH > 12 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 2
+				pj = 1
+			} else {
+				res = 1
+				pj = 1
+			}
+		case "DDA":
+			if info.LenPTC > 14 && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 1
+			} else if info.LenPTC > 8 {
+				res = 2
+				pj = 1
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "DDB":
+			if info.PTCBH > 12 && !StrOrNum2.MatchString(info.PTC) {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "ACA":
+			if info.LenPN > 10 && info.LenPTC > 8 && info.LenPC != len([]rune(compareInfo.ProjectCode)) && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 || info.LenPTC > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "ACB":
+			if info.LenPN > 10 && info.PTCBH > 8 && info.LenPC != len([]rune(compareInfo.ProjectCode)) && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 || info.PTCBH > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BCA":
+			// NOTE(review): && binds tighter than ||, so the code-length and
+			// StrOrNum2 checks only apply to the `LenPTC > 12` alternative -
+			// confirm that grouping is intended.
+			if (info.PNBH > 10 && info.LenPTC > 8) || info.LenPTC > 12 && info.LenPC != len([]rune(compareInfo.ProjectCode)) && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 2
+			} else if info.PNBH > 10 || info.LenPTC > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BCB":
+			if info.PNBH > 12 && info.PTCBH > 12 && info.LenPC != len([]rune(compareInfo.ProjectCode)) && !StrOrNum2.MatchString(info.PTC) {
+				res = 3
+				pj = 1
+			} else if info.PNBH > 10 || info.PTCBH > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "AAC":
+			if (info.LenPN > 10 && info.LenPC > 8) || info.LenPN > 14 || (info.LenPC > 10 && !StrOrNum2.MatchString(info.ProjectCode)) {
+				res = 3
+				pj = 3
+			} else {
+				res = 2
+				pj = 3
+			}
+		case "ABC":
+			if info.LenPN > 14 && info.PCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+				pj = 2
+			} else if info.LenPN > 10 || info.PCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BAC":
+			if info.PNBH > 14 && info.LenPC > 8 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+				pj = 2
+			} else if info.PNBH > 10 || info.LenPC > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "BBC":
+			if info.PNBH > 14 && info.PCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+				pj = 1
+			} else if info.PNBH > 10 || info.PCBH > 8 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "ACC":
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.LenPN > 16 {
+					res = 2
+					pj = 1
+				} else {
+					res = 1
+					pj = 2
+				}
+			}
+		case "ACD":
+			// project codes do not match
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.LenPN > 16 {
+
+					res = 2
+				} else {
+					res = 1
+
+				}
+				pj = 1
+			}
+
+		case "ADC":
+			if info.LenPN > 16 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "BCC":
+			// project codes do not match
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.PNBH > 12 {
+					res = 1
+				}
+				pj = 1
+			}
+		case "BCD":
+			// project codes do not match
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.PNBH > 8 {
+					res = 1
+				}
+				pj = 1
+			}
+		case "BDC":
+			if info.PNBH > 7 {
+				res = 1
+			}
+			pj = 1
+		case "CAA":
+			if info.LenPC > 12 || info.LenPTC > 12 {
+				res = 3
+				pj = 2
+			} else if info.LenPC > 8 || info.LenPTC > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "CAB":
+			if info.LenPC > 12 && info.PTCBH > 8 {
+				res = 3
+				pj = 2
+			} else if info.LenPC > 12 || info.PTCBH > 8 {
+				res = 2
+				pj = 3
+			} else {
+				res = 1
+				pj = 3
+			}
+		case "CAC":
+			if info.LenPC > 9 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 1
+			}
+		case "CAD":
+			if info.LenPC > 9 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "CBA":
+			if info.LenPTC > 14 && info.PCBH > 12 {
+				res = 3
+				pj = 2
+			} else if info.LenPTC > 12 || info.PCBH > 10 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "CBB":
+			if info.PCBH > 13 && info.PTCBH > 13 {
+				res = 3
+				pj = 1
+			} else if info.PCBH > 9 || info.PTCBH > 9 {
+				res = 2
+				pj = 2
+			} else {
+				res = 1
+				pj = 2
+			}
+		case "CBC":
+			if info.PCBH > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 2
+			} else if info.PCBH > 5 {
+				res = 1
+			}
+			pj = 1
+		case "CBD":
+			if info.PCBH > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 2
+			} else if info.PCBH > 5 {
+				res = 1
+			}
+			pj = 1
+		case "CCA":
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.LenPTC > 12 && !StrOrNum2.MatchString(info.PTC) {
+					res = 2
+				} else if info.LenPTC > 5 {
+					res = 1
+				}
+				pj = 1
+			}
+		case "CCB":
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+					res = 1
+				}
+				pj = 1
+			}
+		case "CDA":
+			if info.LenPTC > 12 && !StrOrNum2.MatchString(info.PTC) {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "CDB":
+			if info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+				res = 1
+				pj = 1
+			}
+		case "DAC":
+			if info.LenPC > 13 && !StrOrNum2.MatchString(info.ProjectCode) {
+				res = 3
+			} else if info.LenPC > 8 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		case "DBC":
+			if info.PCBH > 8 {
+				res = 1
+			}
+			pj = 1
+		case "DCA":
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.LenPTC > 10 {
+					res = 2
+				} else {
+					res = 1
+				}
+				pj = 1
+			}
+		case "DCB":
+			if info.LenPC != len([]rune(compareInfo.ProjectCode)) {
+				if info.PTCBH > 8 && !StrOrNum2.MatchString(info.PTC) {
+					res = 1
+				}
+				pj = 1
+			}
+		}
+
+	}
+	return
+}

+ 172 - 0
fullproject/src_dev4/new_project.go

@@ -0,0 +1,172 @@
+package main
+
+import (
+	qu "qfw/util"
+	"strings"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+// FIELDS lists the keys that NewProject copies from the source record into a
+// new project document when they are present.
+var FIELDS = []string{"area", "city", "district", "projectname", "projectcode", "buyer", "winner", "budget", "bidamount", "bidstatus", "agency", "projectscope"}
+
+// NewProject assembles the document for a brand-new project from the source
+// record tmp and its parsed form thisinfo, hands it to NewPinfo (which saves
+// it) and returns the new project's hex id together with its in-memory
+// ProjectInfo.
+func NewProject(tmp map[string]interface{}, thisinfo *Info) (string, *ProjectInfo) {
+	// values that are always written
+	set := map[string]interface{}{
+		"s_projectname":   thisinfo.ProjectName,
+		"createtime":      time.Now().Unix(),
+		"sourceinfoid":    qu.BsonIdToSId(tmp["_id"]),
+		"sourceinfourl":   tmp["href"],
+		"topscopeclass":   thisinfo.Topscopeclass,
+		"subscopeclass":   thisinfo.Subscopeclass,
+		"s_subscopeclass": strings.Join(thisinfo.Subscopeclass, ","),
+		"s_winner":        strings.Join(thisinfo.Winners, ","),
+	}
+
+	// fields copied verbatim from the source record when present
+	for _, name := range FIELDS {
+		if val := tmp[name]; val != nil {
+			set[name] = val
+		}
+	}
+	if pkg := tmp["package"]; pkg != nil {
+		set["package"] = pkg // no priority defined
+	}
+
+	// optional values taken from the parsed info
+	if thisinfo.Buyerperson != "" {
+		set["buyerperson"] = thisinfo.Buyerperson
+	}
+	if thisinfo.Buyertel != "" {
+		set["buyertel"] = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		set["buyerclass"] = thisinfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > 0 {
+		set["bidopentime"] = thisinfo.Bidopentime
+	}
+	if len(thisinfo.Winnerorder) > 0 {
+		set["winnerorder"] = thisinfo.Winnerorder
+	}
+
+	// the source record becomes the first entry of the project's list
+	set["list"] = []bson.M{NewPushInfo(tmp)}
+
+	pId := bson.NewObjectId()
+	set["_id"] = pId
+	p1 := NewPinfo(pId, thisinfo, set)
+	return pId.Hex(), &p1
+}
+
+// NewPushInfo builds the list entry that describes one source record inside
+// a project document. A fixed set of keys is always copied (even when the
+// value is nil); winner, budget and bidamount are copied only when present.
+func NewPushInfo(tmp map[string]interface{}) bson.M {
+	res := bson.M{"infoid": qu.BsonIdToSId(tmp["_id"])}
+
+	always := []string{
+		"comeintime", "publishtime", "title", "toptype", "subtype", "infoformat",
+		"area", "city", "projectname", "projectcode", "buyer", "href",
+	}
+	for _, name := range always {
+		res[name] = tmp[name]
+	}
+
+	optional := []string{"winner", "budget", "bidamount"}
+	for _, name := range optional {
+		if val := tmp[name]; val != nil {
+			res[name] = val
+		}
+	}
+	return res
+}
+
+// NewPinfo creates the in-memory ProjectInfo for a new project (the original
+// comment describes it as the object kept in the redis array) and saves the
+// document set to ProjectColl via MongoTool.SaveByOriID.
+func NewPinfo(id bson.ObjectId, thisinfo *Info, set map[string]interface{}) ProjectInfo {
+	// the title project code is kept as an extra code only when LenPTC > 5
+	extraCodes := []string{}
+	if thisinfo.LenPTC > 5 {
+		extraCodes = append(extraCodes, thisinfo.PTC)
+	}
+
+	p1 := ProjectInfo{
+		Id:  id,
+		Ids: []string{thisinfo.Id},
+
+		// identification
+		ProjectName: thisinfo.ProjectName,
+		ProjectCode: thisinfo.ProjectCode,
+		MPN:         []string{},
+		MPC:         extraCodes,
+
+		// parties
+		Buyer:       thisinfo.Buyer,
+		Buyerclass:  thisinfo.Buyerclass,
+		Agency:      thisinfo.Agency,
+		Winners:     thisinfo.Winners,
+		Winnerorder: thisinfo.Winnerorder,
+
+		// classification and location
+		Topscopeclass: thisinfo.Topscopeclass,
+		Subscopeclass: thisinfo.Subscopeclass,
+		Area:          thisinfo.Area,
+		City:          thisinfo.City,
+		District:      thisinfo.District,
+
+		// packages
+		HasPackage: thisinfo.HasPackage,
+		Package:    map[string]interface{}{},
+
+		// times and amounts
+		FirstTime:   thisinfo.Publishtime,
+		LastTime:    thisinfo.Publishtime,
+		Bidopentime: thisinfo.Bidopentime,
+		Budget:      thisinfo.Budget,
+		Bidamount:   thisinfo.Bidamount,
+	}
+
+	MongoTool.SaveByOriID(ProjectColl, set)
+
+	return p1
+}
+
+// updatePool queues update operations; updateQueue drains it and writes them
+// to ProjectColl in bulk.
+var updatePool = make(chan []map[string]interface{}, 2000)
+
+//var savePool = make(chan map[string]interface{}, 6)
+//func SaveQueue() {
+//	arr := []map[string]interface{}{}
+//	sp := make(chan bool, 3)
+//	for {
+//		select {
+//		case <-beforUpdate:
+//			if len(arr) > 0 {
+//				MongoTool.SaveBulk(ProjectColl, arr...)
+//				arr = []map[string]interface{}{}
+//			}
+//		case v := <-savePool:
+//			arr = append(arr, v)
+//			if len(arr) > 50 {
+//				sp <- true
+//				go func(arr []map[string]interface{}) {
+//					MongoTool.SaveBulk(ProjectColl, arr...)
+//					<-sp
+//				}(arr)
+//				arr = []map[string]interface{}{}
+//			}
+//		case <-time.After(80 * time.Millisecond):
+//			if len(arr) > 0 {
+//				sp <- true
+//				go func(arr []map[string]interface{}) {
+//					MongoTool.SaveBulk(ProjectColl, arr...)
+//					<-sp
+//				}(arr)
+//				arr = []map[string]interface{}{}
+//			}
+//		}
+//	}
+//}
+
+// updateQueue runs forever, batching the operations received on updatePool.
+// A batch is written with MongoTool.UpdateBulk once it holds more than 300
+// operations, or when no operation has arrived for 500ms and the batch is
+// not empty.
+func updateQueue() {
+	pending := [][]map[string]interface{}{}
+	// flush writes the current batch and starts a new one
+	flush := func() {
+		MongoTool.UpdateBulk(ProjectColl, pending...)
+		pending = [][]map[string]interface{}{}
+	}
+	for {
+		select {
+		case op := <-updatePool:
+			if pending = append(pending, op); len(pending) > 300 {
+				flush()
+			}
+		case <-time.After(500 * time.Millisecond):
+			if len(pending) != 0 {
+				flush()
+			}
+		}
+	}
+}

+ 222 - 0
fullproject/src_dev4/task.go

@@ -0,0 +1,222 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"qfw/util"
+	"regexp"
+	"strings"
+	"sync"
+	//"strings"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+const (
+	// InitMinTime is the lower time bound used by taskQl: Unix second
+	// 1325347200, i.e. the start of 2012 (2012-01-01 00:00:00 at UTC+8).
+	InitMinTime = int64(1325347200) // minimum time position, 2012
+)
+
+// Full merge.
+//
+// taskQl reads "db", "coll" and "thread" from udpInfo (falling back to
+// MongoTool.DbName, ExtractColl and 1), checks the publishtime index and, when
+// that check succeeds, starts the updateQueue and clearMem goroutines and feeds
+// the records to Mql in publishtime windows: first everything up to
+// InitMinTime in one query, then windows of 50*86400 seconds up to the largest
+// publishtime found.
+func taskQl(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	// 1. Check the publishtime index.
+	db, _ := udpInfo["db"].(string)
+	if db == "" {
+		db = MongoTool.DbName
+	}
+	coll, _ := udpInfo["coll"].(string)
+	if coll == "" {
+		coll = ExtractColl
+	}
+	sess := MongoTool.GetMgoConn()
+	bcon := false
+	// NOTE(review): the arguments look like index names rather than field
+	// names -- confirm against the mgo EnsureIndexKey documentation.
+	if sess.DB(db).C(coll).EnsureIndexKey("publishtime_1", "publishtime_-1") == nil {
+		bcon = true
+	} else {
+		log.Println("publishtime_1索引不存在")
+	}
+	MongoTool.DestoryMongoConn(sess)
+	thread := util.IntAllDef(udpInfo["thread"], 1)
+	if bcon {
+		//go SaveQueue()
+		go updateQueue()
+		go clearMem()
+		// Determine the start and end time.
+		startTime, END := int64(0), int64(0)
+
+		// NOTE(review): both lookups query ExtractColl, not the db/coll chosen
+		// above -- confirm this is intended when udpInfo overrides them.
+		sts, bres := MongoTool.Find(ExtractColl, `{}`, "publishtime", `{"publishtime":1}`, true, 0, 1)
+		if bres && sts != nil && len(*sts) == 1 {
+			startTime = util.Int64All((*sts)[0]["publishtime"])
+			sts, bres = MongoTool.Find(ExtractColl, `{}`, "-publishtime", `{"publishtime":1}`, true, 0, 1)
+			if bres && sts != nil && len(*sts) == 1 {
+				END = util.Int64All((*sts)[0]["publishtime"])
+			}
+			log.Println("查询到的起始时间", startTime, END)
+		} else {
+			// No start time found: leave without logging "task over!!!".
+			return
+		}
+		// The queries below use $gt, so step back one second to include the
+		// earliest record itself.
+		startTime -= 1
+		sum := 0
+		if startTime < InitMinTime {
+			// Everything published up to InitMinTime is handled in one query.
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": InitMinTime,
+				},
+			}
+			sum = Mql(q, thread, db, coll, sum)
+			startTime = InitMinTime
+		}
+		for {
+			if startTime >= END {
+				break
+			}
+			// Window end: 50*86400 seconds after the window start, capped at END.
+			et := startTime + 50*86400
+			if et >= END {
+				et = END
+			}
+			q := map[string]interface{}{
+				"publishtime": map[string]interface{}{
+					"$gt":  startTime,
+					"$lte": et,
+				},
+			}
+			sum = Mql(q, thread, db, coll, sum)
+			startTime = et
+			time.Sleep(1 * time.Second)
+		}
+	}
+	log.Println("task over!!!")
+}
+
+// wg is a package-level WaitGroup. None of the functions visible in this file
+// use it -- presumably it is referenced elsewhere in the package; verify.
+var wg = sync.WaitGroup{}
+
+// Ids merges an explicit list of records. udpInfo["ids"] is a comma-separated
+// string of ids; each part is converted with util.StringTOBsonId and the
+// resulting "_id $in" query is handed to Mql on the default database and
+// ExtractColl. udpInfo["thread"] sets the worker count (default 1). The
+// updateQueue and clearMem goroutines are started before the query runs.
+func Ids(udpInfo map[string]interface{}) {
+	rawIDs, _ := udpInfo["ids"].(string)
+	parts := strings.Split(rawIDs, ",")
+	objectIDs := make([]interface{}, 0, len(parts))
+	for i := range parts {
+		objectIDs = append(objectIDs, util.StringTOBsonId(parts[i]))
+	}
+	workers := util.IntAllDef(udpInfo["thread"], 1)
+	go updateQueue()
+	go clearMem()
+	filter := bson.M{"_id": bson.M{"$in": objectIDs}}
+	Mql(filter, workers, MongoTool.DbName, ExtractColl, 0)
+}
+
+// Mql iterates the documents of db.coll matching q, sorted by publishtime,
+// parses each one with ParseInfo and passes the accepted ones to
+// startProjectMerge, running at most thread merges at a time.
+//
+// sum is a running record counter: it is incremented once per iterated
+// document, logged every 1000 records, and returned so callers can carry it
+// across successive calls.
+func Mql(q map[string]interface{}, thread int, db, coll string, sum int) int {
+	defer util.Catch()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	query := sess.DB(db).C(coll).Find(q).Sort("publishtime").Iter()
+	// pool limits the number of concurrently running merge goroutines.
+	pool := make(chan bool, thread)
+	count := 0
+	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
+		info := ParseInfo(tmp)
+		// Skip records where pnbval is 0, or where pnbval is 1 and a buyer is
+		// set.
+		if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
+			pool <- true
+			go func(info *Info, tmp map[string]interface{}) {
+				defer func() {
+					// NOTE(review): currentTime is written from worker
+					// goroutines with no synchronisation visible here.
+					currentTime = info.Publishtime
+					<-pool
+				}()
+				startProjectMerge(info, tmp)
+			}(info, tmp)
+		} else {
+			//log.Println("info err:", tmp["_id"], tmp["title"], tmp["buyer"])
+		}
+		if sum%1000 == 0 {
+			log.Println("current", sum)
+		}
+		sum++
+		// Fresh map per record: the previous one may still be in use by a
+		// worker goroutine.
+		tmp = make(map[string]interface{})
+	}
+	// Block: filling every pool slot only succeeds once all running workers
+	// have released theirs.
+	for n := 0; n < thread; n++ {
+		pool <- true
+	}
+	// Done.
+	log.Println("sontask over:", count, sum, q)
+	return sum
+}
+
+var (
+	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	pcReplace   = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+")
+	StrOrNum    = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$")
+	StrOrNum2   = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$")
+)
+
+// ParseInfo converts a raw record into an *Info by round-tripping it through
+// JSON, then normalises the fields used for project matching.
+//
+// It tries to extract a project code from the title (stored in PTC), cleans
+// ProjectName / ProjectCode / PTC with pcReplace, clears Buyer when it has two
+// runes or fewer, and counts in pnbval how many of name, code and buyer are
+// present. It returns nil when decoding yields no Info.
+func ParseInfo(tmp map[string]interface{}) (info *Info) {
+	// Marshal/Unmarshal errors are ignored; a failed decode is only detected
+	// through thisinfo staying nil.
+	bys, _ := json.Marshal(tmp)
+	var thisinfo *Info
+	json.Unmarshal(bys, &thisinfo)
+	if thisinfo == nil {
+		return nil
+	}
+	// Make sure both class slices are non-nil.
+	if len(thisinfo.Topscopeclass) == 0 {
+		thisinfo.Topscopeclass = []string{}
+	}
+	if len(thisinfo.Subscopeclass) == 0 {
+		thisinfo.Subscopeclass = []string{}
+	}
+
+	// Look for a project code in the title. Three patterns are tried in order
+	// (titleGetPc, titleGetPc1, titleGetPc2); a candidate is accepted only if it
+	// is longer than 6 bytes, differs from ProjectCode and matches neither
+	// numCheckPc nor _zimureg1.
+	res := titleGetPc.FindStringSubmatch(thisinfo.Title)
+	if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) {
+		thisinfo.PTC = res[1]
+	} else {
+		res = titleGetPc1.FindStringSubmatch(thisinfo.Title)
+		if len(res) > 3 && len(res[3]) > 6 && thisinfo.ProjectCode != res[3] && !numCheckPc.MatchString(res[3]) && !_zimureg1.MatchString(res[3]) {
+			thisinfo.PTC = res[3]
+		} else {
+			res = titleGetPc2.FindStringSubmatch(thisinfo.Title)
+			if len(res) > 1 && len(res[1]) > 6 && thisinfo.ProjectCode != res[1] && !numCheckPc.MatchString(res[1]) && !_zimureg1.MatchString(res[1]) {
+				thisinfo.PTC = res[1]
+			}
+		}
+	}
+
+	// Project name: strip the pcReplace matches and count it if anything is left.
+	if thisinfo.ProjectName != "" && len([]rune(thisinfo.ProjectName)) > 0 {
+		thisinfo.ProjectName = pcReplace.ReplaceAllString(thisinfo.ProjectName, "")
+		if thisinfo.ProjectName != "" {
+			thisinfo.pnbval++
+		}
+	}
+
+	// Project code: only one of ProjectCode / PTC is cleaned -- ProjectCode when
+	// it is set, otherwise PTC. When no project name was counted and the cleaned
+	// value is shorter than 5 runes, StrOrNum matches are removed as well.
+	if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+		if thisinfo.ProjectCode != "" {
+			thisinfo.ProjectCode = pcReplace.ReplaceAllString(thisinfo.ProjectCode, "")
+			if thisinfo.pnbval == 0 && len([]rune(thisinfo.ProjectCode)) < 5 {
+				thisinfo.ProjectCode = StrOrNum.ReplaceAllString(thisinfo.ProjectCode, "")
+			}
+		} else {
+			thisinfo.PTC = pcReplace.ReplaceAllString(thisinfo.PTC, "")
+			if thisinfo.pnbval == 0 && len([]rune(thisinfo.PTC)) < 5 {
+				thisinfo.PTC = StrOrNum.ReplaceAllString(thisinfo.PTC, "")
+			}
+		}
+		if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+			thisinfo.pnbval++
+		}
+	}
+	// Drop the title code when it equals, or is contained in, the project code.
+	if thisinfo.ProjectCode == thisinfo.PTC || strings.Index(thisinfo.ProjectCode, thisinfo.PTC) > -1 {
+		thisinfo.PTC = ""
+	}
+
+	// Buyer counts only when it is longer than two runes; otherwise it is cleared.
+	if thisinfo.Buyer != "" && len([]rune(thisinfo.Buyer)) > 2 {
+		thisinfo.pnbval++
+	} else {
+		thisinfo.Buyer = ""
+	}
+
+	// Cache the rune lengths of the cleaned values.
+	thisinfo.LenPC = len([]rune(thisinfo.ProjectCode))
+	thisinfo.LenPTC = len([]rune(thisinfo.PTC))
+	thisinfo.LenPN = len([]rune(thisinfo.ProjectName))
+	return thisinfo
+}

+ 234 - 0
fullproject/src_dev4/update_project.go

@@ -0,0 +1,234 @@
+package main
+
+import (
+	qu "qfw/util"
+	"sort"
+	"strings"
+)
+
+// UpdateProject merges the record thisinfo (raw form: tmp) into the existing
+// project pInfo and queues the resulting MongoDB update on updatePool.
+//
+// pInfo is first updated in memory by EqInfoUpdate. The "$set" part of the
+// update is built from pInfo (plus bidamount/budget taken from tmp when the
+// project had none), and the record itself -- annotated with comStr, resVal and
+// pjVal -- is pushed onto the project's "list" array. weight is not used.
+func UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string, resVal, pjVal int) {
+	//log.Println(thisinfo.ProjectName, pInfo.ProjectName, comStr)
+	//	updateid := pInfo.Id
+	//	if BinarySearch(pInfo.Ids, thisinfo.Id) > -1 {
+	//		return //updateid
+	//	}
+	set := map[string]interface{}{}
+	//res, bres := MongoTool.FindById(ProjectColl, pInfo.Id.Hex(), `{"list":0}`)
+	EqInfoUpdate(thisinfo, pInfo)
+	//if bres && res != nil && *res != nil {
+	set["topscopeclass"] = pInfo.Topscopeclass
+	set["subscopeclass"] = pInfo.Subscopeclass
+	// s_subscopeclass / s_winner are comma-joined copies wrapped in leading and
+	// trailing commas (empty when there are no values).
+	s_subscopeclass := strings.Join(pInfo.Subscopeclass, ",")
+	if len(s_subscopeclass) > 0 {
+		s_subscopeclass = "," + s_subscopeclass + ","
+	}
+	set["s_subscopeclass"] = s_subscopeclass
+	s_winner := strings.Join(pInfo.Winners, ",")
+	if len(s_winner) > 0 {
+		s_winner = "," + s_winner + ","
+	}
+	set["s_winner"] = s_winner
+	if pInfo.Buyerperson != "" && pInfo.Buyertel != "" {
+		set["buyerperson"] = pInfo.Buyerperson
+		set["buyertel"] = pInfo.Buyertel
+	}
+	if pInfo.Buyerclass != "" {
+		set["buyerclass"] = pInfo.Buyerclass
+	}
+	if pInfo.District != "" {
+		set["district"] = pInfo.District
+	}
+	if pInfo.Bidopentime > 0 {
+		set["bidopentime"] = pInfo.Bidopentime
+	}
+	if len(pInfo.Winnerorder) > 0 {
+		set["winnerorder"] = pInfo.Winnerorder
+	}
+	if thisinfo.HasPackage {
+		set["multipackage"] = 1
+	} else {
+		set["multipackage"] = 0
+	}
+
+	if pInfo.ProjectName != "" {
+		set["projectname"] = pInfo.ProjectName
+	}
+
+	if pInfo.ProjectCode != "" {
+		set["projectcode"] = pInfo.ProjectCode
+	}
+
+	if pInfo.Buyer != "" {
+		set["buyer"] = pInfo.Buyer
+	}
+
+	// Budget and winning bid amount: only written while the project has none.
+	if qu.Float64All(tmp["bidamount"]) > 0 && pInfo.Bidamount == 0 {
+		set["bidamount"] = tmp["bidamount"]
+	}
+
+	if qu.Float64All(tmp["budget"]) > 0 && pInfo.Budget == 0 {
+		set["budget"] = tmp["budget"]
+	}
+
+	set["mpn"] = pInfo.MPN
+	set["mpc"] = pInfo.MPC
+	set["area"] = pInfo.Area
+	set["city"] = pInfo.City
+
+	//e := InitEL(util.ObjToString((*res)["extractpos"]))
+	if thisinfo.dealtype == 1 {
+		// Copy selected fields from a package entry up to the record itself.
+		// The comma-ok assertion keeps a record without a usable "package" map
+		// from panicking; ranging over the resulting nil map is a no-op.
+		packages, _ := tmp["package"].(map[string]interface{})
+		var sonpackage map[string]interface{}
+		for _, obj := range packages {
+			// Each entry overwrites the previous one, so the entry visited
+			// last wins (map iteration order is unspecified).
+			sonpackage, _ = obj.(map[string]interface{})
+		}
+		for _, v2 := range []string{"budget", "budget_w", "winner", "winner_w", "bidstatus", "bidstatus_w"} {
+			if sonpackage[v2] != nil {
+				tmp[v2] = sonpackage[v2]
+			}
+		}
+	}
+	//e.fieldpriority(&tmp, res, &set)
+	//set["extractpos"] = e.GetVal()
+	//	if thisinfo.HasPackage { // multi-package handling
+	//		p1, _ := (*res)["package"].(map[string]interface{})
+	//		p2, _ := tmp["package"].(map[string]interface{})
+	//		if p2 != nil {
+	//			if p1 != nil {
+	//				for pk2, pv2 := range p2 {
+	//					if p1[pk2] != nil { // merge
+	//						item1, _ := p1[pk2].(map[string]interface{})
+	//						item2, _ := pv2.(map[string]interface{})
+	//						if item1 != nil && item2 != nil { // original item
+	//							for ik1, iv1 := range item2 {
+	//								if item1[ik1] == nil {
+	//									item1[ik1] = iv1
+	//								}
+	//							}
+	//						}
+	//					} else {
+	//						p1[pk2] = pv2
+	//					}
+	//				}
+	//			} else {
+	//				p1 = p2
+	//			}
+	//		}
+	//		set["package"] = p1
+	//	}
+	// Merging of winning candidates.
+
+	update := map[string]interface{}{}
+	if len(set) > 0 {
+		update["$set"] = set
+	}
+	// Keep the original record data.
+	push := NewPushInfo(tmp)
+	push["compareStr"] = comStr
+	push["resVal"] = resVal
+	push["pjVal"] = pjVal
+	update["$push"] = map[string]interface{}{
+		"list": push,
+	}
+	if len(update) > 0 {
+		// Entry layout expected by updateQueue: selector first, update second.
+		updateInfo := []map[string]interface{}{
+			map[string]interface{}{
+				"_id": pInfo.Id,
+			},
+			update,
+		}
+		// Bulk update; duplicate detection may still be needed here.
+		updatePool <- updateInfo
+		//			MongoTool.Update(ProjectColl, map[string]interface{}{
+		//				"_id": qu.StringTOBsonId(pInfo.Id.Hex()),
+		//			}, &update, false, false)
+	}
+	//}
+	// Then put the index into redis.
+	// Add the time to the queue -------------->start
+}
+
+// EqInfoUpdate folds the fields of the record thisinfo into the in-memory
+// project pInfo. It widens the FirstTime/LastTime range, appends the record id
+// to Ids, fills or upgrades the identifying fields, overwrites contact and
+// classification fields when the record provides them, and unions the
+// Topscopeclass, Subscopeclass and Winners lists.
+func EqInfoUpdate(thisinfo *Info, pInfo *ProjectInfo) {
+	if thisinfo.Publishtime > pInfo.LastTime {
+		pInfo.LastTime = thisinfo.Publishtime
+	}
+	if pInfo.FirstTime == 0 || (thisinfo.Publishtime < pInfo.FirstTime && thisinfo.Publishtime > 0) {
+		pInfo.FirstTime = thisinfo.Publishtime
+	}
+	pInfo.Ids = append(pInfo.Ids, thisinfo.Id)
+	// End of the publish-time update ----------------->end
+
+	// Buyer, Agency and ProjectCode are filled when empty, or replaced when the
+	// project value is short and the record value is long (rune counts below).
+	if (pInfo.Buyer == "" && thisinfo.Buyer != "") || (len([]rune(pInfo.Buyer)) < 5 && len([]rune(thisinfo.Buyer)) > 5) {
+		pInfo.Buyer = thisinfo.Buyer
+	}
+	if (pInfo.Agency == "" && thisinfo.Agency != "") || (len([]rune(pInfo.Agency)) < 5 && len([]rune(thisinfo.Agency)) > 5) {
+		pInfo.Agency = thisinfo.Agency
+	}
+	if (pInfo.ProjectCode == "" && thisinfo.ProjectCode != "") || (len([]rune(pInfo.ProjectCode)) < 6 && len([]rune(thisinfo.ProjectCode)) > 6) {
+		pInfo.ProjectCode = thisinfo.ProjectCode
+	}
+
+	// "全国" (nationwide) is replaced by the record's area and city when the
+	// record carries a different area.
+	if pInfo.Area == "全国" && thisinfo.Area != "全国" {
+		pInfo.Area = thisinfo.Area
+		pInfo.City = thisinfo.City
+	}
+	// The following fields take the record's value whenever it is set.
+	if thisinfo.District != "" {
+		pInfo.District = thisinfo.District
+	}
+	if thisinfo.Buyerperson != "" && thisinfo.Buyertel != "" {
+		pInfo.Buyerperson = thisinfo.Buyerperson
+		pInfo.Buyertel = thisinfo.Buyertel
+	}
+	if thisinfo.Buyerclass != "" {
+		pInfo.Buyerclass = thisinfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > 0 {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+	}
+	// Amounts are only filled while the project value is still below 1.
+	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
+		pInfo.Bidamount = thisinfo.Bidamount
+	}
+	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
+		pInfo.Budget = thisinfo.Budget
+	}
+
+	// Union of the class lists; the target slice is re-sorted after every
+	// insertion before the next BinarySearch lookup.
+	if len(thisinfo.Topscopeclass) > 0 {
+		sort.Strings(pInfo.Topscopeclass)
+		for _, k := range thisinfo.Topscopeclass {
+			if BinarySearch(pInfo.Topscopeclass, k) == -1 {
+				pInfo.Topscopeclass = append(pInfo.Topscopeclass, k)
+				sort.Strings(pInfo.Topscopeclass)
+			}
+		}
+	}
+
+	if len(thisinfo.Subscopeclass) > 0 {
+		sort.Strings(pInfo.Subscopeclass)
+		for _, k := range thisinfo.Subscopeclass {
+			if BinarySearch(pInfo.Subscopeclass, k) == -1 {
+				pInfo.Subscopeclass = append(pInfo.Subscopeclass, k)
+				sort.Strings(pInfo.Subscopeclass)
+			}
+		}
+	}
+	// Winners: same union as above.
+	if len(thisinfo.Winners) > 0 {
+		sort.Strings(pInfo.Winners)
+		for _, k := range thisinfo.Winners {
+			if BinarySearch(pInfo.Winners, k) == -1 {
+				pInfo.Winners = append(pInfo.Winners, k)
+				sort.Strings(pInfo.Winners)
+			}
+		}
+	}
+	// Winnerorder (disabled):
+	//	if len(thisinfo.Winnerorder) > 0 {
+	//		sort.Strings(pInfo.Winnerorder)
+	//		for _, k := range thisinfo.Winnerorder {
+	//			//if BinarySearch(pInfo.Winnerorder, k) == -1 {
+	//				pInfo.Winnerorder = append(pInfo.Winnerorder, k)
+	//				sort.Strings(pInfo.Winnerorder)
+	//			//}
+	//		}
+	//	}
+}

二進制
fullproject/src_dev5.zip


+ 14 - 0
fullproject/src_dev5/config.json

@@ -0,0 +1,14 @@
+{
+    "loadData":true,
+    "mongodbServers": "192.168.3.207:27082",
+    "mongodbPoolSize": 10,
+    "mongodbName": "cesuo",
+    "extractColl": "key1_biddingall",
+    "projectColl": "projectset_0809",
+    "jkmail": {
+        "to": "zhangjinkun@topnet.net.cn",
+        "api": "http://10.171.112.160:19281/_send/_mail"
+    },
+    "udpport": ":1482",
+    "nextNode": []
+}

+ 368 - 0
fullproject/src_dev5/init.go

@@ -0,0 +1,368 @@
+package main
+
+import (
+	"log"
+	"math"
+	mu "mfw/util"
+	"qfw/util"
+	"qfw/util/mongodb"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+const (
+	// ProjectCache is the name "info"; per the original note it stores each
+	// project's information keyed by project ID.
+	ProjectCache = "info" // stores each project's info, keyed by project ID
+)
+
+// Configuration and database handles; all three are populated in init.
+var (
+	Sysconfig                map[string]interface{} // configuration file contents
+	MongoTool                mongodb.MongodbSim     // MongoDB connection
+	ExtractColl, ProjectColl string                 // extraction collection, project collection
+)
+
+// Regular expressions used when cleaning and comparing project names and
+// codes, plus the lookup tables of three-letter compare strings built in init.
+var (
+	// Matches a date.
+	_datereg   = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
+	_numreg1   = regexp.MustCompile("^[0-9-]{1,8}$")
+	_zimureg1  = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
+	_nzreg     = regexp.MustCompile("^[0-9a-zA-Z-]+$")
+	_hanreg    = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
+	replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
+	// Matches specific words such as sub-package, section or batch.
+	pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
+	// Matches a run of at least two digits.
+	nreg1 = regexp.MustCompile("[0-9]{2,}")
+	// Matches at least one ASCII letter.
+	zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
+	// Matches at least one Han character.
+	hreg1 = regexp.MustCompile(`[\p{Han}]+`)
+	// Matches a project code made only of digits and hyphens, at most 10 long.
+	numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
+	// Compare-string tables; filled only during init.
+	compareNoPass = map[string]bool{}
+	compareAB     = map[string]bool{}
+	compareAB2D   = map[string]bool{}
+	compareABD    = map[string]bool{}
+	compareAB2CD  = map[string]bool{}
+	compareABCD   = map[string]bool{}
+)
+
+// init reads the configuration, opens the MongoDB pool, starts the UDP
+// listener and fills the compare-string lookup tables.
+//
+// Each table key is a three-letter string over A/B/C/D. The tables are filled
+// in a fixed order and every later table excludes the keys already claimed by
+// the earlier ones, so the order of the sections below matters.
+//
+// The unchecked type assertions on Sysconfig panic when "mongodbServers",
+// "mongodbName", "extractColl" or "projectColl" is missing or not a string.
+func init() {
+	util.ReadConfig(&Sysconfig)
+	MongoTool = mongodb.MongodbSim{
+		MongodbAddr: Sysconfig["mongodbServers"].(string),
+		Size:        util.IntAll(Sysconfig["mongodbPoolSize"]),
+		DbName:      Sysconfig["mongodbName"].(string),
+	}
+	MongoTool.InitPool()
+	ExtractColl = Sysconfig["extractColl"].(string)
+	ProjectColl = Sysconfig["projectColl"].(string)
+
+	udpport, _ := Sysconfig["udpport"].(string)
+	udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", udpport)
+
+	// Load project data.
+
+	// --- Cannot pass: every combination of C and D.
+	vm := []string{"C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareNoPass[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------")
+
+	// All three elements match, [AB][AB][AB]: highest score.
+	vm = []string{"A", "B"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				compareAB[key] = true
+				//fmt.Println(key)
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB))
+	// --- At least two match, the other may be absent.
+	//[AB][AB][ABD]
+	//[AB][ABD][AB]
+	vm = []string{"A", "B"}
+	vm2 := []string{"A", "B", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] {
+					compareAB2D[key] = true
+					//fmt.Println(key)
+
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2D))
+	// --- At least one matches, the others may be absent.
+	//[ABD][ABD][ABD] // DDD has already been removed
+	vm = []string{"A", "B", "D"}
+	for i := 0; i < 3; i++ {
+		for j := 0; j < 3; j++ {
+			for k := 0; k < 3; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
+					compareABD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareABD))
+
+	//[AB][ABCD][AB]
+	//[AB][AB][ABCD]
+	vm = []string{"A", "B"}
+	vm2 = []string{"A", "B", "C", "D"}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 2; k++ {
+				key := vm[i] + vm2[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	for i := 0; i < 2; i++ {
+		for j := 0; j < 2; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm2[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
+					compareAB2CD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+	//fmt.Println("-------------------", len(compareAB2CD))
+	//[ABECD][ABECD][ABECD]  // [CD][CD][CD] already removed   // this one needs focused discussion
+	vm = []string{"A", "B", "C", "D"}
+	for i := 0; i < 4; i++ {
+		for j := 0; j < 4; j++ {
+			for k := 0; k < 4; k++ {
+				key := vm[i] + vm[j] + vm[k]
+				if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
+					compareABCD[key] = true
+					//fmt.Println(key)
+				}
+			}
+		}
+	}
+}
+
+// CheckHanAndNum reports whether str matches both nreg1 (a run of digits) and
+// hreg1 (Han characters).
+func CheckHanAndNum(str string) (b bool) {
+	if !nreg1.MatchString(str) {
+		return false
+	}
+	b = hreg1.MatchString(str)
+	return b
+}
+// CheckZimuAndNum reports whether str matches both zreg1 (ASCII letters) and
+// nreg1 (a run of digits).
+func CheckZimuAndNum(str string) (b bool) {
+	if !zreg1.MatchString(str) {
+		return false
+	}
+	b = nreg1.MatchString(str)
+	return b
+}
+
+// KeyMap pairs a string-keyed map of *Key values with a mutex; presumably Lock
+// guards Map -- verify against the callers.
+type KeyMap struct {
+	Lock sync.Mutex
+	Map  map[string]*Key
+}
+
+// ID links an id string to its in-memory project P. loadData fills Id with the
+// project's hex id; the use of Lock and pos is not visible here.
+type ID struct {
+	Id   string
+	Lock sync.Mutex
+	pos  int
+	P    *ProjectInfo
+}
+// Key is one index entry: loadData stores in Arr the hex ids of the projects
+// that share a project name, project code or buyer. Presumably Lock guards
+// Arr -- verify against the callers.
+type Key struct {
+	Arr  []string
+	Lock sync.Mutex
+}
+// IdAndLock pairs an id string with a mutex.
+type IdAndLock struct {
+	Id   string
+	Lock sync.Mutex
+}
+
+// NewKeyMap returns a KeyMap with an empty, ready-to-use Map and an unlocked
+// mutex.
+func NewKeyMap() *KeyMap {
+	km := new(KeyMap) // the zero-value Lock is an unlocked mutex
+	km.Map = make(map[string]*Key)
+	return km
+}
+
+// Info is the bidding-notice entity. The tagged fields are decoded from the
+// raw record via JSON; the remaining fields are filled during parsing and
+// comparison.
+type Info struct {
+	Id            string                 `json:"_id"`
+	Href          string                 `json:"href"` // source URL
+	Publishtime   int64                  `json:"publishtime"`
+	Title         string                 `json:"title"`
+	TopType       string                 `json:"toptype"`
+	SubType       string                 `json:"subtype"`
+	ProjectName   string                 `json:"projectname"`
+	ProjectCode   string                 `json:"projectcode"`
+	Buyer         string                 `json:"buyer"`
+	Buyerperson   string                 `json:"buyerperson"`
+	Buyertel      string                 `json:"buyertel"`
+	Agency        string                 `json:"agency"`
+	Area          string                 `json:"area"`
+	City          string                 `json:"city"`
+	District      string                 `json:"district"`
+	HasPackage    bool                   `json:"haspackage"`
+	Package       map[string]interface{} `json:"package"`
+	PNum          string                 `json:"pnum"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"`
+	Buyerclass    string                 `json:"buyerclass"`
+	Bidopentime   int64                  `json:"bidopentime"`
+	Budget        float64                `json:"budget"`
+	Bidamount     float64                `json:"bidamount"`
+	Winners       []string
+	dealtype      int
+
+	Winnerorder []string
+
+	PTC    string // project code extracted from the title
+	pnbval int    // how many of project name, project code and buyer are present
+	LenPC  int    // length of the project code
+	LenPN  int    // length of the project name
+	LenPTC int    // length of the project code extracted from the title
+	// The following three are used during comparison, when computing containment.
+	PNBH  int // 0 initial, + contains, - is contained
+	PCBH  int
+	PTCBH int
+}
+
+// ProjectInfo is the project entity: the merged view of all notices that
+// belong to one project. loadData decodes it straight from the project
+// collection.
+// NOTE(review): only Id carries a bson tag; the other fields have json tags,
+// so decoding from MongoDB presumably relies on the driver's default field
+// naming -- confirm.
+type ProjectInfo struct {
+	Id            bson.ObjectId          `bson:"_id"`
+	FirstTime     int64                  `json:"firsttime"` // earliest time of the project
+	LastTime      int64                  `json:"lasttime"`  // latest time of the project
+	Ids           []string               `json:"ids"`
+	Topscopeclass []string               `json:"topscopeclass"`
+	Subscopeclass []string               `json:"subscopeclass"` // sub-industry classification
+	Winners       []string               `json:"winners"`       // winning bidders
+	ProjectName   string                 `json:"projectname"`   // project name
+	ProjectCode   string                 `json:"projectcode"`   // project code, unique (purely numeric codes carry low weight)
+	Buyer         string                 `json:"buyer"`         // buyer, unique
+	MPN           []string               `json:"mpn"`           // extra project names left over after merging
+	MPC           []string               `json:"mpc"`           // extra project codes left over after merging
+	Buyerperson   string                 `json:"buyerperson"`   // buyer contact person
+	Buyertel      string                 `json:"buyertel"`      // buyer contact phone
+	Agency        string                 `json:"agency"`        // agency
+	Area          string                 `json:"area"`          // area
+	City          string                 `json:"city"`          // city
+	District      string                 `json:"district"`      // district / county
+	HasPackage    bool                   `json:"haspackage"`    // whether the project has sub-packages
+	Package       map[string]interface{} `json:"package"`       // sub-package object used for comparison
+	Buyerclass    string                 `json:"buyerclass"`    // buyer classification
+	Bidopentime   int64                  `json:"bidopentime"`   // bid opening time
+	//	Zbtime        int64                  `json:"zbtime"`        // tender time
+	//	Jgtime        int64                  `json:"jgtime"`        // result / award time
+	Bidamount float64 `json:"bidamount"` // winning bid amount
+	Budget    float64 `json:"budget"`    // budget
+	//Winnerorder []string `json:"winnerorder"` // winning candidates
+	score  int
+	comStr string
+}
+
+// BinarySearch looks k up in s with a binary search and returns the index of a
+// matching element, or -1 when k is not present (including for an empty or nil
+// slice).
+//
+// s does not have to be sorted by the caller: it is sorted in place, in
+// ascending order, whenever it is not already ordered, so the caller observes
+// the reordering. The order check is linear, which avoids a full sort on every
+// lookup of an already sorted slice.
+func BinarySearch(s []string, k string) int {
+	if !sort.StringsAreSorted(s) {
+		sort.Strings(s)
+	}
+	lo, hi := 0, len(s)-1
+	for lo <= hi {
+		m := lo + (hi-lo)/2
+		switch {
+		case s[m] < k:
+			lo = m + 1
+		case s[m] > k:
+			hi = m - 1
+		default:
+			return m
+		}
+	}
+	return -1
+}
+
+// CosineSimilar computes the text similarity of srcWords1 and dstWords1 as the
+// cosine of their character-frequency vectors.
+//
+// Both strings are split into single characters with strings.Split(s, "").
+// The result lies between 0 (no character in common) and 1 (identical
+// character distribution). When either string is empty, so that one of the
+// vectors has zero length, the function returns 0 instead of dividing by zero.
+func CosineSimilar(srcWords1, dstWords1 string) float64 {
+	// Character -> occurrence count for each input.
+	srcCounts := make(map[string]int)
+	for _, ch := range strings.Split(srcWords1, "") {
+		srcCounts[ch]++
+	}
+	dstCounts := make(map[string]int)
+	for _, ch := range strings.Split(dstWords1, "") {
+		dstCounts[ch]++
+	}
+
+	// Dot product and squared norms. Characters missing from one side
+	// contribute 0 to the dot product, so iterating over srcCounts is enough.
+	dot, srcSq, dstSq := 0, 0, 0
+	for ch, n := range srcCounts {
+		dot += n * dstCounts[ch]
+		srcSq += n * n
+	}
+	for _, n := range dstCounts {
+		dstSq += n * n
+	}
+	if srcSq == 0 || dstSq == 0 {
+		// An empty input has no direction; report "not similar".
+		return 0
+	}
+	return float64(dot) / math.Sqrt(float64(srcSq*dstSq))
+}

+ 70 - 0
fullproject/src_dev5/load_data.go

@@ -0,0 +1,70 @@
+package main
+
+import (
+	//"encoding/json"
+	"log"
+)
+
+// Initial data load; per the original note the default is the most recent six
+// months of data.
+
+// loadData reads every project whose lasttime is greater than starttime from
+// the task's collection and indexes it in memory: mapPn by project name (and
+// MPN entries), mapPc by project code (and MPC entries), mapPb by buyer, and
+// AllIdsMap by project id. Names and codes are cleaned with pcReplace before
+// being used as keys. findLock and AllIdsMapLock are held for the whole load.
+func (p *ProjectTask) loadData(starttime int64) {
+	p.findLock.Lock()
+	defer p.findLock.Unlock()
+	p.AllIdsMapLock.Lock()
+	defer p.AllIdsMapLock.Unlock()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	q := map[string]interface{}{
+		"lasttime": map[string]interface{}{"$gt": starttime},
+	}
+	it := sess.DB(MongoTool.DbName).C(p.coll).Find(&q).Iter()
+	tmp := &ProjectInfo{}
+	n := 0
+	for it.Next(tmp) {
+		n++
+		if n%1000 == 0 {
+			log.Println("current", n, "\n", tmp.Id, len(p.mapPn), len(p.mapPc), len(p.mapPb), tmp.ProjectName, tmp.MPN, tmp.ProjectCode, tmp.MPC, tmp.Buyer)
+		}
+		// Index by project name: the main name plus the extra names in MPN.
+		for _, v := range append([]string{tmp.ProjectName}, tmp.MPN...) {
+			if v != "" {
+				v = pcReplace.ReplaceAllString(v, "")
+				if v != "" {
+					k := p.mapPn[v]
+					if k == nil {
+						k = &Key{Arr: []string{tmp.Id.Hex()}}
+						p.mapPn[v] = k
+					} else {
+						k.Arr = append(k.Arr, tmp.Id.Hex())
+					}
+				}
+			}
+		}
+		// Index by project code: the main code plus the extra codes in MPC.
+		for _, v := range append([]string{tmp.ProjectCode}, tmp.MPC...) {
+			if v != "" {
+				v = pcReplace.ReplaceAllString(v, "")
+				if v != "" {
+					k := p.mapPc[v]
+					if k == nil {
+						k = &Key{Arr: []string{tmp.Id.Hex()}}
+						p.mapPc[v] = k
+					} else {
+						k.Arr = append(k.Arr, tmp.Id.Hex())
+					}
+				}
+			}
+		}
+		// Index by buyer; buyers of two runes or fewer are skipped.
+		if tmp.Buyer != "" && len([]rune(tmp.Buyer)) > 2 {
+			k := p.mapPb[tmp.Buyer]
+			if k == nil {
+				k = &Key{Arr: []string{tmp.Id.Hex()}}
+				p.mapPb[tmp.Buyer] = k
+			} else {
+				k.Arr = append(k.Arr, tmp.Id.Hex())
+			}
+		}
+		p.AllIdsMap[tmp.Id.Hex()] = &ID{Id: tmp.Id.Hex(), P: tmp}
+		// tmp is now owned by AllIdsMap; decode the next document into a fresh value.
+		tmp = &ProjectInfo{}
+	}
+
+	log.Println("load over..", n)
+}

+ 78 - 0
fullproject/src_dev5/main.go

@@ -0,0 +1,78 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	"time"
+)
+
+var (
+	udpclient mu.UdpClient // UDP client object
+	// SingleThread is a one-slot channel: processUdpMsg sends to it before
+	// starting a task and the task receives from it when done, so only one
+	// task runs at a time.
+	SingleThread = make(chan bool, 1)
+)
+
+func main() {
+	//udp跑增量  id段   project
+	//udp跑全量			ql
+	//udp跑历史数据  信息id1,id2/或id段  ls
+	//udp强制合并  信息id1,id2,id3 [项目id] 不存在时新建  qzhb
+	//udp强制拆分  项目id,信息id1,id2          qzcf
+	//udp重新合并  信息id1,id2,id3             cxhb
+	P_QL.loadData(0)
+	time.Sleep(99999 * time.Hour)
+}
+
+// processUdpMsg handles an incoming UDP message.
+//
+// For mu.OP_TYPE_DATA the payload is decoded as a JSON object. A decode error
+// is reported back to the sender; otherwise the message is acknowledged with
+// its "key" (or "udpok") and the task named by "stype" is started in its own
+// goroutine. Tasks are serialised through SingleThread: the handler blocks
+// until the previous task has finished, and every task releases the slot when
+// it ends. An unrecognised "stype" releases the slot immediately, so it cannot
+// block all later tasks.
+//
+// For mu.OP_NOOP (reply from the next node) a non-empty payload is only logged.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	switch act {
+	case mu.OP_TYPE_DATA: // data from the previous node
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+		} else if mapInfo != nil {
+			key, _ := mapInfo["key"].(string)
+			if key == "" {
+				key = "udpok"
+			}
+			go udpclient.WriteUdp([]byte(key), mu.OP_NOOP, ra)
+			// Take the single task slot; blocks while another task is running.
+			SingleThread <- true
+			tasktype, _ := mapInfo["stype"].(string)
+			log.Println("tasktype:", tasktype)
+			switch tasktype {
+			case "ql": // full merge
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+					P_QL.currentType = tasktype
+					P_QL.taskQl(mapInfo)
+				}()
+			case "project": // incremental merge; records without an extracted project name or project code are not merged. mergestatus in bidding: 1 merged, 2 not merged because of field problems, 3 historical, waiting to be merged
+				// Contract and acceptance notices that cannot be matched within 6 months may be extended to two years.
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+					P_QL.currentType = tasktype
+					P_QL.taskZl(mapInfo)
+				}()
+
+			case "history": // historical data merge, not implemented yet
+				go func() {
+					defer func() {
+						<-SingleThread
+					}()
+				}()
+			default:
+				// No task was started, so nothing else would ever release the
+				// slot taken above.
+				log.Println("unknown tasktype:", tasktype)
+				<-SingleThread
+			}
+		}
+	case mu.OP_NOOP: // reply from the next node
+		ok := string(data)
+		if ok != "" {
+			log.Println("ok:", ok)
+		}
+	}
+}

+ 187 - 0
fullproject/src_dev5/merge_comparepnc.go

@@ -0,0 +1,187 @@
+package main
+
+import (
+	"strings"
+)
+
+// Compare project name and project code.
+
+// comparePNC compares a record with a candidate project and returns a
+// three-letter compare string plus a score.
+//
+// The letters describe, in order, the project name, the project code and the
+// project code taken from the title (PTC). For each position:
+//
+//	A - equal (for names also when CheckContain returns 1; for codes also when
+//	    the longer value only adds text that is empty after removing dates)
+//	B - one contains the other, or (names only) cosine similarity above 0.81
+//	C - candidates existed but none matched
+//	D - nothing to compare (the record's value is empty, or the project has no
+//	    candidate values)
+//
+// An A adds 4 to score (+2 more when the value is longer than 18 runes); a B
+// adds 2 (+1 more when the overlap length is above 12). For a B the overlap
+// length is also stored on info in PNBH / PCBH / PTCBH.
+func comparePNC(info *Info, compareProject *ProjectInfo) (compareStr string, score int) {
+	if info.ProjectName != "" {
+		// Candidate names: the project's main name plus its extra names.
+		pns := []string{}
+		if compareProject.ProjectName != "" {
+			pns = append(pns, compareProject.ProjectName)
+		}
+		if len(compareProject.MPN) > 0 {
+			pns = append(pns, compareProject.MPN...)
+		}
+		// ifind: 0 nothing compared, 1 equal, 2 contained/similar, 3 no match.
+		ifind := 0
+		templen := 0
+		for _, v := range pns {
+			if info.ProjectName == v {
+				ifind = 1
+				break
+			} else {
+				//if strings.Contains(info.ProjectName, v) || strings.Contains(v, info.ProjectName) ||
+				retv := CheckContain(info.ProjectName, v)
+				if retv == 1 {
+					ifind = 1
+					break
+				} else {
+					v1 := CosineSimilar(info.ProjectName, v)
+					if retv == 2 || v1 > 0.81 {
+						templen = len([]rune(v))
+						ifind = 2
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+		}
+		switch ifind {
+		case 0:
+			compareStr = "D"
+		case 1:
+			compareStr = "A"
+			score += 4
+			if len([]rune(info.ProjectName)) > 18 {
+				score += 2
+			}
+		case 2:
+			compareStr = "B"
+			score += 2
+			// The overlap cannot be longer than the record's own name.
+			if templen > info.LenPN {
+				templen = info.LenPN
+			}
+			info.PNBH = templen
+			if templen > 12 {
+				score += 1
+			}
+		case 3:
+			compareStr = "C"
+		}
+	} else {
+		compareStr = "D"
+	}
+
+	/*
+				Project code: watch out for - -()()
+				init_text = ["号","(重)","(第二次)","(重)"]
+		all_clean_mark = ["[","(","【","(","〖","]",")","】",")","〗","-","〔","〕","《","[","]","{","}","{","—"," ","-","﹝","﹞","–"]
+	*/
+	// index 0 is the record's project code, index 1 the code taken from its title.
+	for index, pc := range []string{info.ProjectCode, info.PTC} {
+		if pc != "" {
+			// Candidate codes: the project's main code plus its extra codes.
+			pcs := []string{}
+			if compareProject.ProjectCode != "" {
+				pcs = append(pcs, compareProject.ProjectCode)
+			}
+			if len(compareProject.MPC) > 0 {
+				pcs = append(pcs, compareProject.MPC...)
+			}
+			ifind := 0
+			templen := 0
+			for _, v := range pcs {
+				if pc == v {
+					ifind = 1
+					break
+				} else {
+					// math.Abs(float64(len([]rune(pc))-len([]rune(v)))) < 6
+					//if !_numreg1.MatchString(pc) && !_zimureg1.MatchString(pc) && !_numreg1.MatchString(v) && !_zimureg1.MatchString(v)
+					if strings.Contains(pc, v) || strings.Contains(v, pc) {
+						// t1 is the longer value, t2 the shorter one; t3 is what
+						// the longer one adds, with dates removed.
+						t1 := pc
+						t2 := v
+						if len(v) > len(pc) {
+							t1 = v
+							t2 = pc
+						}
+						t3 := strings.Replace(t1, t2, "", -1)
+						t3 = _datereg.ReplaceAllString(t3, "")
+						if t3 == "" {
+							ifind = 1
+							break
+						} else {
+							ifind = 2
+							templen = len([]rune(v))
+						}
+					} else if ifind == 0 {
+						ifind = 3
+					}
+				}
+			}
+			switch ifind {
+			case 0:
+				compareStr += "D"
+			case 1:
+				compareStr += "A"
+				score += 4
+				if len([]rune(pc)) > 18 {
+					score += 2
+				}
+			case 2:
+				compareStr += "B"
+				score += 2
+				if index == 0 {
+					if templen > info.LenPC {
+						templen = info.LenPC
+					}
+					info.PCBH = templen
+					if templen > 12 {
+						score += 1
+					}
+
+				} else {
+					if templen > info.LenPTC {
+						templen = info.LenPTC
+					}
+					info.PTCBH = templen
+					if templen > 12 {
+						score += 1
+					}
+				}
+
+			case 3:
+				compareStr += "C"
+			}
+
+		} else {
+			compareStr += "D"
+		}
+	}
+	return
+}
+
+// CheckContain compares two names after removing the replaceStr matches from
+// both and returns 1 when they are equal, 2 when one is judged to be contained
+// in the other, and 3 otherwise.
+//
+// Unless the cleaned strings are already equal, the function scans all rune
+// pairs, collects runs of matching runes in tmp and deletes every collected
+// run from both strings. The verdict is taken from what is left: equal
+// remainders give 1, exactly one empty remainder gives 2.
+func CheckContain(b1, b2 string) (res int) {
+	b1 = replaceStr.ReplaceAllString(b1, "")
+	b2 = replaceStr.ReplaceAllString(b2, "")
+
+	if b1 == b2 {
+		res = 1 // equal
+		return
+	}
+	// The rune slices are snapshots taken before any removal; b1 and b2 are
+	// shortened while the loops keep iterating over the snapshots.
+	bs1 := []rune(b1)
+	bs2 := []rune(b2)
+	tmp := ""
+	for i := 0; i < len(bs1); i++ {
+		for j := 0; j < len(bs2); j++ {
+			if bs1[i] == bs2[j] {
+				tmp += string(bs1[i])
+			} else if tmp != "" {
+				// A mismatch ends the current run: remove it from both strings.
+				b1 = strings.Replace(b1, tmp, "", -1)
+				b2 = strings.Replace(b2, tmp, "", -1)
+				tmp = ""
+			}
+		}
+	}
+	// Remove a run that was still open when the scan ended.
+	if tmp != "" {
+		b1 = strings.Replace(b1, tmp, "", -1)
+		b2 = strings.Replace(b2, tmp, "", -1)
+	}
+	if b1 == b2 {
+		res = 1 // equal
+	} else if b1 == "" || b2 == "" {
+		res = 2 // contained
+	} else {
+		res = 3 // not equal
+	}
+	return
+}

+ 510 - 0
fullproject/src_dev5/merge_select.go

@@ -0,0 +1,510 @@
+package main
+
+// Select scores a three-letter comparison string produced for
+// (project name, project code, title project code). Each letter is
+// A (equal), B (contained), C (different) or D (missing).
+//
+// res is the match strength (3 is the highest, 0 means no match) and pj is
+// the rating attached to that pattern. Patterns listed in compareNoPass
+// yield 0, 0.
+//
+// Convention used by the table: a field that compared as A is judged by its
+// full length (LenPN / LenPC / LenPTC), a field that compared as B by the
+// length of the contained part (PNBH / PCBH / PTCBH).
+//
+// The "BAB", "BBA" and "DBA" cases previously tested the length of the wrong
+// code field, which made one of their conditions redundant; they now follow
+// the convention above.
+func Select(compareStr string, info *Info, compareInfo *ProjectInfo) (res, pj int) {
+	// Nothing comparable (no usable project name or project code).
+	if compareNoPass[compareStr] {
+		return
+	}
+	// codeLenDiffers reports whether the two project codes have different
+	// rune lengths; it is evaluated lazily, only by the cases that need it.
+	codeLenDiffers := func() bool {
+		return info.LenPC != len([]rune(compareInfo.ProjectCode))
+	}
+	switch compareStr {
+	case "AAA":
+		res = 3
+		pj = 3
+	case "AAB":
+		res = 3
+		pj = 3
+	case "ABA":
+		res = 3
+		pj = 3
+	case "ABB":
+		if info.LenPN > 10 || info.PCBH > 8 || info.PTCBH > 8 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 3
+	case "BAA":
+		if info.PNBH > 10 || info.LenPC > 8 || info.LenPTC > 8 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 3
+	case "BAB":
+		// Project code is the equal field here, so its full length applies.
+		if info.PNBH > 10 || info.LenPC > 8 || info.PTCBH > 8 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 3
+	case "BBA":
+		// Title project code is the equal field here, so its full length applies.
+		if info.PNBH > 10 || info.PCBH > 8 || info.LenPTC > 8 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 3
+	case "BBB":
+		v := 0
+		if info.PNBH > 10 {
+			v++
+		}
+		if info.PCBH > 8 {
+			v++
+		}
+		if info.PTCBH > 8 {
+			v++
+		}
+		if v > 1 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 2
+	case "AAD":
+		if info.LenPC > 8 || info.LenPN > 12 {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 3
+	case "ABD":
+		if info.LenPN > 10 && info.PCBH > 8 {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 || info.PCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BAD":
+		if info.LenPC > 13 || (info.PNBH > 10 && info.LenPC > 8) {
+			res = 3
+			pj = 3
+		} else if info.PNBH > 10 || info.LenPC > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BBD":
+		if info.PNBH > 12 && info.PCBH > 10 {
+			res = 3
+			pj = 1
+		} else if info.PNBH > 10 && info.PCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "ADA":
+		if info.LenPN > 12 || (info.LenPTC > 8 && !StrOrNum2.MatchString(info.PTC)) {
+			res = 3
+		} else {
+			res = 2
+		}
+		pj = 2
+	case "ADB":
+		if info.LenPN > 10 && info.PTCBH > 8 && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 || info.PTCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BDA":
+		if info.PNBH > 10 && info.LenPTC > 8 && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 2
+		} else if info.PNBH > 10 || info.LenPTC > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BDB":
+		if info.PNBH > 12 && info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+		} else if info.PNBH > 10 && info.PTCBH > 8 {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 2
+	case "ADD":
+		if info.LenPN > 18 {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "BDD":
+		if info.PNBH > 10 {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "DAA":
+		if info.LenPTC > 8 || info.LenPC > 8 {
+			res = 3
+			pj = 2
+		} else {
+			res = 2
+			pj = 3
+		}
+	case "DAB":
+		if info.LenPC > 8 && info.PTCBH > 8 {
+			res = 3
+			pj = 2
+		} else if info.LenPC > 8 || info.PTCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "DAD":
+		if info.LenPC > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+			pj = 2
+		} else if info.LenPC > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "DBA":
+		// Title project code is the equal field here, so its full length applies.
+		if info.PCBH > 8 && info.LenPTC > 8 && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 2
+		} else if info.PCBH > 8 || info.LenPTC > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "DBB":
+		if info.PCBH > 10 && info.PTCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+			pj = 1
+		} else if info.PCBH > 8 && info.PTCBH > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "DBD":
+		if info.PCBH > 12 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 2
+			pj = 1
+		} else {
+			res = 1
+			pj = 1
+		}
+	case "DDA":
+		if info.LenPTC > 14 && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 1
+		} else if info.LenPTC > 8 {
+			res = 2
+			pj = 1
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "DDB":
+		if info.PTCBH > 12 && !StrOrNum2.MatchString(info.PTC) {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "ACA":
+		if info.LenPN > 10 && info.LenPTC > 8 && codeLenDiffers() && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 || info.LenPTC > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "ACB":
+		if info.LenPN > 10 && info.PTCBH > 8 && codeLenDiffers() && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 || info.PTCBH > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BCA":
+		// Parentheses spell out Go's precedence (&& binds tighter than ||);
+		// the grouping is unchanged.
+		// NOTE(review): confirm the first alternative is meant to bypass the
+		// code-length and StrOrNum2 checks.
+		if (info.PNBH > 10 && info.LenPTC > 8) || (info.LenPTC > 12 && codeLenDiffers() && !StrOrNum2.MatchString(info.PTC)) {
+			res = 3
+			pj = 2
+		} else if info.PNBH > 10 || info.LenPTC > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BCB":
+		if info.PNBH > 12 && info.PTCBH > 12 && codeLenDiffers() && !StrOrNum2.MatchString(info.PTC) {
+			res = 3
+			pj = 1
+		} else if info.PNBH > 10 || info.PTCBH > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "AAC":
+		if (info.LenPN > 10 && info.LenPC > 8) || info.LenPN > 14 || (info.LenPC > 10 && !StrOrNum2.MatchString(info.ProjectCode)) {
+			res = 3
+			pj = 3
+		} else {
+			res = 2
+			pj = 3
+		}
+	case "ABC":
+		if info.LenPN > 14 && info.PCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+			pj = 2
+		} else if info.LenPN > 10 || info.PCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BAC":
+		if info.PNBH > 14 && info.LenPC > 8 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+			pj = 2
+		} else if info.PNBH > 10 || info.LenPC > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "BBC":
+		if info.PNBH > 14 && info.PCBH > 10 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+			pj = 1
+		} else if info.PNBH > 10 || info.PCBH > 8 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "ACC":
+		if codeLenDiffers() {
+			if info.LenPN > 16 {
+				res = 2
+				pj = 1
+			} else {
+				res = 1
+				pj = 2
+			}
+		}
+	case "ACD":
+		// Only considered when the project codes differ in length.
+		if codeLenDiffers() {
+			if info.LenPN > 16 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		}
+	case "ADC":
+		if info.LenPN > 16 {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "BCC":
+		// Only considered when the project codes differ in length.
+		if codeLenDiffers() {
+			if info.PNBH > 12 {
+				res = 1
+			}
+			pj = 1
+		}
+	case "BCD":
+		// Only considered when the project codes differ in length.
+		if codeLenDiffers() {
+			if info.PNBH > 8 {
+				res = 1
+			}
+			pj = 1
+		}
+	case "BDC":
+		if info.PNBH > 7 {
+			res = 1
+		}
+		pj = 1
+	case "CAA":
+		if info.LenPC > 12 || info.LenPTC > 12 {
+			res = 3
+			pj = 2
+		} else if info.LenPC > 8 || info.LenPTC > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "CAB":
+		if info.LenPC > 12 && info.PTCBH > 8 {
+			res = 3
+			pj = 2
+		} else if info.LenPC > 12 || info.PTCBH > 8 {
+			res = 2
+			pj = 3
+		} else {
+			res = 1
+			pj = 3
+		}
+	case "CAC":
+		if info.LenPC > 9 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 1
+		}
+	case "CAD":
+		if info.LenPC > 9 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "CBA":
+		if info.LenPTC > 14 && info.PCBH > 12 {
+			res = 3
+			pj = 2
+		} else if info.LenPTC > 12 || info.PCBH > 10 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "CBB":
+		if info.PCBH > 13 && info.PTCBH > 13 {
+			res = 3
+			pj = 1
+		} else if info.PCBH > 9 || info.PTCBH > 9 {
+			res = 2
+			pj = 2
+		} else {
+			res = 1
+			pj = 2
+		}
+	case "CBC":
+		if info.PCBH > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 2
+		} else if info.PCBH > 5 {
+			res = 1
+		}
+		pj = 1
+	case "CBD":
+		if info.PCBH > 14 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 2
+		} else if info.PCBH > 5 {
+			res = 1
+		}
+		pj = 1
+	case "CCA":
+		if codeLenDiffers() {
+			if info.LenPTC > 12 && !StrOrNum2.MatchString(info.PTC) {
+				res = 2
+			} else if info.LenPTC > 5 {
+				res = 1
+			}
+			pj = 1
+		}
+	case "CCB":
+		if codeLenDiffers() {
+			if info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+				res = 1
+			}
+			pj = 1
+		}
+	case "CDA":
+		if info.LenPTC > 12 && !StrOrNum2.MatchString(info.PTC) {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "CDB":
+		if info.PTCBH > 10 && !StrOrNum2.MatchString(info.PTC) {
+			res = 1
+			pj = 1
+		}
+	case "DAC":
+		if info.LenPC > 13 && !StrOrNum2.MatchString(info.ProjectCode) {
+			res = 3
+		} else if info.LenPC > 8 {
+			res = 2
+		} else {
+			res = 1
+		}
+		pj = 1
+	case "DBC":
+		if info.PCBH > 8 {
+			res = 1
+		}
+		pj = 1
+	case "DCA":
+		if codeLenDiffers() {
+			if info.LenPTC > 10 {
+				res = 2
+			} else {
+				res = 1
+			}
+			pj = 1
+		}
+	case "DCB":
+		if codeLenDiffers() {
+			if info.PTCBH > 8 && !StrOrNum2.MatchString(info.PTC) {
+				res = 1
+			}
+			pj = 1
+		}
+	}
+	return
+}

+ 711 - 0
fullproject/src_dev5/project.go

@@ -0,0 +1,711 @@
+package main
+
+import (
+	"log"
+	//	"log"
+	"math"
+	qu "qfw/util"
+	"sort"
+	"strings"
+	"time"
+
+	"gopkg.in/mgo.v2/bson"
+)
+
+/**
+项目合并,对比,计算,合并,生成项目
+**/
+
+// getCompareIds collects the candidate projects that share a project name,
+// project code, title project code or buyer with the incoming record.
+//
+// For every non-empty key it looks up (or creates) the bucket in the matching
+// index map, locks the bucket and appends it to res. bpn/bpc/bptc/bpb hold
+// the position in res of a bucket that was newly created for that key, or -1
+// when the key was empty or its bucket already existed.
+//
+// idArr/IDArr list, without duplicates, the ids found in those buckets and
+// their entries in AllIdsMap; every returned *ID is locked.
+//
+// The call holds findLock for its whole duration and does p.wg.Add(1). The
+// caller must unlock every returned bucket and ID and call p.wg.Done().
+func (p *ProjectTask) getCompareIds(pn, pc, ptc, pb string) (bpn, bpc, bptc, bpb int, res []*Key, idArr []string, IDArr []*ID) {
+	p.findLock.Lock()
+	defer p.findLock.Unlock()
+	p.wg.Add(1)
+	res = []*Key{}
+
+	// acquire locks the bucket stored under key in index, creating it first if
+	// needed, and appends it to res. It returns the bucket's position in res
+	// when the bucket was newly created and -1 otherwise.
+	acquire := func(index map[string]*Key, key string) int {
+		pos := -1
+		if key == "" {
+			return pos
+		}
+		bucket := index[key]
+		if bucket == nil {
+			bucket = &Key{Arr: []string{}}
+			index[key] = bucket
+			pos = len(res)
+		}
+		bucket.Lock.Lock()
+		res = append(res, bucket)
+		return pos
+	}
+
+	bpn = acquire(p.mapPn, pn)
+	bpc = acquire(p.mapPc, pc)
+	// Project code and title project code share mapPc. When both values are
+	// identical they resolve to the same bucket, which is already locked and
+	// listed above; locking it a second time would block this goroutine on
+	// itself (assuming Key.Lock is a sync mutex, which is not reentrant).
+	bptc = -1
+	if ptc != pc {
+		bptc = acquire(p.mapPc, ptc)
+	}
+	bpb = acquire(p.mapPb, pb)
+
+	seen := map[string]bool{}
+	idArr = []string{} // project ids
+	IDArr = []*ID{}    // project entries
+	for _, bucket := range res {
+		for _, id := range bucket.Arr {
+			if seen[id] {
+				continue
+			}
+			seen[id] = true
+			p.AllIdsMapLock.Lock()
+			entry := p.AllIdsMap[id]
+			p.AllIdsMapLock.Unlock()
+			if entry != nil {
+				entry.Lock.Lock()
+				idArr = append(idArr, id)
+				IDArr = append(IDArr, entry)
+			}
+		}
+	}
+	return
+}
+
+func (p *ProjectTask) startProjectMerge(info *Info, tmp map[string]interface{}) {
+	//只有或没有采购单位的无法合并
+	//bpn, bpc, bptc, bpb 是否查找到,并标识位置。-1代表未查找到。
+	//pids 是项目id数组集合
+	//IDArr,是单个项目ID对象集合
+	bpn, bpc, bptc, bpb, pids, _, IDArr := p.getCompareIds(info.ProjectName, info.ProjectCode, info.PTC, info.Buyer)
+	defer p.wg.Done()
+	//map--k为pn,ptn,pc,ptc,buyer值 v为Id数组和lock
+
+	for _, m := range pids {
+		defer m.Lock.Unlock()
+	}
+	for _, id := range IDArr {
+		defer id.Lock.Unlock()
+	}
+
+	bFindProject := false
+	findPid := ""
+	//获取完id,进行计算
+	//定义两组
+	comRes1 := []*ProjectInfo{} //优先级最高的对比结果数组
+	comRes2 := []*ProjectInfo{} //优化级其次
+	comRes3 := []*ProjectInfo{}
+	resVal, pjVal := 0, 0
+	for _, v := range IDArr {
+		comStr := ""
+		compareProject := v.P
+		compareProject.score = 0
+		//问题出地LastTime!!!!!
+		diffTime := int64(math.Abs(float64(info.Publishtime - compareProject.LastTime)))
+		if diffTime < 185*86400 {
+			//"A 相等 	B 被包含 	C 不相等	 	D不存在  E被包含
+			info.PNBH = 0
+			info.PCBH = 0
+			info.PTCBH = 0
+			compareStr, score := comparePNC(info, compareProject)
+
+			resVal, pjVal = Select(compareStr, info, compareProject)
+			//---------------------------------------
+			//log.Println(resVal, pjVal, compareProject)
+			if resVal > 0 {
+
+				compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount, score2 := p.compareBCTABB(info, compareProject, diffTime, score)
+
+				//项目名称、项目编号、标题项目编号、采购单位、省、市、发布时间、代理机构
+
+				comStr = compareStr + compareBuyer + compareCity + compareTime + compareAgency + compareBudget + compareBidmount
+				compareProject.comStr = comStr
+				//log.Println(compareProject.comStr)
+				eqV := 0
+				switch resVal {
+				case 3:
+					if pjVal == 3 && comStr[3:] != "CCCDCCC" {
+						eqV = 1
+					} else if compareBuyer < "C" {
+						if pjVal > 1 {
+							eqV = 1
+						} else { //if (compareCity[1:1] != "C" || compareTime != "D") && score2 > 0
+							eqV = 2
+						}
+					} else if compareBuyer == "D" {
+						if pjVal > 1 && (compareCity[1:1] != "C" || score2 > 0) {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+							eqV = 3
+						}
+					} else {
+						if pjVal == 3 && (score2 > 0 || compareCity[1:1] != "C") {
+							eqV = 2
+						} else if pjVal == 2 && compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+							eqV = 3
+						} else if compareCity == "AA" && compareTime == "A" && score2 > 0 {
+							eqV = 3
+						}
+					}
+				case 2:
+					if compareBuyer < "C" {
+						if pjVal > 1 {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" || score2 > 0 {
+							eqV = 3
+						}
+					} else if compareBuyer == "D" {
+						if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" && score2 > 0 {
+							eqV = 3
+						}
+
+					} else {
+						if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+							eqV = 3
+						}
+					}
+				case 1:
+					if compareBuyer < "C" {
+						if pjVal > 1 && (score2 > 0 || compareCity[1:1] != "C") {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+							eqV = 3
+						}
+					} else if compareBuyer == "D" {
+						if pjVal > 1 && compareTime == "A" && (score2 > 0 || compareCity[1:1] != "C") {
+							eqV = 2
+						} else if compareCity[1:1] != "C" && compareTime == "A" && (compareAgency == "A" || score2 > 1) {
+							eqV = 3
+						}
+					} else {
+						if pjVal > 1 && compareTime == "A" && score2 > 1 && compareCity[1:1] != "C" {
+							eqV = 3
+						}
+					}
+				}
+				if eqV == 1 {
+					comRes1 = append(comRes1, compareProject)
+				} else if eqV == 2 {
+					comRes2 = append(comRes2, compareProject)
+				} else if eqV == 3 {
+					comRes3 = append(comRes3, compareProject)
+				}
+				//				else if resVal == 3 || pjVal > 1 {
+				//					log.Println("===", resVal, pjVal, comStr, info.ProjectCode, compareProject.ProjectCode,
+				//						info.ProjectName, compareProject.ProjectName, info.Buyer, compareProject.Buyer, info.Id, compareProject.Id.Hex())
+				//				}
+			}
+		}
+	}
+	//--------------------------------对比完成-----------------------
+	//更新数组、更新项目
+	for kv, resN := range [][]*ProjectInfo{comRes1, comRes2, comRes3} {
+		if len(resN) > 0 {
+			if len(resN) > 1 {
+				sort.Slice(resN, func(i, j int) bool {
+					return resN[i].score > resN[j].score
+				})
+			}
+
+			bFindProject = true
+			findPid = resN[0].Id.Hex()
+			for k2, bv := range []int{bpn, bpc, bptc, bpb} {
+				if bv > -1 {
+					pids[bv].Arr = append(pids[bv].Arr, findPid)
+					if k2 == 0 {
+						if resN[0].ProjectName == "" {
+							resN[0].ProjectName = info.ProjectName
+						} else {
+							if resN[0].MPN == nil {
+								resN[0].MPN = []string{info.ProjectName}
+							} else {
+								resN[0].MPN = append(resN[0].MPN, info.ProjectName)
+							}
+						}
+
+					} else if k2 < 3 {
+						if resN[0].ProjectCode == "" {
+							resN[0].ProjectCode = qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)
+						} else {
+							if resN[0].MPC == nil {
+								resN[0].MPC = []string{qu.If(k2 == 1, info.ProjectCode, info.PTC).(string)}
+							} else {
+								resN[0].MPC = append(resN[0].MPC, qu.If(k2 == 1, info.ProjectCode, info.PTC).(string))
+							}
+						}
+
+					} else {
+						if resN[0].Buyer == "" {
+							resN[0].Buyer = info.Buyer
+						}
+					}
+				}
+			}
+			p.UpdateProject(tmp, info, resN[0], kv+1, resN[0].comStr, resVal, pjVal)
+			break
+		}
+	}
+
+	if !bFindProject {
+		//没有找到
+		id, p1 := p.NewProject(tmp, info)
+		p.AllIdsMapLock.Lock()
+		p.AllIdsMap[id] = &ID{Id: id, P: p1}
+		p.AllIdsMapLock.Unlock()
+		for _, m := range pids {
+			m.Arr = append(m.Arr, id)
+		}
+	}
+
+}
+
+// compareBCTABB compares the secondary attributes of a record and a candidate
+// project: buyer, area/city, publish-time distance, agency, budget and bid
+// amount. Each result is a letter code (compareCity is two letters: area then
+// city). score is increased for every agreement and stored in cp.score;
+// score2 counts district, agency, budget and bid-amount agreements.
+func (p *ProjectTask) compareBCTABB(info *Info, cp *ProjectInfo, diffTime int64, score int) (compareBuyer, compareCity, compareTime, compareAgency, compareBudget, compareBidmount string, score2 int) {
+	// Buyer: only compared when both names are longer than three runes.
+	compareBuyer = "D"
+	if len([]rune(info.Buyer)) > 3 && len([]rune(cp.Buyer)) > 3 {
+		contain := CheckContain(info.Buyer, cp.Buyer)
+		if contain == 1 {
+			compareBuyer = "A"
+			score += 3
+		} else {
+			similarity := CosineSimilar(info.Buyer, cp.Buyer)
+			if contain == 2 || similarity > 0.8 {
+				compareBuyer = "B"
+				score += 1
+			} else {
+				compareBuyer = "C"
+			}
+		}
+	}
+
+	// Area (first letter of compareCity).
+	nationwide := info.Area == "全国" || cp.Area == "全国"
+	switch {
+	case info.Area != "全国" && info.Area != "" && info.Area == cp.Area:
+		compareCity = "A"
+		score += 2
+	case nationwide:
+		compareCity = "B"
+		score += 1
+	default:
+		compareCity = "C"
+	}
+
+	// City (second letter of compareCity).
+	switch {
+	case compareCity == "C":
+		compareCity += "C"
+	case info.City != "" && info.City == cp.City:
+		compareCity += "A"
+		score += 2
+	// NOTE(review): info.City is compared with the one-letter area code held
+	// in compareCity, which looks unintended; kept as is, confirm the intent.
+	case nationwide || info.City == compareCity:
+		compareCity += "B"
+	default:
+		compareCity += "C"
+	}
+
+	if compareCity == "AA" && info.District != "" && info.District == cp.District {
+		score2 = 1
+	}
+
+	// Publish-time distance: under 45 days, under 90 days, otherwise "D".
+	switch {
+	case diffTime < 45*86400:
+		compareTime = "A"
+		score += 2
+	case diffTime < 90*86400:
+		compareTime = "B"
+		score += 1
+	default:
+		compareTime = "D"
+	}
+
+	// Agency: "D" unless both sides name one.
+	compareAgency = "D"
+	switch {
+	case info.Agency == "":
+	case info.Agency == cp.Agency:
+		compareAgency = "A"
+		score += 2
+		score2 += 1
+	case cp.Agency == "":
+	case strings.Contains(info.Agency, cp.Agency) || strings.Contains(cp.Agency, info.Agency):
+		compareAgency = "B"
+		score += 1
+		score2 += 1
+	default:
+		compareAgency = "C"
+	}
+
+	// Budget: equal, or the candidate's bid amount lies less than 10% below it.
+	switch {
+	case info.Budget > 0 && (info.Budget == cp.Budget || (cp.Bidamount > 0 && info.Budget > cp.Bidamount && (info.Budget-cp.Bidamount) < (0.1*info.Budget))):
+		compareBudget = "A"
+		score += 1
+		score2 += 1
+	case info.Budget == 0 && cp.Budget == 0:
+		compareBudget = "B"
+	default:
+		compareBudget = "C"
+	}
+
+	// Bid amount: equal, or less than 10% below the candidate's budget.
+	switch {
+	case info.Bidamount > 0 && (info.Bidamount == cp.Bidamount || (cp.Budget > 0 && cp.Budget > info.Bidamount && (cp.Budget-info.Bidamount) < 0.1*cp.Budget)):
+		compareBidmount = "A"
+		score += 1
+		score2 += 1
+	case info.Bidamount == 0 && cp.Bidamount == 0:
+		compareBidmount = "B"
+	default:
+		compareBidmount = "C"
+	}
+
+	cp.score = score
+	return
+}
+
+// FIELDS lists the keys NewProject copies from a source record into the
+// "$set" document of a newly created project.
+var FIELDS = []string{
+	// location
+	"area", "city", "district",
+	// project identity
+	"projectname", "projectcode",
+	// buyer
+	"buyer", "buyerclass", "buyerperson", "buyertel",
+	// outcome and amounts
+	"winner", "budget", "bidamount", "bidstatus",
+	// remaining attributes
+	"agency", "projectscope", "bidopentime",
+	"topscopeclass", "subscopeclass",
+	"winnerorder", "package",
+}
+
+// bidtype maps a record's SubType to the bid type stored on the project.
+// Every listed key maps to itself; NewProject falls back to "招标" and
+// UpdateProject leaves the stored value alone when the SubType is not listed.
+var bidtype = map[string]string{
+	"招标": "招标",
+	"邀标": "邀标",
+	"询价": "询价",
+	"竞谈": "竞谈",
+	"单一": "单一",
+	"竞价": "竞价",
+}
+
+//招标时间zbtime、中标时间jgtime、项目状态bidstatus、招标类型bidtype、最后发布时间lasttime、首次发布时间firsttime
+
+// NewProject creates a project from a single source record. It builds the
+// in-memory ProjectInfo, assembles the "$set" document from tmp and thisinfo,
+// queues the write on updatePool, and returns the new project's hex id
+// together with a pointer to the cached ProjectInfo.
+func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (string, *ProjectInfo) {
+	pId := bson.NewObjectId()
+	p1 := p.NewCachePinfo(pId, thisinfo)
+	published := tmp["publishtime"]
+
+	set := map[string]interface{}{"_id": pId}
+	for _, field := range FIELDS {
+		if val := tmp[field]; val != nil {
+			set[field] = val
+		}
+	}
+	if thisinfo.ProjectName != "" {
+		set["s_projectname"] = tmp["projectname"] // kept for older consumers
+	}
+	now := time.Now().Unix()
+	set["createtime"] = now
+	set["sourceinfoid"] = thisinfo.Id
+	set["sourceinfourl"] = tmp["href"]
+	set["firsttime"] = published
+	set["lasttime"] = published
+	set["pici"] = now
+	set["ids"] = []string{thisinfo.Id}
+
+	// Tender time or result time, depending on the record's top-level type.
+	switch thisinfo.TopType {
+	case "招标":
+		set["zbtime"] = published
+	case "结果":
+		set["jgtime"] = published
+	}
+
+	// Bid type, defaulting to "招标" for unlisted subtypes.
+	bt := bidtype[thisinfo.SubType]
+	if bt == "" {
+		bt = "招标"
+	}
+	set["bidtype"] = bt
+	if set["bidstatus"] == nil && thisinfo.TopType == "结果" {
+		set["bidstatus"] = thisinfo.SubType
+	}
+
+	if len(thisinfo.Subscopeclass) > 0 {
+		set["s_subscopeclass"] = strings.Join(thisinfo.Subscopeclass, ",")
+	}
+	if len(thisinfo.Winners) > 0 {
+		set["s_winner"] = strings.Join(thisinfo.Winners, ",")
+		p1.Winners = thisinfo.Winners
+	}
+	set["list"] = []bson.M{p.PushListInfo(tmp)}
+
+	p.updatePool <- []map[string]interface{}{
+		{"_id": pId},
+		{"$set": set},
+	}
+	return pId.Hex(), &p1
+}
+
+// INFOFIELDS lists the keys PushListInfo copies from a source record into the
+// entry appended to a project's "list" array.
+//
+// The sub-scope entry used to be spelled "subscopclass", a key that matches
+// neither FIELDS nor the "subscopeclass" key written by UpdateProject, so the
+// value was never copied.
+var INFOFIELDS = []string{
+	"projectname",
+	"projectcode",
+	"title",
+	"href",
+	"publishtime",
+	"comeintime",
+	"bidopentime",
+	"toptype",
+	"subtype",
+	"buyer",
+	"buyerclass",
+	"agency",
+	"winner",
+	"budget",
+	"bidamount",
+	"topscopeclass",
+	"subscopeclass",
+	"infoformat",
+}
+
+// PushListInfo builds the entry stored in a project's "list" array for one
+// source record: the record id as "infoid" plus every INFOFIELDS key that is
+// present (non-nil) in tmp.
+func (p *ProjectTask) PushListInfo(tmp map[string]interface{}) bson.M {
+	item := bson.M{}
+	item["infoid"] = qu.BsonIdToSId(tmp["_id"])
+	for _, field := range INFOFIELDS {
+		if val := tmp[field]; val != nil {
+			item[field] = val
+		}
+	}
+	return item
+}
+
+// NewCachePinfo builds the in-memory ProjectInfo for a new project from its
+// first source record. MPC starts with the record's title project code when
+// LenPTC is greater than 5; MPN starts empty.
+func (p *ProjectTask) NewCachePinfo(id bson.ObjectId, thisinfo *Info) ProjectInfo {
+	extraCodes := []string{}
+	if thisinfo.LenPTC > 5 {
+		extraCodes = append(extraCodes, thisinfo.PTC)
+	}
+	return ProjectInfo{
+		Id:  id,
+		Ids: []string{thisinfo.Id},
+
+		ProjectName: thisinfo.ProjectName,
+		ProjectCode: thisinfo.ProjectCode,
+		MPN:         []string{},
+		MPC:         extraCodes,
+
+		Buyer:       thisinfo.Buyer,
+		Buyerclass:  thisinfo.Buyerclass,
+		Buyerperson: thisinfo.Buyerperson,
+		Buyertel:    thisinfo.Buyertel,
+		Agency:      thisinfo.Agency,
+
+		Topscopeclass: thisinfo.Topscopeclass,
+		Subscopeclass: thisinfo.Subscopeclass,
+
+		Area:     thisinfo.Area,
+		City:     thisinfo.City,
+		District: thisinfo.District,
+
+		HasPackage: thisinfo.HasPackage,
+		FirstTime:  thisinfo.Publishtime,
+		LastTime:   thisinfo.Publishtime,
+		Budget:     thisinfo.Budget,
+		Bidamount:  thisinfo.Bidamount,
+	}
+}
+
+// UpdateProject merges one source record into an existing project: it updates
+// the cached pInfo, collects the changed fields into a "$set" document,
+// pushes the record onto the project's "list" and "ids" arrays, and queues
+// the write on updatePool.
+//
+// Outside of "ql" mode a record whose id BinarySearch finds in pInfo.Ids is
+// logged as a repeat and skipped. comStr, resVal and pjVal are stored on the
+// pushed list entry; weight is not used here.
+func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info, pInfo *ProjectInfo, weight int, comStr string, resVal, pjVal int) {
+	if p.currentType != "ql" && BinarySearch(pInfo.Ids, thisinfo.Id) > -1 {
+		log.Println("repeat", thisinfo.Id)
+		return
+	}
+	set := map[string]interface{}{}
+	pInfo.Ids = append(pInfo.Ids, thisinfo.Id)
+
+	// 1. first publish time
+	if thisinfo.Publishtime < pInfo.FirstTime && thisinfo.Publishtime > 0 {
+		pInfo.FirstTime = thisinfo.Publishtime
+		set["firsttime"] = thisinfo.Publishtime
+		if thisinfo.TopType == "招标" {
+			set["zbtime"] = tmp["publishtime"]
+		}
+	}
+	// 2. last publish time; the newest record also decides bid type and status
+	if thisinfo.Publishtime > pInfo.LastTime {
+		pInfo.LastTime = thisinfo.Publishtime
+		set["lasttime"] = thisinfo.Publishtime
+		if bt := bidtype[thisinfo.SubType]; bt != "" {
+			set["bidtype"] = bt
+		}
+		if thisinfo.TopType == "结果" {
+			set["bidstatus"] = thisinfo.SubType
+			set["jgtime"] = tmp["publishtime"]
+		}
+	}
+	// 3-5. area, city, district: only fill values the project does not have yet
+	if thisinfo.Area != "全国" {
+		if pInfo.Area == "全国" {
+			pInfo.Area = thisinfo.Area
+			set["area"] = thisinfo.Area
+		}
+		if pInfo.City == "" && thisinfo.City != "" {
+			pInfo.City = thisinfo.City
+			set["city"] = thisinfo.City
+		}
+		if thisinfo.District != "" && pInfo.District == "" {
+			pInfo.District = thisinfo.District
+			set["district"] = thisinfo.District
+		}
+	}
+
+	// prefer reports whether candidate should replace current: current is
+	// empty, or current is shorter than limit runes while the candidate is
+	// longer than limit.
+	prefer := func(current, candidate string, candidateLen, limit int) bool {
+		if current == "" && candidate != "" {
+			return true
+		}
+		return len([]rune(current)) < limit && candidateLen > limit
+	}
+	// 6. project name
+	if prefer(pInfo.ProjectName, thisinfo.ProjectName, thisinfo.LenPN, 6) {
+		pInfo.ProjectName = thisinfo.ProjectName
+		set["projectname"] = thisinfo.ProjectName
+	}
+	// 7. project code
+	if prefer(pInfo.ProjectCode, thisinfo.ProjectCode, len([]rune(thisinfo.ProjectCode)), 6) {
+		pInfo.ProjectCode = thisinfo.ProjectCode
+		set["projectcode"] = thisinfo.ProjectCode
+	}
+	// 8. buyer
+	if prefer(pInfo.Buyer, thisinfo.Buyer, len([]rune(thisinfo.Buyer)), 5) {
+		pInfo.Buyer = thisinfo.Buyer
+		set["buyer"] = thisinfo.Buyer
+	}
+	// 9. agency
+	if prefer(pInfo.Agency, thisinfo.Agency, len([]rune(thisinfo.Agency)), 5) {
+		pInfo.Agency = thisinfo.Agency
+		set["agency"] = thisinfo.Agency
+	}
+	// 10. buyer contact person: appended when not already part of the stored text
+	if thisinfo.Buyerperson != "" && !strings.Contains(pInfo.Buyerperson, thisinfo.Buyerperson) {
+		pInfo.Buyerperson += thisinfo.Buyerperson
+		set["buyerperson"] = pInfo.Buyerperson
+	}
+	// 11. buyer phone: same rule as the contact person
+	if thisinfo.Buyertel != "" && !strings.Contains(pInfo.Buyertel, thisinfo.Buyertel) {
+		pInfo.Buyertel += thisinfo.Buyertel
+		set["buyertel"] = pInfo.Buyertel
+	}
+
+	if thisinfo.Buyerclass != "" && pInfo.Buyerclass == "" {
+		pInfo.Buyerclass = thisinfo.Buyerclass
+		set["buyerclass"] = pInfo.Buyerclass
+	}
+	if thisinfo.Bidopentime > pInfo.Bidopentime {
+		pInfo.Bidopentime = thisinfo.Bidopentime
+		set["bidopentime"] = pInfo.Bidopentime
+	}
+	if thisinfo.Bidamount > 0 && pInfo.Bidamount < 1 {
+		pInfo.Bidamount = thisinfo.Bidamount
+		set["bidamount"] = pInfo.Bidamount
+	}
+	if thisinfo.Budget > 0 && pInfo.Budget < 1 {
+		pInfo.Budget = thisinfo.Budget
+		set["budget"] = pInfo.Budget
+	}
+
+	// mergeSorted sorts have in place and adds every incoming value that
+	// BinarySearch does not find, re-sorting after each insertion.
+	mergeSorted := func(have, incoming []string) []string {
+		sort.Strings(have)
+		for _, item := range incoming {
+			if BinarySearch(have, item) == -1 {
+				have = append(have, item)
+				sort.Strings(have)
+			}
+		}
+		return have
+	}
+	if len(thisinfo.Topscopeclass) > 0 {
+		pInfo.Topscopeclass = mergeSorted(pInfo.Topscopeclass, thisinfo.Topscopeclass)
+		set["topscopeclass"] = pInfo.Topscopeclass
+	}
+	if len(thisinfo.Subscopeclass) > 0 {
+		pInfo.Subscopeclass = mergeSorted(pInfo.Subscopeclass, thisinfo.Subscopeclass)
+		set["subscopeclass"] = pInfo.Subscopeclass
+		set["s_subscopeclass"] = strings.Join(pInfo.Subscopeclass, ",")
+	}
+	if len(thisinfo.Winners) > 0 {
+		pInfo.Winners = mergeSorted(pInfo.Winners, thisinfo.Winners)
+		set["winners"] = pInfo.Winners
+		set["s_winner"] = strings.Join(pInfo.Winners, ",")
+	}
+
+	set["mpn"] = pInfo.MPN
+	set["mpc"] = pInfo.MPC
+
+	multipackage := 0
+	if thisinfo.HasPackage {
+		multipackage = 1
+	}
+	set["multipackage"] = multipackage
+
+	// Keep the original record on the project, tagged with how it matched.
+	push := p.PushListInfo(tmp)
+	push["compareStr"] = comStr
+	push["resVal"] = resVal
+	push["pjVal"] = pjVal
+
+	// set always carries mpn/mpc/multipackage, so both operators are always sent.
+	p.updatePool <- []map[string]interface{}{
+		{"_id": pInfo.Id},
+		{
+			"$set": set,
+			"$push": map[string]interface{}{
+				"list": push,
+				"ids":  thisinfo.Id,
+			},
+		},
+	}
+}

+ 387 - 0
fullproject/src_dev5/task.go

@@ -0,0 +1,387 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"qfw/util"
+	"regexp"
+	"strings"
+	"sync"
+	//"strings"
+	"time"
+
+	"github.com/robfig/cron"
+)
+
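+/**
+Task entry points:
+full and incremental merging;
+updates, inserts and in-memory cleanup;
+conversion of raw documents into Info objects.
+**/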
+
+// ProjectTask is the project-merge object: it carries the settings, locks,
+// in-memory indexes and the write queue used by a full or incremental run.
+type ProjectTask struct {
+	// InitMinTime is the minimum time; per the original note, values below 0
+	// are processed once.
+	InitMinTime int64
+	name        string
+	// thread is the number of concurrent merge goroutines.
+	thread int
+	// findLock is the lookup lock; clearMem holds it while evicting.
+	findLock sync.Mutex
+	// wg is waited on by clearMem before it touches the indexes.
+	wg sync.WaitGroup
+	// AllIdsMapLock guards AllIdsMap.
+	AllIdsMapLock sync.Mutex
+	// AllIdsMap maps a project id to its in-memory entry.
+	AllIdsMap map[string]*ID
+	// mapPb indexes project ids by buyer.
+	mapPb map[string]*Key
+	// mapPn indexes project ids by project name.
+	mapPn map[string]*Key
+	// mapPc indexes project ids by project code.
+	mapPc map[string]*Key
+	// updatePool is the channel carrying pending updates and inserts.
+	updatePool chan []map[string]interface{}
+	// coll is the name of the collection written to.
+	coll string
+	// currentType tells whether a full or an incremental run is in progress;
+	// clearMem treats the value "ql" as a full run.
+	currentType string
+	// clearContimes counts the cleanup ticks skipped so far.
+	clearContimes int
+	// currentTime is the current time of the run; enter sets it from the
+	// publish time of processed records.
+	currentTime int64
+	// saveSize is the number of queued writes flushed as one bulk operation.
+	saveSize int
+}
+
+// NewPT returns a ProjectTask populated with the default settings: three
+// worker goroutines, a 2000-slot write queue, bulk writes of 200 entries and
+// pre-sized id/key maps, targeting the ProjectColl collection.
+func NewPT() *ProjectTask {
+	// Initial capacity shared by the id map and the three key indexes.
+	const mapCap = 5000000
+
+	pt := new(ProjectTask)
+	pt.InitMinTime = 1325347200
+	pt.name = "全/增量对象"
+	pt.thread = 3
+	pt.updatePool = make(chan []map[string]interface{}, 2000)
+	pt.AllIdsMap = make(map[string]*ID, mapCap)
+	pt.mapPb = make(map[string]*Key, mapCap)
+	pt.mapPn = make(map[string]*Key, mapCap)
+	pt.mapPc = make(map[string]*Key, mapCap)
+	pt.saveSize = 200
+	pt.coll = ProjectColl
+	return pt
+}
+
+// P_QL is the package-level merge task; init assigns it before main runs.
+var P_QL *ProjectTask
+
+// init creates the full-merge task object and starts its two background
+// goroutines.
+func init() {
+	P_QL = NewPT()
+	// Drains updatePool and writes the queued changes in bulk.
+	go P_QL.updateQueue()
+	// Schedules the periodic eviction of stale in-memory projects.
+	go P_QL.clearMem()
+
+}
+
+// updateQueue consumes the save/update channel and writes the queued entries
+// to p.coll in bulk. A batch is written as soon as it holds saveSize entries,
+// or when no new entry has arrived for two seconds. It never returns.
+func (p *ProjectTask) updateQueue() {
+	batch := make([][]map[string]interface{}, p.saveSize)
+	filled := 0
+	// flush writes the given rows and starts over with a fresh batch.
+	flush := func(rows [][]map[string]interface{}) {
+		MongoTool.UpSertBulk(p.coll, rows...)
+		batch = make([][]map[string]interface{}, p.saveSize)
+		filled = 0
+	}
+	for {
+		select {
+		case item := <-p.updatePool:
+			batch[filled] = item
+			filled++
+			// The batch is full: write all of it.
+			if filled == p.saveSize {
+				flush(batch)
+			}
+		case <-time.After(2 * time.Second):
+			// Idle timeout: write only the slots filled so far.
+			if filled > 0 {
+				flush(batch[:filled])
+			}
+		}
+	}
+}
+
+// clearMem registers the cron job that evicts stale projects from the
+// in-memory id map and the buyer / name / code indexes, starts the
+// scheduler and then blocks forever.
+func (p *ProjectTask) clearMem() {
+	// Keep roughly the last six months (6 * 30 days, in seconds) in memory.
+	const validTime = int64(6 * 30 * 86400)
+
+	// evict removes id from the entry stored under key in index and deletes
+	// the entry once its id list is empty. Empty keys and missing entries
+	// are ignored.
+	evict := func(index map[string]*Key, key, id string) {
+		if key == "" {
+			return
+		}
+		entry := index[key]
+		if entry == nil {
+			return
+		}
+		entry.Lock.Lock()
+		entry.Arr = deleteSlice(entry.Arr, id)
+		if len(entry.Arr) == 0 {
+			delete(index, key)
+		}
+		entry.Lock.Unlock()
+	}
+
+	scheduler := cron.New()
+	// Per the original note the job fires every 4 minutes. A full ("ql") run
+	// cleans on every tick; otherwise ticks are skipped until 100 have been
+	// counted.
+	scheduler.AddFunc("50 0/4 * * * *", func() {
+		if p.currentType != "ql" && p.clearContimes < 100 {
+			p.clearContimes++
+			return
+		}
+		// Reset the skipped-tick counter.
+		p.clearContimes = 0
+		// Take the global lookup/compare lock for the whole cleanup.
+		p.findLock.Lock()
+		defer p.findLock.Unlock()
+		// Let the merges already in progress finish.
+		p.wg.Wait()
+		// Lock the map of all in-memory projects.
+		p.AllIdsMapLock.Lock()
+		defer p.AllIdsMapLock.Unlock()
+
+		removed := 0
+		for id, item := range p.AllIdsMap {
+			if p.currentTime-item.P.LastTime <= validTime {
+				continue
+			}
+			removed++
+			delete(p.AllIdsMap, id)
+			// Buyer index.
+			evict(p.mapPb, item.P.Buyer, id)
+			// Name index: the main project name plus every entry of MPN.
+			for _, name := range append([]string{item.P.ProjectName}, item.P.MPN...) {
+				evict(p.mapPn, name, id)
+			}
+			// Code index: the main project code plus every entry of MPC.
+			for _, code := range append([]string{item.P.ProjectCode}, item.P.MPC...) {
+				evict(p.mapPc, code, id)
+			}
+		}
+		log.Println("清除完成:", removed, len(p.AllIdsMap))
+	})
+	scheduler.Start()
+	select {}
+}
+
+// taskQl runs a full merge. The database and collection come from
+// udpInfo["db"] / udpInfo["coll"], falling back to MongoTool.DbName and
+// ExtractColl; udpInfo["thread"] (default 3) sets the worker count when it is
+// positive. The merge itself only starts when the index call succeeds.
+func (p *ProjectTask) taskQl(udpInfo map[string]interface{}) {
+	defer util.Catch()
+
+	dbName, _ := udpInfo["db"].(string)
+	if dbName == "" {
+		dbName = MongoTool.DbName
+	}
+	collName, _ := udpInfo["coll"].(string)
+	if collName == "" {
+		collName = ExtractColl
+	}
+
+	// Step 1: check the publishtime index.
+	// NOTE(review): the two arguments look like index names rather than
+	// field keys - confirm against the driver's EnsureIndexKey contract.
+	conn := MongoTool.GetMgoConn()
+	indexErr := conn.DB(dbName).C(collName).EnsureIndexKey("publishtime_1", "publishtime_-1")
+	if indexErr != nil {
+		log.Println("publishtime_1索引不存在")
+	}
+	MongoTool.DestoryMongoConn(conn)
+
+	// The worker count is applied whether or not the index check passed.
+	if workers := util.IntAllDef(udpInfo["thread"], 3); workers > 0 {
+		p.thread = workers
+	}
+	if indexErr != nil {
+		return
+	}
+	// An empty query selects every document of the collection.
+	p.enter(dbName, collName, map[string]interface{}{})
+}
+
+// taskZl runs an incremental merge. The database and collection come from
+// udpInfo["db"] / udpInfo["coll"], falling back to MongoTool.DbName and
+// ExtractColl; udpInfo["thread"] (default 3) sets the worker count when it is
+// positive. The documents to merge are selected by udpInfo["query"] when it
+// is a map, otherwise by the _id range (gtid, lteid] built from the string
+// values udpInfo["gtid"] and udpInfo["lteid"].
+func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	db, _ := udpInfo["db"].(string)
+	if db == "" {
+		db = MongoTool.DbName
+	}
+	coll, _ := udpInfo["coll"].(string)
+	if coll == "" {
+		coll = ExtractColl
+	}
+	thread := util.IntAllDef(udpInfo["thread"], 3)
+	if thread > 0 {
+		p.thread = thread
+	}
+	q, _ := udpInfo["query"].(map[string]interface{})
+	if q == nil {
+		// Use checked assertions: a missing or non-string bound used to
+		// panic here and was only recovered by the deferred util.Catch.
+		gtid, okGt := udpInfo["gtid"].(string)
+		lteid, okLte := udpInfo["lteid"].(string)
+		if !okGt || !okLte {
+			log.Println("taskZl: gtid/lteid missing or not strings, nothing merged")
+			return
+		}
+		// Start id (exclusive) and end id (inclusive).
+		q = map[string]interface{}{
+			"_id": map[string]interface{}{
+				"$gt":  util.StringTOBsonId(gtid),
+				"$lte": util.StringTOBsonId(lteid),
+			},
+		}
+	}
+	p.enter(db, coll, q)
+}
+
+// enter streams the documents matching q from db.coll in publishtime order,
+// parses each one with ParseInfo and merges the usable ones concurrently,
+// with at most p.thread merges in flight. It returns once every started
+// merge has finished.
+func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
+	defer util.Catch()
+	sess := MongoTool.GetMgoConn()
+	defer MongoTool.DestoryMongoConn(sess)
+	query := sess.DB(db).C(coll).Find(q).Sort("publishtime").Iter()
+	// pool is a counting semaphore: one slot per running merge.
+	pool := make(chan bool, p.thread)
+	count := 0
+	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
+		info := ParseInfo(tmp)
+		// Records that failed to parse, that have pnbval == 0, or whose only
+		// counted element is the buyer are skipped.
+		if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
+			pool <- true
+			go func(info *Info, tmp map[string]interface{}) {
+				defer func() {
+					p.currentTime = info.Publishtime
+					<-pool
+				}()
+				p.startProjectMerge(info, tmp)
+			}(info, tmp)
+		}
+		if count%1000 == 0 {
+			log.Println("current", count)
+		}
+		// Each document gets its own map because the goroutine keeps it.
+		tmp = make(map[string]interface{})
+	}
+	// Next returns false both at the end of the data and on a failure;
+	// report the failure instead of presenting it as a completed run.
+	if err := query.Close(); err != nil {
+		log.Println("enter: cursor on", db, coll, "ended with error:", err)
+	}
+	// Fill every slot of the semaphore: this blocks until all running
+	// merges have released theirs. cap(pool) is used so a later change of
+	// p.thread cannot make this wait forever.
+	for n := 0; n < cap(pool); n++ {
+		pool <- true
+	}
+	log.Println("所有线程执行完成...", count)
+
+}
+
+var (
+	// Patterns that extract a project code from the title.
+	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	// Clean-up filter for project codes.
+	pcReplace = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-;{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+|((采购)?项目|采购(项目)?)$")
+	// A project code made up only of digits or only of letters, at most 4 characters long.
+	StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$")
+	// Digits only or letters only.
+	StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$")
+)
+
+// ParseInfo converts one raw document into an *Info and normalizes the
+// fields used for merging:
+//   - PTC is filled with a project code found in the title, when one of the
+//     title patterns yields an acceptable candidate;
+//   - ProjectName, ProjectCode and PTC are cleaned with pcReplace;
+//   - pnbval counts how many of name, code (ProjectCode or PTC) and buyer
+//     are usable;
+//   - Winners collects the distinct non-empty winners of the document and,
+//     when HasPackage is set, of its packages;
+//   - LenPC, LenPTC and LenPN hold the rune lengths of the cleaned values.
+//
+// It returns nil when the document cannot be decoded into an Info.
+func ParseInfo(tmp map[string]interface{}) (info *Info) {
+	// Decoding is deliberately best effort: encode/decode errors are
+	// ignored and only a nil result is treated as a failure.
+	bys, _ := json.Marshal(tmp)
+	var thisinfo *Info
+	json.Unmarshal(bys, &thisinfo)
+	if thisinfo == nil {
+		return nil
+	}
+	if len(thisinfo.Topscopeclass) == 0 {
+		thisinfo.Topscopeclass = []string{}
+	}
+	if len(thisinfo.Subscopeclass) == 0 {
+		thisinfo.Subscopeclass = []string{}
+	}
+
+	// Look for a project code in the title. The patterns are tried in
+	// order; the first candidate that passes all checks wins.
+	titlePatterns := []struct {
+		re    *regexp.Regexp
+		group int // capture group holding the candidate code
+	}{
+		{titleGetPc, 1},
+		{titleGetPc1, 3},
+		{titleGetPc2, 1},
+	}
+	for _, tp := range titlePatterns {
+		res := tp.re.FindStringSubmatch(thisinfo.Title)
+		if len(res) <= tp.group {
+			continue
+		}
+		code := res[tp.group]
+		if len(code) > 6 && thisinfo.ProjectCode != code && !numCheckPc.MatchString(code) && !_zimureg1.MatchString(code) {
+			thisinfo.PTC = code
+			break
+		}
+	}
+
+	if thisinfo.ProjectName != "" {
+		thisinfo.ProjectName = pcReplace.ReplaceAllString(thisinfo.ProjectName, "")
+		if thisinfo.ProjectName != "" {
+			thisinfo.pnbval++
+		}
+	}
+
+	if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+		if thisinfo.ProjectCode != "" {
+			thisinfo.ProjectCode = pcReplace.ReplaceAllString(thisinfo.ProjectCode, "")
+			// Without a usable name, a short all-digit or all-letter code
+			// is discarded.
+			if thisinfo.pnbval == 0 && len([]rune(thisinfo.ProjectCode)) < 5 {
+				thisinfo.ProjectCode = StrOrNum.ReplaceAllString(thisinfo.ProjectCode, "")
+			}
+		} else {
+			thisinfo.PTC = pcReplace.ReplaceAllString(thisinfo.PTC, "")
+			if thisinfo.pnbval == 0 && len([]rune(thisinfo.PTC)) < 5 {
+				thisinfo.PTC = StrOrNum.ReplaceAllString(thisinfo.PTC, "")
+			}
+		}
+		if thisinfo.ProjectCode != "" || thisinfo.PTC != "" {
+			thisinfo.pnbval++
+		}
+	}
+	// Drop a title code that only repeats (part of) the project code. This
+	// also covers the case where both are equal.
+	if strings.Contains(thisinfo.ProjectCode, thisinfo.PTC) {
+		thisinfo.PTC = ""
+	}
+
+	// A buyer needs more than two runes to count.
+	if len([]rune(thisinfo.Buyer)) > 2 {
+		thisinfo.pnbval++
+	} else {
+		thisinfo.Buyer = ""
+	}
+
+	// Collect the winners. The seen-set was previously filled but never
+	// consulted, so a winner shared by several packages was listed
+	// repeatedly.
+	seen := map[string]bool{}
+	winners := []string{}
+	addWinner := func(w string) {
+		if w == "" || seen[w] {
+			return
+		}
+		seen[w] = true
+		winners = append(winners, w)
+	}
+	winner, _ := tmp["winner"].(string)
+	addWinner(winner)
+	if thisinfo.HasPackage {
+		packageM, _ := tmp["package"].(map[string]interface{})
+		// Map iteration order is unspecified, so the order of package
+		// winners may differ between runs.
+		for _, p := range packageM {
+			pm, _ := p.(map[string]interface{})
+			pw, _ := pm["winner"].(string)
+			addWinner(pw)
+		}
+	}
+	thisinfo.Winners = winners
+
+	thisinfo.LenPC = len([]rune(thisinfo.ProjectCode))
+	thisinfo.LenPTC = len([]rune(thisinfo.PTC))
+	thisinfo.LenPN = len([]rune(thisinfo.ProjectName))
+	return thisinfo
+}
+
+// deleteSlice removes the first element of arr equal to v and returns the
+// shortened slice. The removal happens in place, so the result shares arr's
+// backing array. When v is not present, arr is returned unchanged.
+func deleteSlice(arr []string, v string) []string {
+	for i := 0; i < len(arr); i++ {
+		if arr[i] != v {
+			continue
+		}
+		// Shift the tail one position to the left, then drop the last slot.
+		copy(arr[i:], arr[i+1:])
+		return arr[:len(arr)-1]
+	}
+	return arr
+}

+ 5 - 0
fullproject/src_dev5/test/t.go

@@ -0,0 +1,5 @@
+package main
+
+// main is an empty placeholder entry point; it does nothing.
+func main() {
+
+}