Jianghan 4 éve
szülő
commit
60bf2c35a4

+ 2 - 2
fullproject/src_v1/config.json

@@ -72,8 +72,8 @@
         "pre_regexp": ["\\(不见面开标\\)", "\\(买方)", "\\(\\d+个月\\)", "\\d{4}年", "\\d+月", "<[^>]*>", "AB", "CC", "CB", "CA", "[A-Z]?\\d+", "\\(第(\\d+|一|二|三|四|五|六|七)(批|次)?\\)", "\\(\\d+局\\)", "\\d+标段", "\\(二次)", "(重招\\d)", "(\\(|(?)周(五|四|三|二|一|日|六)?(\\)|))",
             "(\\(|()?[a-zA-Z](\\)|))?", "(\\(|()?\\d{0,8}.?\\d+(\\)|))?", "#(~|、)?", "#\\d{1,};?", "(\\+|\\.|、|-|#|%|:|:)+"],
         "back_regexp": ["采购人$", "[^a-zA-Z\\p{Han}]{1,}$"],
-        "back_rep_regexp": ["(有限公$)#有限公司", "(有限责任公$)#有限责任公司", "(公司公司&)#公司", "(公司等&)#公司"],
-        "blacklist": ["有问题", "个县公司", "T及分公司", "大厦\\d+室", "住宅楼", "购物中心", "盖法人章", "东侧路面拓宽", "、技术研发中心", "地下室", "车库", "二公司", "二类居住", "钢芯铝绞线", "已仔细"]
+        "back_rep_regexp": ["(有限公$)#有限公司", "(有限责任公$)#有限责任公司", "(公司公司&)#公司", "(公司等&)#公司"],
+        "blacklist": ["有问题", "个县公司", "T及分公司", "大厦\\d+室", "住宅楼", "购物中心", "盖法人章", "东侧路面拓宽", "、技术研发中心", "地下室", "车库", "二类居住", "钢芯铝绞线", "已仔细"]
     },
     "agency": {
         "pre_regexp": ["发布人:", "<[^>]*>", "(\\(|()?[a-zA-Z](\\)|))?", "(\\(|()?\\d{0,8}.?\\d+(\\)|))?", "#(~|、)?", "#\\d{1,};?", "(\\+|\\.|、|-|#|%|:|:)+"],

+ 4 - 4
fullproject/src_v1/main.go

@@ -71,7 +71,7 @@ func DealSign() {
 	}
 }
 
-func mainT() {
+func main() {
 	//udp跑增量  id段   project
 	//udp跑全量			qlT
 	//udp跑历史数据  信息id1,id2/或id段  ls
@@ -90,9 +90,9 @@ func mainT() {
 }
 
 //测试组人员使用
-func main() {
-	sid = "5cde1a74a5cb26b9b782dd1f"
-	eid = "5dee234de9d1f601e4c4b813"
+func mainT() {
+	sid = "5eddb9189e628c5991654802"
+	eid = "5f9670aa50cded0641b30736"
 	//flag.StringVar(&sid, "sid", "", "开始id")
 	//flag.StringVar(&eid, "eid", "", "结束id")
 	//flag.Parse()

+ 11 - 9
fullproject/src_v1/task.go

@@ -322,10 +322,7 @@ func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) {
 //招标字段更新
 func (p *ProjectTask) taskUpdateInfo(udpInfo map[string]interface{}) {
 	defer util.Catch()
-	db, _ := udpInfo["db"].(string)
-	if db == "" {
-		db = MongoTool.DbName
-	}
+	db := MongoTool.DbName
 	coll, _ := udpInfo["coll"].(string)
 	if coll == "" {
 		coll = ExtractColl
@@ -563,6 +560,7 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 	}()
 	p.Brun = true
 	count, taskcount := 0, 0
+	countRepeat := 0
 
 	pool := make(chan bool, p.thread)
 	log.Println("start project", q)
@@ -584,6 +582,7 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 					if util.IntAll(tmp["repeat"]) == 0 {
 						if P_QL.currentType == "project" && util.IntAll(tmp["dataging"]) == 1 {
 							//增量	dataging为1不参与合并
+							util.Debug("增量   dataging == 1 ", tmp["_id"])
 							return
 						}
 						p.fillInPlace(tmp)
@@ -598,6 +597,8 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 						}
 					} else {
 						//信息错误,进行更新
+						util.Debug(tmp["_id"])
+						countRepeat++
 					}
 				}(tmp)
 			case <-over:
@@ -605,9 +606,10 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 			}
 		}
 	}()
-	fields := map[string]interface{} {"area": 1, "city": 1, "district": 1, "comeintime": 1, "publishtime": 1, "bidopentime": 1, "title": 1, "projectname": 1, "href": 1,
-		"projectcode": 1, "buyerclass": 1, "winner": 1, "buyer": 1, "buyerperson": 1, "buyertel": 1, "infoformat": 1, "toptype": 1, "subtype": 1, "spidercode": 1,
-		"site": 1, "topscopeclass": 1, "subscopeclass": 1, "bidamount": 1, "budget": 1, "agency": 1, "package": 1, "jsondata": 1, "review_experts": 1, "purchasing": 1, "winnerorder": 1}
+	//fields := map[string]interface{} {"repeat": 1, "dataging": 1, "area": 1, "city": 1, "district": 1, "comeintime": 1, "publishtime": 1, "bidopentime": 1, "title": 1, "projectname": 1, "href": 1,
+	//	"projectcode": 1, "buyerclass": 1, "winner": 1, "buyer": 1, "buyerperson": 1, "buyertel": 1, "infoformat": 1, "toptype": 1, "subtype": 1, "spidercode": 1, "projectscope": 1, "contractcode": 1,
+	//	"site": 1, "topscopeclass": 1, "subscopeclass": 1, "bidamount": 1, "budget": 1, "agency": 1, "package": 1, "jsondata": 1, "review_experts": 1, "purchasing": 1, "winnerorder": 1}
+	fields := map[string]interface{}{"kvtext": 0, "repeat_reason": 0}
 	ms := sess.DB(db).C(coll).Find(q).Select(fields).Sort("publishtime")
 	if Sysconfig["hints"] != nil {
 		ms.Hint(Sysconfig["hints"])
@@ -643,7 +645,7 @@ L:
 	for n := 0; n < p.thread; n++ {
 		pool <- true
 	}
-	log.Println("所有线程执行完成...", count, taskcount)
+	log.Println("所有线程执行完成...", count, taskcount, countRepeat)
 
 }
 
@@ -801,7 +803,7 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	}
 	thisinfo.WinnerOrder = wins
 	//清理buyer
-	buyer := QyFilter(util.ObjToString(tmp["tmp"]), "buyer")
+	buyer := QyFilter(util.ObjToString(tmp["buyer"]), "buyer")
 	tmp["buyer"] = buyer
 	thisinfo.Buyer = buyer
 

+ 1 - 1
qyxy_change/qy_baidu/config.json

@@ -72,7 +72,7 @@
       "change_name": "其他事项备案",
       "change_push": false,
       "change_info": "其他事项备案",
-      "change_keyword": ["备案", "设立"]
+      "change_keyword": ["备案"]
     },
     {
       "change_code": "100009",

+ 3 - 2
qyxy_change/qy_baidu/main.go

@@ -3,6 +3,7 @@ package main
 import (
 	"mongodb"
 	"qfw/util"
+	"regexp"
 )
 
 var (
@@ -15,6 +16,8 @@ var (
 	BdTaskTime                                 int
 	PaTaskTime								   string
 	ChangeMap                                  []map[string]interface{}
+
+	timeReg, _ = regexp.Compile(`^[\d]{4}-[\d]{1,2}-[\d]{1,2}`)
 )
 
 func init() {
@@ -69,8 +72,6 @@ func initChangeMap() {
 func main() {
 	go SaveData()
 	go TimeTask()
-	//GetBdData()
-	//GetPaData()
 	ch := make(chan bool, 1)
 	<-ch
 }

+ 74 - 4
qyxy_change/qy_baidu/task.go

@@ -9,11 +9,12 @@ import (
 	"mongodb"
 	"qfw/util"
 	"regexp"
+	"strings"
 	"time"
 )
 
 func TimeTask() {
-	GetBdData()
+	//GetPaData()
 	c := cron.New()
 	cronstrBd := "0 0 */" + fmt.Sprint(BdTaskTime) + " * * ?" 		//每TaskTime小时执行一次
 	//cronstr := "0 0 " + fmt.Sprint(TaskTime) + " * * ?"			//每天TaskTime跑一次
@@ -48,16 +49,22 @@ func GetPaData() {
 	lastid := ""
 	sess := MgoMix.GetMgoConn()
 	defer MgoMix.DestoryMongoConn(sess)
-	fields := map[string]interface{}{"changes": 1, "company_id": 1, "company_name": 1}
+	fields := map[string]interface{}{"changes": 1, "company_id": 1, "company_name": 1, "company_type": 1, "establish_date": 1, "create_time": 1}
 	query := sess.DB(Dbname_pa).C(CollPa).Find(nil).Select(fields).Iter()
+	c := MgoMix.Count(CollPa, nil)
+	util.Debug("ping an count ------", c)
 	tmp := make(map[string]interface{})
 	for query.Next(&tmp) {
-		lastid = mongodb.BsonIdToSId(tmp["_id"])
+		lastid = mongodb.BsonIdToSId(tmp["company_id"])
 		if count%1000 == 0 {
 			util.Debug("ping an ----current-----", count, lastid)
 		}
+		if strings.Contains(util.ObjToString(tmp["company_type"]), "个体") {
+			continue
+		}
+		currentTime := time.Now().Unix()
 		if tmp["changes"] != nil && len(tmp["changes"].([]interface{})) > 0 {
-			currentTime := time.Now().Unix()
+			delete(tmp, "establish_date")
 			q := bson.M{"company_name": tmp["company_name"]}
 			changeEnt, _ := MgoMix.FindOne(CollSave, q)
 			if changeEnt != nil && len(*changeEnt) > 0 {
@@ -83,6 +90,68 @@ func GetPaData() {
 				tmp["updatetime"] = currentTime
 			}
 			update := make(map[string]interface{})
+			tmp["datasource"] = "pingan"
+			update["$set"] = tmp
+			updateInfo := []map[string]interface{}{
+				{
+					"_id": tmp["_id"],
+				},
+				update,
+			}
+			MgoSaveCache <- updateInfo
+			count++
+		}else {
+			//{
+			//	"change_code": "100000",
+			//	"change_name": "新设立公司",
+			//	"change_push": true,
+			//	"change_info": "新设立公司",
+			//	"change_keyword": ["新设立"]
+			//},
+			setupData := ""
+			if tmp["establish_date"] != nil {
+				if timeTmp, ok := tmp["establish_date"].(primitive.DateTime); ok {
+					t := timeTmp.Time()
+					setupData = util.FormatDate(&t, util.Date_Short_Layout)
+				} else if timeTmp, ok := tmp["establish_date"].(string); ok && timeTmp != "" {
+					t := timeReg.FindString(timeTmp)
+					if t != "" {
+						setupData = t
+					}
+				}
+			}
+			createData := ""
+			if tmp["create_time"] != nil {
+				if timeTmp, ok := tmp["create_time"].(primitive.DateTime); ok {
+					t := timeTmp.Time()
+					createData = util.FormatDate(&t, util.Date_Short_Layout)
+				} else if timeTmp, ok := tmp["create_time"].(string); ok && timeTmp != "" {
+					t := timeReg.FindString(timeTmp)
+					if t != "" {
+						createData = t
+					}
+				}
+			}
+			tm2, _ := time.Parse("2006-01-02", createData)
+			//当前时间17天内
+			if tm2.Unix() < (time.Now().Unix() - 17 * 60 * 60 * 24) {
+				continue
+			}
+			delete(tmp, "establish_date")
+			delete(tmp, "create_time")
+			changeInfo := make(map[string]interface{})
+			changeInfo["change_field"] = "新设立公司"
+			changeInfo["change_name_new"] = "新设立公司"
+			changeInfo["content_before"] = ""
+			changeInfo["content_after"] = "新设立公司"
+			changeInfo["change_date"] = setupData
+
+			tmp["changes"] = []map[string]interface{}{changeInfo}
+			tmp["_id"] = primitive.NewObjectID()
+			tmp["createtime"] = currentTime
+			tmp["updatetime"] = currentTime
+			tmp["datasource"] = "pingan"
+			update := make(map[string]interface{})
 			update["$set"] = tmp
 			updateInfo := []map[string]interface{}{
 				{
@@ -135,6 +204,7 @@ func findEnt(tmp map[string]interface{}) {
 		saveEnt := map[string]interface{}{}
 		if saveEnt != nil && len(*paEnt) > 0 {
 			//3、企业库有该企业信息
+			saveEnt["datasource"] = "baidu"
 			saveEnt["_id"] = primitive.NewObjectID()
 			saveEnt["company_id"] = (*paEnt)["company_id"]
 			saveEnt["company_name"] = (*ent)["entName"]

+ 1 - 0
qyxy_change/qy_foucs/task.go

@@ -74,6 +74,7 @@ func findEnt(tmp map[string]interface{}) {
 	tmp["_id"] = primitive.NewObjectID()
 	tmp["createtime"] = currentTime
 	tmp["updatetime"] = currentTime
+	tmp["datasource"] = "focus"
 	update := make(map[string]interface{})
 	update["$set"] = tmp
 	updateInfo := []map[string]interface{}{