Переглянути джерело

新增处理中国招标投标公共服务平台数据定时任务

maxiaoshan 5 роки тому
батько
коміт
9596f86044

+ 5 - 1
udpcreateindex/src/biddingall.go

@@ -12,7 +12,11 @@ import (
 
 //对字段处理 bidamount  budget
 //招标数据表和抽取表一一对应开始更新
-
+/*
+	注意:
+	1、biddingall任务跑历史数据生成索引并更新bidding表
+	2、调用biddingall任务时config.json中indexfields配置要有purchasing、purchasinglist、filetext
+*/
 func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
 	defer qutil.Catch()
 	thread := 40

+ 36 - 28
udpcreateindex/src/bidingpurchasing.go

@@ -37,14 +37,14 @@ func biddingPurchaingTask(q map[string]interface{}) {
 	i := 0
 	for tmp := make(map[string]interface{}); query.Next(tmp); i = i + 1 {
 		n++
-		if util.IntAll(tmp["extracttype"]) == -1 || util.IntAll(tmp["dataging"]) == 1 { //重复数据不生索引
+		if util.IntAll(tmp["extracttype"]) == -1 { // || util.IntAll(tmp["dataging"]) == 1 { //重复数据不生索引
 			tmp = make(map[string]interface{})
 			continue
 		}
 		newTmp := map[string]interface{}{} //最终生索引的数据
 		saveArr := []map[string]interface{}{}
 		//oss拼装filetext
-		if filetext := getFileText(tmp); len(filetext) > 0 {
+		if filetext := getFileText(tmp); len(filetext) > 10 {
 			if site, _ := tmp["site"].(string); site == "中国招标投标公共服务平台" { //site:中国招标投标公共服务平台 detail替换成filetext 并加入标记filedetail=1
 				tmp["detail"] = filetext
 				saveArr = append(saveArr, map[string]interface{}{"_id": tmp["_id"]})
@@ -65,6 +65,7 @@ func biddingPurchaingTask(q map[string]interface{}) {
 		}
 		//purchasinglist
 		if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok {
+			util.Debug(len(purchasinglist))
 			if len(purchasinglist) > 0 {
 				purchasinglist_new := []map[string]interface{}{}
 				for _, ls := range purchasinglist {
@@ -79,6 +80,7 @@ func biddingPurchaingTask(q map[string]interface{}) {
 						purchasinglist_new = append(purchasinglist_new, lsm_new)
 					}
 				}
+				util.Debug(len(purchasinglist_new), purchasinglist_new)
 				if len(purchasinglist_new) > 0 {
 					newTmp["purchasinglist"] = purchasinglist_new
 				}
@@ -183,28 +185,11 @@ func biddingPurchaingTask(q map[string]interface{}) {
 	log.Println("create filetext index...over", n)
 }
 
-func getFileText(tmp map[string]interface{}) (filetext string) {
-	if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok {
-		for _, tmpData1 := range attchMap {
-			if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok {
-				for _, result := range tmpData2 {
-					if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
-						if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
-							bs := u.OssGetObject(attach_url) //oss读数据
-							if utf8.RuneCountInString(filetext+bs) < util.IntAllDef(Sysconfig["filelength"], 100000) {
-								filetext += bs + "\n"
-							} else {
-								break
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-	return
-}
-
+//定时任务site:中国招标投标公共服务平台
+/*
+	注意:
+	1、调用此任务时config.json中indexfields配置不要有purchasing、purchasinglist、filetext
+*/
 func site_attach_text(q map[string]interface{}) {
 	defer util.Catch()
 	//锁
@@ -232,15 +217,16 @@ func site_attach_text(q map[string]interface{}) {
 	for tmp := make(map[string]interface{}); query.Next(tmp); i = i + 1 {
 		n++
 		site, _ := tmp["site"].(string)
-		if util.IntAll(tmp["extracttype"]) == -1 || util.IntAll(tmp["dataging"]) == 1 || tmp["attach_text"] == nil || site != "中国招标投标公共服务平台" {
+		if util.IntAll(tmp["extracttype"]) == -1 || site != "中国招标投标公共服务平台" || tmp["attach_text"] == nil {
 			tmp = make(map[string]interface{})
 			continue
 		}
 		newTmp := map[string]interface{}{} //最终生索引的数据
 		saveArr := []map[string]interface{}{}
 
-		filetext := getFileText(tmp) //oss拼装filetext
-		if len(filetext) > 0 {
+		filetext := getFileText(tmp)                       //oss拼装filetext
+		filetext_afterspace := FilterDetailSpace(filetext) //去除空格
+		if len(filetext_afterspace) > 10 {
 			tmp["detail"] = filetext //filetext替换detail
 			saveArr = append(saveArr, map[string]interface{}{"_id": tmp["_id"]})
 			saveArr = append(saveArr, map[string]interface{}{
@@ -251,7 +237,7 @@ func site_attach_text(q map[string]interface{}) {
 			})
 			newTmp["filetext"] = filetext //
 		} else {
-			//log.Println("filetext is null string:", tmp["_id"])
+			log.Println("filetext is null string:", tmp["_id"])
 			tmp = make(map[string]interface{})
 			continue
 		}
@@ -355,3 +341,25 @@ func site_attach_text(q map[string]interface{}) {
 	SaveUpdageLock.Unlock()
 	log.Println("create filetext index...over", n, indexnum)
 }
+
+// getFileText concatenates the OSS-hosted text of the attachments referenced
+// by tmp["attach_text"] into a single newline-separated string.
+//
+// tmp["attach_text"] is walked as map -> map -> map; values of any other
+// shape are skipped. For every innermost entry with a non-empty "attach_url"
+// the content is fetched with u.OssGetObject and appended, followed by "\n",
+// as long as the accumulated rune count plus the new content stays below
+// util.IntAllDef(Sysconfig["filelength"], 100000).
+//
+// It returns "" when "attach_text" is missing, has another type, or yields
+// no text.
+func getFileText(tmp map[string]interface{}) (filetext string) {
+	attachments, ok := tmp["attach_text"].(map[string]interface{})
+	if !ok || attachments == nil {
+		return ""
+	}
+	// Read the configured limit once instead of once per attachment.
+	limit := util.IntAllDef(Sysconfig["filelength"], 100000)
+	// Running rune count of filetext. Keeping it here avoids building a
+	// throwaway copy of filetext+bs for every attachment just to measure it.
+	// The sum equals the rune count of the concatenation because filetext is
+	// either empty or ends with "\n", so no UTF-8 sequence spans the joint.
+	runes := 0
+	for _, group := range attachments {
+		files, ok := group.(map[string]interface{})
+		if !ok || files == nil {
+			continue
+		}
+		for _, file := range files {
+			info, ok := file.(map[string]interface{})
+			if !ok || info == nil {
+				continue
+			}
+			attachURL := util.ObjToString(info["attach_url"])
+			if attachURL == "" {
+				continue
+			}
+			bs := u.OssGetObject(attachURL) // read the attachment text from OSS
+			n := utf8.RuneCountInString(bs)
+			if runes+n >= limit {
+				// NOTE(review): this only leaves the inner loop, so the
+				// remaining groups are still fetched and may still be
+				// appended if they fit. Map iteration order is random, so
+				// which attachments make it in can differ between runs.
+				// Confirm whether stopping entirely was intended.
+				break
+			}
+			filetext += bs + "\n"
+			runes += n + 1 // +1 for the "\n" separator
+		}
+	}
+	return filetext
+}

+ 1 - 1
udpcreateindex/src/config.json

@@ -30,7 +30,7 @@
     },
     "bidding": {
         "db": "mxs",
-        "collect": "test",
+        "collect": "filetext",
         "index": "bidding_v2",
         "type": "bidding",
         "extractdb": "mxs",

+ 3 - 2
udpcreateindex/src/task.go

@@ -55,12 +55,13 @@ func task_qyxyindex() {
 	qyxyTask(q)
 }
 
+//定时任务site:中国招标投标公共服务平台
 func crontab() {
 	defer qutil.Catch()
 	q := map[string]interface{}{
 		"_id": map[string]interface{}{
-			"$gte": qutil.StringTOBsonId("5e990e1e50b5ea296ef47129"), //2020-02-01 5e344f0b50b5ea296ed0cfbd
-			"$lte": qutil.StringTOBsonId("5ea00952511b12033763cebd"), //2020-04-22 5e9f1880f2c1a7850ba43979
+			"$gte": qutil.StringTOBsonId("5e9a0b7650b5ea296ef5403c"), //2020-02-01 5e344f0b50b5ea296ed0cfbd
+			"$lte": qutil.StringTOBsonId("5e9a0b7650b5ea296ef5403c"), //2020-04-22 5e9f1880f2c1a7850ba43979
 		},
 	}
 	site_attach_text(q)