Browse Source

优化 存量 索引

wcc 2 years ago
parent
commit
2c01aa3377
4 changed files with 86 additions and 223 deletions
  1. 0 201
      common.toml
  2. 22 19
      createEsIndex/bidding_es.go
  3. 61 0
      createEsIndex/biddingall.toml
  4. 3 3
      createEsIndex/main.go

+ 0 - 201
common.toml

@@ -1,201 +0,0 @@
-
-[udp]
-locport = ":17834"
-jyaddr = "172.17.145.178"
-jyport = 11118
-
-[db]
-[db.mongoB]
-addr = "172.17.4.187:27082,172.17.145.163:27083"
-dbname = "qfw"
-coll = "bidding"
-size = 15
-user = "SJZY_RWbid_ES"
-password = "SJZY@B4i4D5e6S"
-
-[db.mongoP]
-addr = "172.17.4.85:27080"
-dbname = "qfw"
-coll = "projectset_20230407"
-size = 15
-user = ""
-password = ""
-
-[db.mongoQ]
-addr = "172.17.4.187:27082,172.17.145.163:27083"
-dbname = "mixdata"
-coll = ""
-size = 15
-user = "SJZY_RWbid_ES"
-password = "SJZY@B4i4D5e6S"
-
-[db.oss]
-    endpoint = "oss-cn-beijing-internal.aliyuncs.com"## 正式环境
-  ##  endpoint = "oss-cn-beijing.aliyuncs.com"## 测试环境
-    accesskey = "LTAI4G5x9aoZx8dDamQ7vfZi"
-    accesssecret = "Bk98FsbPYXcJe72n1bG3Ssf73acuNh"
-    bucketname = "topjy"
-
-
-[db.es]
-addr = "http://172.17.4.184:19805,http://172.17.148.50:19805,http://172.17.145.164:19805"
-addrp = "http://172.17.145.178:9800"
-size = 5
-username = "es_all"
-password = "TopJkO2E_d1x"
-indexb = "bidding_v1"
-typeb = "bidding"
-indexp = "projectset_v1"
-typep = "projectset"
-indexwinner = "winner"
-typewinner = "winner"
-indexbuyer = "buyer"
-typebuyer = "buyer"
-detailfilter = ["(招标网|千里马|采招网|招标采购导航网|招标与采购网|中国招投标网|中国采购与招标网|中国采购与招标|优质采)[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}(qianlima|zhaobiao|okcis|zbytb|infobidding|bidcenter|youzhicai|chinabidding|Chinabidding|CHINABIDDING)[a-z0-9.\\/\\/]{0,40}",
-    "招标网[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}zhaobiao[a-z0-9.\\/\\/]{0,40}",
-    "千里马[\\w\\W]{0,15}[a-z0-9:\\/\\/.]{0,20}qianlima[a-z0-9.\\/\\/]{0,10}",
-    "[\\((]?(网址)?[::;;]?(http|https|htpps)*[::]?(\\/\\/)?(www|jinan|WWW)?.(zhaobiao|chinabidding|Chinabidding|CHINABIDDING|infobidding|zbytb|okcis|qianlima|youzhicai).(com|cn|COM|CN)?(.cn|.CN)?\\/?[\\))]?",
-    "[\\((]?(网址)?(::)?(http|https|htpps)*(:|:)?\\/\\/www.bidcenter.com.cn\\/",
-    "千里马(平台|网站)+", "[“\"]?优质采(平台|电子交易平台|云采购平台|交易平台)?[”\"]?", "《?(中国采购与|中国)?招(投)?标(与采购|采购导航)?网》?",
-    "《?元博网(采购与招标网)?》?", "《?(中国)?招标采购导航网》?", "中\\W{0,3}国采\\W{0,3}招\\W{0,3}网\\W*[((]?(bidcenter.com.cn)?[))]?", "已方宝", "中国招标与采购"]
-
-[mail]
-send = true
-to = "wangjianghan@topnet.net.cn"
-api = "http://172.17.145.179:19281/_send/_mail"
-
-# 日志
-[log]
-# 日志路径,为空将输出控制台
-logpath = "./log/log.out"
-# log size (M)
-maxsize = 10
-# compress log
-compress = true
-# log retention time (days)
-maxage =  7
-# maximum number of rotated log files to keep
-maxbackups = 10
-# log level
-loglevel  = "debug"
-# text or json output
-format = "text"
-
-[db.es.fieldes]
-"_id" = ""
-"buyerzipcode" = "string"
-"winnertel" = "string"
-"winnerperson" = "string"
-"contractcode" = "string"
-"winneraddr" = "string"
-"agencyaddr" = "string"
-"buyeraddr" = "string"
-"signaturedate" = "int64"
-"projectperiod" = "string"
-"projectaddr" = "string"
-"agencytel" = "string"
-"agencyperson" = "string"
-"buyerperson" = "string"
-"agency" = "string"
-"projectscope" = "string"
-"projectcode" = "string"
-"bidopentime" = "int64"
-"supervisorrate" = "float64"
-"buyertel" = "string"
-"bidamount" = "float64"
-"winner" = "string"
-"buyer" = "string"
-"budget" = "float64"
-"projectname" = "string"
-"bidstatus" = "string"
-"buyerclass" = "string"
-"topscopeclass" = ""
-"s_topscopeclass" = "string"
-"s_subscopeclass" = "string"
-"area" = "string"
-"city" = "string"
-"district" = "string"
-"s_winner" = "string"
-"title" = "string"
-"detail" = "string"
-"site" = "string"
-"comeintime" = "int64"
-"href" = "string"
-"infoformat" = "int32"
-"publishtime" = "int64"
-"s_sha" = "string"
-"spidercode" = "string"
-"subtype" = "string"
-"toptype" = "string"
-"projectinfo" = ""
-"purchasing" = "string"
-"purchasinglist" = ""
-"channel" = "string"
-"winnerorder" = ""
-"project_scale" = "string"
-"project_duration" = "int32"
-"project_timeunit" = "string"
-"project_startdate" = "int64"
-"project_completedate" = "int64"
-"payway" = "string"
-"contract_guarantee" = "bool"
-"bid_guarantee" = "bool"
-"qualifies" = ""
-"entidlist" = ""
-"funds" = "string"
-"review_experts" = "string"
-"bidmethod" = "string"
-"bidendtime" = "int64"
-"bidopenaddress" = "string"
-"docamount" = "float64"
-"agencyrate" = "float64"
-"agencyfee" = "float64"
-"bidway" = "string"
-"getdocmethod" = "string"
-"china_bidding" = "string"
-"purchasing_tag" = "string"
-"multipackage" = "int32"
-"isValidFile" = "bool"
-"bid_field" = "string"
-"bidstarttime" = "int64"
-"docendtime" = "int64"
-"docstarttime" = "int64"
-"signendtime" = "int64"
-"signstarttime" = "int64"
-"issue_quota" = "float64"
-"bidopen_shape" = "string"
-"quote_mode" = "string"
-"is_acquire_tender" = "bool"
-"is_payment_deposit" = "bool"
-"is_joint_bidding" = "bool"
-"procurementlist" = ""
-"object_type"="string"  ##针对中国政府采购网,添加字段,区分货物、服务和工程
-[db.es.fieldprojectinfo]
-"approvecode" = "string"
-"approvecontent" = "string"
-"approvestatus" = "string"
-"approvetime" = "string"
-"approvedept" = "string"
-"approvenumber" = "string"
-"projecttype" = "string"
-"approvecity" = "string"
-[db.es.fieldpurchasinglist]
-"itemname" = "string"
-"item" = "string"
-"brandname" = "string"
-"model" = "string"
-"unitname" = "string"
-"number" = "float64"
-"unitprice" = "float64"
-"totalprice" = "float64"
-[db.es.fieldprocurementlist]
-"projectname" = "string"
-"buyer" = "string"
-"item" = "string"
-"projectscope" = "string"
-"expurasingtime" = "string"
-"totalprice" = "float64"
-[db.es.fieldwinnerorder]
-"sort" = "int"
-"sortstr" = "string"
-"entname" = "string"

+ 22 - 19
createEsIndex/bidding_es.go

@@ -235,11 +235,11 @@ func biddingAllDataTask() {
 		Lteid string
 	}
 	type RoutinesConf struct {
-		num int
+		Num int
 	}
 	type AllConf struct {
-		All     map[string]Biddingall
-		Routine RoutinesConf
+		All      map[string]Biddingall
+		Routines RoutinesConf
 	}
 	var all AllConf
 
@@ -257,15 +257,15 @@ func biddingAllDataTask() {
 		fmt.Println("biddingAllDataTask Unmarshal err =>", err)
 		return
 	}
-	//fmt.Printf("%+v", all)
+
 	for k, conf := range all.All {
-		go dealData(conf.Coll, conf.Gtid, conf.Lteid, k)
+		go dealData(conf.Coll, conf.Gtid, conf.Lteid, k, all.Routines.Num)
 	}
 
 }
 
-func dealData(coll, gtid, lteid, kword string) {
-	ch := make(chan bool, 50)
+func dealData(coll, gtid, lteid, kword string, routines int) {
+	ch := make(chan bool, routines)
 	wg := &sync.WaitGroup{}
 	q := map[string]interface{}{
 		"_id": map[string]interface{}{
@@ -415,10 +415,11 @@ func GetEsField(tmp map[string]interface{}, stype string) (map[string]interface{
 			} else if field == "projectscope" {
 				ps, _ := tmp["projectscope"].(string)
 				newTmp["projectscope"] = ps
-				if len(ps) > pscopeLength {
-					saveErr["projectscope"] = ps
-					saveErr["projectscope_length"] = len(ps)
-				}
+				//新版本已无需记录长度
+				//if len(ps) > pscopeLength {
+				//	saveErr["projectscope"] = ps
+				//	saveErr["projectscope_length"] = len(ps)
+				//}
 			} else if field == "winnerorder" { //中标候选
 				winnerorder_new := []map[string]interface{}{}
 				if winnerorder, _ := tmp[field].([]interface{}); len(winnerorder) > 0 {
@@ -469,10 +470,11 @@ func GetEsField(tmp map[string]interface{}, stype string) (map[string]interface{
 			} else if field == "detail" { //过滤
 				detail, _ := tmp[field].(string)
 				detail = filterSpace.ReplaceAllString(detail, "")
-				if len(detail) > pscopeLength {
-					saveErr["detail"] = detail
-					saveErr["detail_length"] = len(detail)
-				}
+				// 不需要再保存记录长度
+				//if len(detail) > pscopeLength {
+				//	saveErr["detail"] = detail
+				//	saveErr["detail_length"] = len(detail)
+				//}
 				if tmp["cleartag"] != nil {
 					if tmp["cleartag"].(bool) {
 						text, _ := FilterDetail(detail)
@@ -509,10 +511,11 @@ func GetEsField(tmp map[string]interface{}, stype string) (map[string]interface{
 	filetext := getFileText(tmp)
 	if len([]rune(filetext)) > 10 {
 		newTmp["filetext"] = filetext
-		if len(filetext) > pscopeLength {
-			saveErr["filetext"] = filetext
-			saveErr["filetext_length"] = len(filetext)
-		}
+		// 新版不再需要记录
+		//if len(filetext) > pscopeLength {
+		//	saveErr["filetext"] = filetext
+		//	saveErr["filetext_length"] = len(filetext)
+		//}
 	}
 	YuceEndtime(newTmp) // 预测结果时间
 	if stype == "bidding" || stype == "bidding_history" {

+ 61 - 0
createEsIndex/biddingall.toml

@@ -0,0 +1,61 @@
+[routines]  ## 开启协程个数
+num = 50
+
+
+
+[[all]]
+
+    [all.bidding_back]
+    coll = "bidding_back"
+    gtid = "0"
+    lteid = "5a862e7040d2d9bbe88e3b1f" ## bidding_back 最后一个ID
+
+    [all.02]
+    coll = "bidding"
+    gtid = "0"
+    lteid = "5c531b800000000000000000" ## 2019.2.1  15493432
+
+    [all.03]
+    coll = "bidding"
+    gtid = "5c531b800000000000000000"
+    lteid = "5e0b70800000000000000000" ## 2020.1.1  17995862
+
+    [all.04]
+    coll = "bidding"
+    gtid = "5e0b70800000000000000000"
+    lteid = "5f74ab800000000000000000" ## 2020.10.1 17611742
+
+    [all.05]
+    coll = "bidding"
+    gtid = "5f74ab800000000000000000"
+    lteid = "608c29800000000000000000" ## 2021.5.1  17135203
+
+    [all.06]
+    coll = "bidding"
+    gtid = "608c29800000000000000000"
+    lteid = "6155df000000000000000000" ## 2021.10.1  20316855
+
+    [all.07]
+    coll = "bidding"
+    gtid = "6155df000000000000000000"
+    lteid = "621cf1800000000000000000" ## 2022.3.1  18930270
+
+    [all.08]
+    coll = "bidding"
+    gtid = "621cf1800000000000000000"
+    lteid = "62bdc8800000000000000000" ## 2022.7.1 18373938
+
+    [all.09]
+    coll = "bidding"
+    gtid = "62bdc8800000000000000000"
+    lteid = "633712800000000000000000" ## 2022.10.1  19093157
+
+    [all.10]
+    coll = "bidding"
+    gtid = "633712800000000000000000"
+    lteid = "63b05c800000000000000000" ## 2023.1.1   20198847
+
+    [all.11]
+    coll = "bidding"
+    gtid = "63b05c800000000000000000"
+    lteid = "644e90800000000000000000" ##  2023.5.1  18038591

+ 3 - 3
createEsIndex/main.go

@@ -43,9 +43,9 @@ var (
 	saveErrBidPool = make(chan map[string]interface{}, 5000)
 	saveBidSp      = make(chan bool, 5)
 
-	detailLength = 50000 // es保存detail长度
-	fileLength   = 50000 // es保存附件文本长度
-	pscopeLength = 32766 // projectscope长度
+	//detailLength = 50000 // es保存detail长度
+	fileLength = 50000 // es保存附件文本长度
+	//pscopeLength = 32766 // projectscope长度
 
 )