Procházet zdrojové kódy

更新 预处理,定时提前创建好索引

wcc před 1 rokem
rodič
revize
0552d7de2a

+ 20 - 0
pre_extract/config.go

@@ -4,6 +4,8 @@ type GlobalConf struct {
 	MongoB MgoConf
 	Log    Log
 	Env    EnvConf
+	ES     EsConf
+	Email  EmailConf
 }
 
 type EnvConf struct {
@@ -12,6 +14,9 @@ type EnvConf struct {
 	NextAddr   string
 	NextPort   string
 	LocalPort  string
+	Spec       string
+	SpecType   string
+	Send       bool
 }
 
 type MgoConf struct {
@@ -33,3 +38,18 @@ type Log struct {
 	LogLevel   string
 	Format     string
 }
+
+// EsConf carries Elasticsearch connection settings; GlobalConf exposes it
+// as the ES field.
+type EsConf struct {
+	// Endpoint base URL and basic-auth credentials used by the index jobs.
+	URL, Username, Password string
+
+	// Presumably a second endpoint; nothing in this change reads these
+	// fields — confirm intended use.
+	URL2, Username2, Password2 string
+}
+
+// EmailConf describes the HTTP mail gateway used for notifications.
+type EmailConf struct {
+	Api, To string // Api: gateway URL; To: recipient address
+}

+ 28 - 3
pre_extract/config.toml

@@ -19,9 +19,34 @@
 [env]
 #    filestart = "652c0c83296f2847a1c87241"      ## bidding_file 起始ID
 #    nomalstart = "652c0c85ff68a4004101fe97"     ## bidding_nomal 起始ID
-    nextaddr = "127.0.0.1"       ## 调用抽取地址
-    nextport = "1177"       ## 调用抽取端口
-    localport = ":2345"
+    nextaddr = "127.0.0.1"      ## 调用抽取地址
+    nextport = "1177"           ## 调用抽取端口
+    localport = ":1176"         ## 本地监听端口
+    send = false                ## 是否给抽取发送数据;正式环境应为 true
+    spec = "0 00 23 * * *"      ## 每天23点执行;以日更新的方式创建索引
+    ## spec = "0 00 01 * * *"   ## 每天01点执行;适合 按照月份创建索引
+    spectype = "day" ## 定时任务类型;正式环境应该是 month 。day 表示每天创建一个索引;month 表示每个月创建一个
+
+[es]
+
+    url = "http://192.168.3.149:9201" ## 测试环境
+    username = ""
+    password = ""
+#
+#    url = "http://127.0.0.1:19905"
+#    username = "jybid"
+#    password = "Top2023_JEB01i@31"
+
+#    url2 = ""
+#    username2 = ""
+#    password2 = ""
+
+
+[email]
+    api = "http://172.17.145.179:19281/_send/_mail"
+    to = "wangchengcheng@topnet.net.cn"
+
+
 
 
 [log]

+ 415 - 0
pre_extract/es.go

@@ -0,0 +1,415 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"github.com/olivere/elastic/v7"
+	"go.uber.org/zap"
+	"net/http"
+	"strconv"
+
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
+	"time"
+)
+
+// setting is the "settings" fragment of the index-creation request body.
+// It declares three analyzers: my_ngram_title (1-gram tokenizer, lowercase
+// filter), my_ngram (2-gram tokenizer, lowercase filter) and douhao (pattern
+// analyzer splitting on ","). The single %s verb, number_of_shards, is
+// filled with "2" when the package initializes.
+var setting = fmt.Sprintf(`    "settings": {
+        "index": {
+            "analysis": {
+                "analyzer": {
+                    "my_ngram_title": {
+                        "filter": [
+                            "lowercase"
+                        ],
+                        "tokenizer": "my_ngram_title"
+                    },
+                    "douhao": {
+                        "type": "pattern",
+                        "pattern": ","
+                    },
+                    "my_ngram": {
+                        "filter": [
+                            "lowercase"
+                        ],
+                        "tokenizer": "my_ngram"
+                    }
+                },
+                "tokenizer": {
+                    "my_ngram_title": {
+                        "token_chars": [
+                            "letter",
+                            "digit",
+                            "punctuation",
+                            "symbol"
+                        ],
+                        "min_gram": "1",
+                        "type": "nGram",
+                        "max_gram": "1"
+                    },
+                    "my_ngram": {
+                        "token_chars": [
+                            "letter",
+                            "digit",
+                            "punctuation",
+                            "symbol"
+                        ],
+                        "min_gram": "2",
+                        "type": "nGram",
+                        "max_gram": "2"
+                    }
+                }
+            },
+            "number_of_shards": "%s",
+            "number_of_replicas": "0",
+            "max_result_window": "20000"
+        }
+    }`, "2")
+
+// mapping is the "mappings" fragment of the index-creation request body.
+// "dynamic" is false, so only the properties listed here are mapped. Text
+// fields reference the analyzers declared in setting (my_ngram_title,
+// my_ngram, douhao).
+var mapping = `    "mappings": {
+        "dynamic": false,
+        "properties": {
+            "dataweight": {
+                "type": "long"
+            },
+            "projectcode": {
+                "type": "keyword"
+            },
+             "object_type": {
+                "type": "keyword"
+            },
+            "bidopentime": {
+                "type": "long"
+            },
+            "bidamount": {
+                "type": "double"
+            },
+            "winner": {
+                "type": "keyword"
+            },
+            "buyer": {
+                "type": "keyword",
+                "fields": {
+                    "mbuyer": {
+                        "analyzer": "my_ngram_title",
+                        "type": "text"
+                    }
+                }
+            },
+            "budget": {
+                "type": "double"
+            },
+            "projectname": {
+                "type": "keyword",
+                "fields": {
+                    "pname": {
+                        "analyzer": "my_ngram_title",
+                        "type": "text"
+                    }
+                }
+            },
+            "area": {
+                "type": "keyword"
+            },
+            "city": {
+                "type": "keyword"
+            },
+            "district": {
+                "type": "keyword"
+            },
+            "s_winner": {
+                "analyzer": "douhao",
+                "type": "text",
+                "fields": {
+                    "mwinner": {
+                        "analyzer": "my_ngram_title",
+                        "type": "text"
+                    }
+                }
+            },
+            "pici": {
+                "type": "long"
+            },
+            "id": {
+                "type": "keyword"
+            },
+            "title": {
+                "analyzer": "my_ngram_title",
+                "type": "text",
+                "fields": {
+                    "mtitle": {
+                        "type": "keyword"
+                    }
+                }
+            },
+            "detail": {
+                "analyzer": "my_ngram",
+                "type": "text"
+            },
+            "site": {
+                "type": "keyword"
+            },
+            "comeintime": {
+                "type": "long"
+            },
+            "href": {
+                "type": "keyword"
+            },
+            "infoformat": {
+                "type": "long"
+            },
+            "publishtime": {
+                "type": "long"
+            },
+            "toptype": {
+                "type": "keyword"
+            },
+            "subtype": {
+                "type": "keyword"
+            },
+            "createtime": {
+                "type": "long"
+            }
+        }
+    }`
+
+// createIndex creates the Elasticsearch index PreBiddingIndex from the
+// package-level setting and mapping fragments.
+//
+// Steps:
+//  1. Return nil at once if the index already exists.
+//  2. Set the transient cluster.routing.allocation.enable to "all".
+//  3. Create the index.
+//  4. Set cluster.routing.allocation.enable back to "none", whether or not
+//     step 3 succeeded, so a failed run does not leave the temporary
+//     setting behind.
+//
+// A non-nil error is returned when the existence check, the enable request
+// or the creation fails, or when the creation is not acknowledged. Failure
+// to restore "none" is only logged.
+func createIndex(client *elastic.Client, PreBiddingIndex string) error {
+	ctx := context.Background()
+
+	exist, err := client.IndexExists(PreBiddingIndex).Do(ctx)
+	if err != nil {
+		return fmt.Errorf("checking index %q: %w", PreBiddingIndex, err)
+	}
+	if exist {
+		log.Info("CreateIndex", zap.String(PreBiddingIndex, "已经存在了"))
+		return nil
+	}
+
+	// One client with a timeout serves both cluster-settings calls, so an
+	// unresponsive node cannot block the scheduled job indefinitely.
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+
+	if err := setClusterAllocation(httpClient, "all"); err != nil {
+		return fmt.Errorf("enabling shard allocation: %w", err)
+	}
+	fmt.Println("节点平衡已开启")
+
+	// Registered right after the enable succeeds so that every later return
+	// path turns allocation off again.
+	defer func() {
+		if err := setClusterAllocation(httpClient, "none"); err != nil {
+			log.Error("关闭节点平衡", zap.Error(err))
+			return
+		}
+		fmt.Println("节点平衡已关闭")
+	}()
+
+	createJson := fmt.Sprintf(`{%s,%s}`, setting, mapping)
+	res, err := client.CreateIndex(PreBiddingIndex).BodyString(createJson).Do(ctx)
+	if err != nil {
+		return fmt.Errorf("creating index %q: %w", PreBiddingIndex, err)
+	}
+	if !res.Acknowledged {
+		return fmt.Errorf("creating index %q: request not acknowledged", PreBiddingIndex)
+	}
+	return nil
+}
+
+// setClusterAllocation PUTs a transient cluster.routing.allocation.enable
+// value (mode, e.g. "all" or "none") to GF.ES.URL/_cluster/settings using
+// basic auth from GF.ES. Any transport error or non-200 status is returned.
+func setClusterAllocation(httpClient *http.Client, mode string) error {
+	payload := fmt.Sprintf(`{"transient":{"cluster.routing.allocation.enable":%q}}`, mode)
+	req, err := http.NewRequest(http.MethodPut, GF.ES.URL+"/_cluster/settings", bytes.NewBufferString(payload))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	credentials := GF.ES.Username + ":" + GF.ES.Password
+	req.Header.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(credentials)))
+
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("cluster settings update returned %s", resp.Status)
+	}
+	return nil
+}
+
+// deleteIndex removes the named index.
+//
+// It returns the existence-check error if that request fails, an error
+// stating that the index does not exist if it is absent, or the error from
+// the delete request itself; nil on success.
+func deleteIndex(client *elastic.Client, index string) error {
+	ctx := context.Background()
+
+	exist, err := client.IndexExists(index).Do(ctx)
+	// Inspect err before exist: when the check itself fails, exist is false
+	// and must not be reported as "index does not exist".
+	if err != nil {
+		return err
+	}
+	if !exist {
+		return errors.New("索引" + index + "不存在")
+	}
+
+	_, err = client.DeleteIndex(index).Do(ctx)
+	return err
+}
+
+// dealIndexByDay is the daily index job. When the current local hour is 23
+// it creates tomorrow's index and then deletes yesterday's index; at any
+// other hour it does nothing.
+//
+// Index names are "bidding_" + year + month + day with no zero padding.
+// NOTE(review): unpadded names are ambiguous (2023-1-11 and 2023-11-1 both
+// yield "bidding_2023111"). The format is kept as-is because other programs
+// presumably rely on it — confirm before changing.
+// NOTE(review): the hour check uses the process-local time zone — confirm
+// it matches the zone the scheduler runs in.
+func dealIndexByDay() {
+	now := time.Now()
+	// Only act during the last hour of the day.
+	if now.Hour() != 23 {
+		return
+	}
+
+	client, err := elastic.NewClient(
+		elastic.SetURL(GF.ES.URL),
+		elastic.SetBasicAuth(GF.ES.Username, GF.ES.Password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		// Without a client nothing below can work; report and stop instead
+		// of calling into a nil client.
+		log.Error("创建 Elasticsearch 客户端失败:", zap.Error(err))
+		SendMail("预处理索引", "预处理索引创建失败,请检查")
+		return
+	}
+
+	indexName := func(t time.Time) string {
+		return "bidding_" + strconv.Itoa(t.Year()) + strconv.Itoa(int(t.Month())) + strconv.Itoa(t.Day())
+	}
+
+	// Create the index for the next day.
+	PreBiddingIndex := indexName(now.AddDate(0, 0, 1))
+	if err := createIndex(client, PreBiddingIndex); err != nil {
+		log.Info("dealIndexByDay", zap.Error(err))
+		SendMail("预处理索引", "预处理索引创建失败,请检查")
+	} else {
+		log.Info("dealIndexByDay", zap.String(PreBiddingIndex, "创建成功"))
+	}
+
+	// Delete yesterday's index.
+	lastIndex := indexName(now.AddDate(0, 0, -1))
+	if err := deleteIndex(client, lastIndex); err != nil {
+		log.Info("dealIndexByDay", zap.Error(err))
+	}
+}
+
+// dealIndexByMonth is the monthly index job. On the last day of the current
+// month it creates next month's index and then deletes the previous month's
+// index; on any other day it does nothing.
+//
+// Index names are "bidding_" + year + month with no zero padding.
+func dealIndexByMonth() {
+	now := time.Now()
+	// Day 0 of the following month normalizes to the last day of this month.
+	lastDayOfMonth := time.Date(now.Year(), now.Month()+1, 0, 0, 0, 0, 0, time.UTC)
+	if now.Day() != lastDayOfMonth.Day() {
+		return
+	}
+	fmt.Println("当前时间是当前月份的最后一天")
+
+	client, err := elastic.NewClient(
+		elastic.SetURL(GF.ES.URL),
+		elastic.SetBasicAuth(GF.ES.Username, GF.ES.Password),
+		elastic.SetSniff(false),
+	)
+	if err != nil {
+		// Without a client nothing below can work; report and stop instead
+		// of calling into a nil client.
+		log.Error("创建 Elasticsearch 客户端失败:", zap.Error(err))
+		SendMail("预处理索引", "预处理索引创建失败,请检查")
+		return
+	}
+
+	indexName := func(t time.Time) string {
+		return "bidding_" + strconv.Itoa(t.Year()) + strconv.Itoa(int(t.Month()))
+	}
+
+	// Anchor month arithmetic on day 1. Shifting the last day of a month by
+	// -1 month overflows back into the current month when the previous month
+	// is not longer (e.g. Mar 31 -> "Feb 31" -> March), which would make the
+	// job delete the index that is still in use.
+	firstOfMonth := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, now.Location())
+
+	// Create next month's index.
+	PreBiddingIndex := indexName(firstOfMonth.AddDate(0, 1, 0))
+	if err := createIndex(client, PreBiddingIndex); err != nil {
+		log.Info("dealIndexByMonth", zap.Error(err))
+		SendMail("预处理索引", "预处理索引创建失败,请检查")
+	} else {
+		log.Info("dealIndexByMonth", zap.String(PreBiddingIndex, "创建成功"))
+	}
+
+	// Delete the previous month's index.
+	lastIndex := indexName(firstOfMonth.AddDate(0, -1, 0))
+	if err := deleteIndex(client, lastIndex); err != nil {
+		log.Info("dealIndexByMonth", zap.Error(err))
+	}
+}
+
+// SendMail asks the mail gateway at GF.Email.Api to send a message to
+// GF.Email.To by issuing a GET request with "to", "title" and "body" query
+// parameters. Failures are logged, not returned.
+func SendMail(title, content string) {
+	req, err := http.NewRequest(http.MethodGet, GF.Email.Api, nil)
+	if err != nil {
+		log.Info("SendMail", zap.Any("err", err))
+		return
+	}
+
+	// Build the query through url.Values so that non-ASCII text and reserved
+	// characters ("@", ",", "&", spaces) are percent-encoded rather than
+	// pasted raw into the URL.
+	query := req.URL.Query()
+	query.Set("to", GF.Email.To)
+	query.Set("title", title)
+	query.Set("body", content)
+	req.URL.RawQuery = query.Encode()
+	fmt.Println("url=>", req.URL.String())
+
+	// Bound the call so a stalled gateway cannot hang the calling job.
+	httpClient := &http.Client{Timeout: 10 * time.Second}
+	res, err := httpClient.Do(req)
+	if err != nil {
+		log.Info("SendMail", zap.Any("err", err))
+		return
+	}
+	defer res.Body.Close()
+
+	log.Info("SendMail", zap.String("status", res.Status))
+}

+ 4 - 0
pre_extract/go.mod

@@ -3,6 +3,7 @@ module pre_extract
 go 1.18
 
 require (
+	github.com/olivere/elastic/v7 v7.0.32
 	github.com/spf13/viper v1.16.0
 	go.mongodb.org/mongo-driver v1.10.1
 	go.uber.org/zap v1.22.0
@@ -16,12 +17,15 @@ require (
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/golang/snappy v0.0.1 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
 	github.com/klauspost/compress v1.13.6 // indirect
 	github.com/magiconair/properties v1.8.7 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect
 	github.com/pelletier/go-toml/v2 v2.1.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/robfig/cron/v3 v3.0.1 // indirect
 	github.com/spf13/afero v1.10.0 // indirect
 	github.com/spf13/cast v1.5.1 // indirect
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect

+ 6 - 0
pre_extract/go.sum

@@ -66,6 +66,7 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m
 github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
 github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
 github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
 github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
 github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
@@ -145,6 +146,7 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
 github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -159,12 +161,14 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
 github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
 github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0=
 github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
 github.com/nsqio/go-nsq v1.1.0/go.mod h1:vKq36oyeVXgsS5Q8YEO7WghqidAVXQlcFxzQbQTuDEY=
+github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
 github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
 github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
 github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
@@ -176,6 +180,8 @@ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qR
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
+github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
 github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=

+ 28 - 5
pre_extract/main.go

@@ -3,6 +3,7 @@ package main
 import (
 	"encoding/json"
 	"fmt"
+	"github.com/robfig/cron/v3"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	"go.uber.org/zap"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
@@ -54,6 +55,33 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 }
 
 func main() {
+	// Schedules are evaluated in Asia/Shanghai. If the time-zone database is
+	// unavailable, fall back to a fixed UTC+8 zone rather than handing the
+	// scheduler a nil location.
+	local, err := time.LoadLocation("Asia/Shanghai")
+	if err != nil {
+		log.Error("main", zap.Any("LoadLocation err", err))
+		local = time.FixedZone("CST", 8*60*60)
+	}
+	c := cron.New(cron.WithLocation(local), cron.WithSeconds())
+
+	// Pick the index job matching the configured schedule type.
+	switch GF.Env.SpecType {
+	case "day":
+		if _, err := c.AddFunc(GF.Env.Spec, dealIndexByDay); err != nil {
+			log.Error("main", zap.Any("AddFunc err", err))
+		}
+	case "month":
+		if _, err := c.AddFunc(GF.Env.Spec, dealIndexByMonth); err != nil {
+			log.Error("main", zap.Any("AddFunc err", err))
+		}
+	default:
+		// Surface a misconfiguration instead of silently scheduling nothing.
+		log.Error("main", zap.String("unknown spectype", GF.Env.SpecType))
+	}
+
+	c.Start()
+
+	if GF.Env.Send {
+		go SendPreData()
+	}
+
+	// Block forever; the scheduler and the optional sender run in their own
+	// goroutines. Nothing after this point would ever execute, so no
+	// deferred c.Stop() is registered.
+	select {}
+}
+
+//SendPreData 发送预处理数据给 抽取程序
+func SendPreData() {
 	f_sid := ""
 	n_sid := ""
 	f_lid := "" //file 最后一个分类结束ID
@@ -116,11 +144,6 @@ func main() {
 			n_sid = n_lid
 		}
 	}
-
-}
-
-func deletePreData() {
-
 }
 
 //BsonIdToSId 根据bsonID转string

+ 23 - 0
pre_extract/pre_test.go

@@ -0,0 +1,23 @@
+package main
+
+import (
+	"fmt"
+	"strconv"
+	"testing"
+	"time"
+)
+
+// TestMonth checks the index name derived from a date shifted back by one
+// month and one day: 2023-08-30 becomes 2023-07-29, i.e. "bidding_20237".
+func TestMonth(t *testing.T) {
+	date := time.Date(2023, time.August, 30, 0, 0, 0, 0, time.UTC)
+	shifted := date.AddDate(0, -1, -1)
+
+	yearStr := strconv.Itoa(shifted.Year())
+	monthStr := strconv.Itoa(int(shifted.Month()))
+
+	// Index names use the unpadded year followed by the unpadded month.
+	got := fmt.Sprintf("bidding_%s%s", yearStr, monthStr)
+	if want := "bidding_20237"; got != want {
+		t.Fatalf("index name = %q, want %q", got, want)
+	}
+}

+ 7 - 2
pre_extract/readme.txt

@@ -1,4 +1,9 @@
 
-这里主要是读取 bidding_file  bidding_nomal 数据表,然后拿到 分类好的数据段,传递抽取程序去处理。
+功能:
+   1. 读取 bidding_file  bidding_nomal 数据表,然后拿到 分类好的数据段,传递抽取程序去处理。
+   2. 定时 创建索引结构;可以每天创建一个或者每月创建一个
 
-主要是为了数据 预处理流程服务
+目的:
+    数据处理流程添加了一个预处理阶段,需要把附件识别、标的物识别 过程先放弃,先生索引到一个临时索引,
+    为了 提高数据时效性。本程序主要实现上面两个功能,具体 `预处理数据生索引`,以及 `删除预处理数据`,
+    需要在索引程序 中 完成。