Эх сурвалжийг харах

更新采购单位 数据源,从tidb 获取数据

wcc 2 жил өмнө
parent
commit
6b23fc5d2e

+ 307 - 0
README.md

@@ -16,3 +16,310 @@
    1. [log] 配置下`logpath`  189 环境为空,196 环境下配置不为空;为空将输出控制台
 2. bidding_es 文件中 `GetEsField` 方法 为公用的,全量跑数据不需要 `saveErr`;`saveErr` 用来存储 针对 `projectscope` 、`detail` 、 `filetext` 附件内容 错误的;全量跑数据时屏蔽掉,跑增量数据打开。
 
+# 1.索引mapping
+
+1. 拉取git 地址库:[ssh://root@192.168.3.207:10022/maxiaoshan/esmapping.git](ssh://root@192.168.3.207:10022/maxiaoshan/esmapping.git)
+
+2. 根据索引库,找到对应mapping 文件;
+
+   | 文件名                  | 索引名             | 备注                                                |
+      | ----------------------- | ------------------ | --------------------------------------------------- |
+   | biddingmapping2.txt     | bidding            | 标讯数据;163 服务qfw.bidding                       |
+   | projectset2.txt         | projectset         | 项目数据;85服务qfw.projectset_20230407             |
+   | biddingall2.txt         | biddingall         | 采集爬虫判重;145.178:9200es,保存的也是bidding数据 |
+   | qyxy2.txt               | qyxy               | 凭安 企业数据;163.mixdata.qyxy_std                 |
+   | project-forecast2.txt   | forecast           | 项目预测                                            |
+   | buyer2.txt              | buyer              | 采购单位数据;163.mixdata.buyer_enterprise          |
+   | winner2.txt             | winner             | 中标单位数据;163.mixdata.winner_enterprise         |
+   | supplyinfo2.txt         | supplyinfo         | 供应商信息;剑鱼维护                                |
+   | supplier_product.txt    |                    | 废弃                                                |
+   | proposed.txt            | proposed           | 拟建数据。                                          |
+   | project.json            |                    | 废弃                                                |
+   | oprd_qyxy_v1            | oprd_qyxy_v1       | 广州移动                                            |
+   | oprd_project.txt        | oprd_projectset_v1 | 广州移动                                            |
+   | oprd_bidding.txt        | oprd_bidding_v1    | 广州移动                                            |
+   | medical_institution.txt |                    | 废弃                                                |
+   | medical_dealer.txt      |                    | 废弃                                                |
+
+3. 添加字段或者修改mapping
+
+   ​	a.添加字段或者修改字段类型,需要报备  **季春玲**
+
+   ​	b.字段应和索引程序保持一致,不分词字段类型为 keyword ,分词字段类型应为 text
+
+   ​	c.具体字段信息需要和研发沟通确认
+
+4. **字段改动都需要在 149的es 环境进行测试,数据没问题后通知测试人员走流程测试**
+
+5. 数据测试没问题后,提交代码;并告知产品,做好维护  **@季春玲**
+
+6. 更新正式环境前,需要提前邮件通知,写清楚 具体什么时间开始切换;切换完毕后也需要通知大家
+
+
+
+# 2.数据迭代
+
+## bidding数据
+
+### 存量数据
+
+1. 修改配置文件  **biddingall.toml** ,更新文件配置,找到需要运行的存量数据id
+
+   ```Toml
+   [routines]  ## 开启协程个数
+   num = 50
+   
+   [[all]]
+   [all.bidding_back]
+   coll = "bidding_back"
+   gtid = "0"
+   lteid = "5a862e7040d2d9bbe88e3b1f" ## bidding_back 最后一个ID
+   
+   [all.02]
+   coll = "bidding"
+   gtid = "0"
+   lteid = "5c531b800000000000000000" ## 2019.2.1  15493432
+   
+   [all.03]
+   coll = "bidding"
+   gtid = "5c531b800000000000000000"
+   lteid = "5e0b70800000000000000000" ## 2020.1.1  17995862
+   
+   [all.04]
+   coll = "bidding"
+   gtid = "5e0b70800000000000000000"
+   lteid = "5f74ab800000000000000000" ## 2020.10.1 17611742
+   
+   [all.05]
+   coll = "bidding"
+   gtid = "5f74ab800000000000000000"
+   lteid = "608c29800000000000000000" ## 2021.5.1  17135203
+   
+   [all.06]
+   coll = "bidding"
+   gtid = "608c29800000000000000000"
+   lteid = "6155df000000000000000000" ## 2021.10.1  20316855
+   
+   [all.07]
+   coll = "bidding"
+   gtid = "6155df000000000000000000"
+   lteid = "621cf1800000000000000000" ## 2022.3.1  18930270
+   
+   [all.08]
+   coll = "bidding"
+   gtid = "621cf1800000000000000000"
+   lteid = "62bdc8800000000000000000" ## 2022.7.1 18373938
+   
+   [all.09]
+   coll = "bidding"
+   gtid = "62bdc8800000000000000000"
+   lteid = "633712800000000000000000" ## 2022.10.1  19093157
+   
+   [all.10]
+   coll = "bidding"
+   gtid = "633712800000000000000000"
+   lteid = "63b05c800000000000000000" ## 2023.1.1   20198847
+   
+   [all.11]
+   coll = "bidding"
+   gtid = "63b05c800000000000000000"
+   lteid = "644e90800000000000000000" ##  2023.5.1  18038591
+   ```
+
+2. udp 请求索引程序,**stype = bidding_all_data**
+
+```shell
+./sendtask -ip 127.0.0.1 -p 17834 -stype bidding_all_data
+```
+
+
+
+### 增量数据
+
+#### 需要生成pici
+
+> 这种比较常见,由于某种原因导致数据没生索引,需要手动生索引
+
+1. 确认缺少的数据段,可以参考 163 MongoDB bidding_processing_ids
+
+2. 请求索引程序,**stype=bidding**
+
+```Shell
+./sendtask -ip 127.0.0.1 -p 17834 -gtid 64705c740ebbbcdcb5cf3db4 -lteid 64705da00ebbbcdcb5cf41c0 -stype bidding
+```
+
+#### 不需要生成pici
+
+> 这种主要是针对 **过去** 的某些数据,不更新 pici 字段
+
+```Shell
+./sendtask -ip 127.0.0.1 -p 17834 -gtid 64705c740ebbbcdcb5cf3db4 -lteid 64705da00ebbbcdcb5cf41c0 -stype biddingall
+```
+
+## project数据
+
+### 存量数据
+
+1. 修改配置文件 **projectall.toml** ,更新文件配置,合理配置存量数据区间段
+
+   ```Toml
+   [routines]  ## 开启协程个数
+   num = 20
+   [[all]]
+   
+   [all.01]
+   coll = "projectset_20230407"
+   gtid = "0"
+   lteid = "5d839796a5cb26b9b770bc27" ##
+   
+   [all.02]
+   coll = "projectset_20230407"
+   gtid = "5d839796a5cb26b9b770bc27"
+   lteid = "60e28e641a75b8f446ee805d" ##
+   
+   [all.03]
+   coll = "projectset_20230407"
+   gtid = "60e28e641a75b8f446ee805d"
+   lteid = "62d9519d4d0d9b2bc2b402fa" ##
+   
+   [all.04]
+   coll = "projectset_20230407"
+   gtid = "62d9519d4d0d9b2bc2b402fa"
+   lteid = "6476e4b7eb01e8efa62a676e" ## mongo表最新ID
+   ```
+
+2. 发送 UDP 请求,注意修改对应的端口参数 p
+
+```shell
+./sendtask -ip 127.0.0.1 -p 17834 -stype project_all_data
+```
+
+
+
+### 增量数据
+
+> 针对项目信息,入索引库 **projectset**
+
+1. 发送UDP 请求,修改对应的端口参数 p
+
+```Shell
+./sendtask -ip 127.0.0.1 -p 17832 -stype project -tmpkey pici -tmptime 1684512000  # 相当于查询 pici 大于 1684512000 的数据
+```
+
+
+
+# 3.索引程序部署
+
+## 1.数据库依赖
+
+需要 **bidding_processing_field** 数据表,里面配置了bidding以及project 生索引的字段处理信息
+
+## 2.配置文件
+
+```Toml
+[udp]
+    locport = ":17834"          ## 本地监听地址
+    jyaddr = "127.0.0.1"
+    jyport = 11118
+
+[db]
+[db.mongoB] ## bidding数据
+    addr = "192.168.3.206:27002"    ## 测试环境
+    dbname = "qfw_data"
+    coll = "bidding"
+    size = 15
+    user = "root"
+    password = "root"
+
+[db.mongoP] ## project 数据
+    addr = "192.168.3.206:27002"
+    dbname = "qfw_data"
+    coll = "projectset"
+    size = 15
+    user = "root"
+    password = "root"
+
+[db.mongoQ] ## buyer winner 索引连接,只使用到了dbname,coll暂未使用
+    addr = "192.168.3.206:27002"
+    dbname = "mixdata"
+    coll = "qyxy_std"
+    size = 15
+    user = "root"
+    password = "root"
+
+[db.oss] ## 获取附件内容
+#    endpoint = "oss-cn-beijing-internal.aliyuncs.com"## 正式环境
+    endpoint = "oss-cn-beijing.aliyuncs.com"## 测试环境
+    accesskey = "LTAI4G5x9aoZx8dDamQ7vfZi"
+    accesssecret = "Bk98FsbPYXcJe72n1bG3Ssf73acuNh"
+    bucketname = "topjy"
+[db.es]
+    addr = "http://192.168.3.149:9200"      ## 正常bidding 链接
+    addrp = "http://172.17.145.178:9200"    ## 采集使用的单机版地址
+    username = "es_all"
+    password = "TopJkO2E_d1x"
+    size = 5
+    indexb = "bidding"
+    indextmp = "bidding_temporary"         ## 临时索引,其他程序需要
+    indexp = "projectset"
+    indexwinner = "winner"
+    indexbuyer = "buyer"
+detailfilter = ["(招标网|千里马|采招网|招标采购导航网|招标与采购网|中国招投标网|中国采购与招标网|中国采购与招标|优质采)[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}(qianlima|zhaobiao|okcis|zbytb|infobidding|bidcenter|youzhicai|chinabidding|Chinabidding|CHINABIDDING)[a-z0-9.\\/\\/]{0,40}",
+    "招标网[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}zhaobiao[a-z0-9.\\/\\/]{0,40}",
+    "千里马[\\w\\W]{0,15}[a-z0-9:\\/\\/.]{0,20}qianlima[a-z0-9.\\/\\/]{0,10}",
+    "[\\((]?(网址)?[::;;]?(http|https|htpps)*[::]?(\\/\\/)?(www|jinan|WWW)?.(zhaobiao|chinabidding|Chinabidding|CHINABIDDING|infobidding|zbytb|okcis|qianlima|youzhicai).(com|cn|COM|CN)?(.cn|.CN)?\\/?[\\))]?",
+    "[\\((]?(网址)?(::)?(http|https|htpps)*(:|:)?\\/\\/www.bidcenter.com.cn\\/",
+    "千里马(平台|网站)+", "[“\"]?优质采(平台|电子交易平台|云采购平台|交易平台)?[”\"]?", "《?(中国采购与|中国)?招(投)?标(与采购|采购导航)?网》?",
+    "《?元博网(采购与招标网)?》?", "《?(中国)?招标采购导航网》?", "中\\W{0,3}国采\\W{0,3}招\\W{0,3}网\\W*[((]?(bidcenter.com.cn)?[))]?", "已方宝", "中国招标与采购"]
+
+[mail]
+send = false
+to = "wangjianghan@topnet.net.cn"
+api = "http://172.17.145.179:19281/_send/_mail"
+
+# 日志
+[log]
+# 日志路径,为空将输出控制台
+logpath = ""
+# log size (M)
+maxsize = 10
+# compress log
+compress = true
+# log save  time (day)
+maxage =  7
+# save total log file total
+maxbackups = 10
+# log level
+loglevel  = "debug"
+# text or json output
+format = "text"
+```
+
+## 3.部署
+
+1. 打包索引程序,然后拷贝到服务器目录下,一般程序文件命名:**createindex_1783_20230601**。
+
+2. 最后以日期名结尾,容易区分程序。线上一般保留旧的程序文件2个,以防止出现意外。
+
+
+
+# 4.注意事项
+
+## 1.stype 参数使用
+
+> 只有**==index-by-id、bidding、bidding_history==**三种类型,才会生成pici字段
+
+| stype 数值                | 数值含义                                       |
+|-------------------------|--------------------------------------------|
+| index-by-id             | 单个ID数据;主要针对某条数据单独生索引                       |
+| bidding                 | bidding增量数据,需要传递一个id段,配合参数 gtid 和 lteid 使用 |
+| biddingall              | 补充某一个段的存量数据,适合数据量不大的情况,也需要参数 gtid 和 lteid。 |
+| bidding_all_data        | 根据biddingall.toml配置文件,迁移大批存量数据             |
+| bidding_history         | 和bidding 逻辑一样                              |
+| project                 | 项目信息,配合gt和lte参数使用                          |
+| project_all_data        | project存量数据,依据projectall.toml配置文件分段同步存量数据  |
+| biddingdata             | 同步数据到采集判重索引,部署在145.178的服务器,单机版索引,只有采集爬虫在使用 |
+| biddingdelbyextracttype | 根据bidding表extracttype=-1,删除es中重复数据         |
+| buyer_all               | buyer 全量数据                                 |
+
+

+ 1 - 1
createEsIndex/bidding_del.go

@@ -37,7 +37,7 @@ func biddingDelByExtracttype(data []byte, mapInfo map[string]interface{}) {
 	for tmp := make(map[string]interface{}); biddingquery.Next(tmp); i = i + 1 {
 		n++
 		_id := mongodb.BsonIdToSId(tmp["_id"])
-		if Es.DelById(config.Conf.DB.Es.IndexB, config.Conf.DB.Es.TypeB, _id) { //删除
+		if Es.DelById(config.Conf.DB.Es.IndexB, _id) { //删除
 			dnum++
 		}
 		if n%200 == 0 {

+ 1 - 1
createEsIndex/bidding_es.go

@@ -771,7 +771,7 @@ func dealPackage(tmp map[string]interface{}) (newpackages []map[string]interface
 	if ok3 && ok2 && ok1 {
 		packageMap, ok := package1.(map[string]interface{})
 		if ok {
-			if len(packageMap) > 2 {
+			if len(packageMap) >= 2 {
 				var packages = make([]map[string]interface{}, 0)
 				//var newTmp = make(map[string]interface{})
 				winner_amount_count := 0

+ 131 - 82
createEsIndex/buyertask.go

@@ -1,97 +1,146 @@
 package main
 
 import (
-	util "app.yhyue.com/data_processing/common_utils"
 	"app.yhyue.com/data_processing/common_utils/log"
-	"app.yhyue.com/data_processing/common_utils/mongodb"
 	"esindex/config"
-	"go.mongodb.org/mongo-driver/bson/primitive"
+	"fmt"
 	"go.uber.org/zap"
-	"sync"
 	"time"
 )
 
-var fieldArr = []string{"institute_type", "fixedphone", "mobilephone", "latestfixedphone", "latestmobilephone", "province", "city"}
-
-func buyerEsTaskOnce() {
-	defer util.Catch()
-	arrEs := []map[string]interface{}{}
-	buyerEsLock := &sync.Mutex{}
-	pool := make(chan bool, 3)
-	wg := &sync.WaitGroup{}
-
-	now := time.Now()
-	preTime := time.Date(now.Year(), now.Month(), now.Day()-1, now.Hour(), 0, 0, 0, time.Local)
-	curTime := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, time.Local)
-	task_sid := mongodb.BsonIdToSId(primitive.NewObjectIDFromTimestamp(preTime))
-	task_eid := mongodb.BsonIdToSId(primitive.NewObjectIDFromTimestamp(curTime))
-	log.Info("buyer 区间id", zap.String("sid", task_sid), zap.String("eid", task_eid))
-	//区间id
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gte": mongodb.StringTOBsonId(task_sid),
-			"$lt":  mongodb.StringTOBsonId(task_eid),
-		},
-	}
-	//mongo
-	sess := MgoQ.GetMgoConn()
-	defer MgoQ.DestoryMongoConn(sess)
-
-	it_1 := sess.DB(MgoQ.DbName).C("buyer_enterprise").Find(&q).Select(map[string]interface{}{
-		"buyer_name":        1,
-		"institute_type":    1,
-		"buyerclass":        1,
-		"fixedphone":        1,
-		"mobilephone":       1,
-		"latestfixedphone":  1,
-		"latestmobilephone": 1,
-		"province":          1,
-		"city":              1,
-	}).Sort("_id").Iter()
-	num_1 := 0
-	for tmp := make(map[string]interface{}); it_1.Next(&tmp); num_1++ {
-		if num_1%2000 == 0 && num_1 > 0 {
-			log.Info("current", zap.Int("数量", num_1))
-		}
-		pool <- true
-		wg.Add(1)
-		go func(tmp map[string]interface{}) {
-			defer func() {
-				<-pool
-				wg.Done()
-			}()
-			savetmp := map[string]interface{}{}
-			_id := mongodb.BsonIdToSId(tmp["_id"])
-			if util.ObjToString(tmp["buyerclass"]) != "" {
-				savetmp["buyerclass"] = tmp["buyerclass"]
-			}
-			savetmp["_id"] = _id
-			savetmp["name"] = tmp["buyer_name"]
-			savetmp["buyer_name"] = tmp["buyer_name"]
-			for _, f := range fieldArr {
-				if val := util.ObjToString(tmp[f]); val != "" {
-					savetmp[f] = val
+//buyerOnce  处理增量数据
+func buyerOnce() {
+	rowsPerPage := 1000
+	currentPage := 1
+	total := 0
+	for {
+		log.Info("buyerOnce", zap.Int("currentPage", currentPage))
+		arrEs := make([]map[string]interface{}, 0)
+		offset := (currentPage - 1) * rowsPerPage
+		//year, month, day := 2022, time.October, 01
+		//now := time.Date(year, month, day, 0, 0, 0, 0, time.Local)
+		now := time.Now()
+		curTime := now.Format("2006-01-02")
+		insertquery := fmt.Sprintf(`
+            SELECT
+                b.name, 
+                t.id,
+                t.name_id, 
+                c.area, 
+                c.city, 
+                class.name AS buyerclass 
+               
+            FROM 
+                dws_f_ent_tags AS t 
+                LEFT JOIN code_buyerclass AS class ON class.code = t.labelvalues
+                LEFT JOIN dws_f_ent_baseinfo AS b ON b.name_id = t.name_id
+                LEFT JOIN code_area AS c ON b.city_code = c.code 
+
+			WHERE  t.createtime > '%v' OR t.updatetime > '%v'
+            ORDER BY t.id ASC
+            LIMIT %d, %d;
+        `, curTime, curTime, offset, rowsPerPage)
+
+		result := MysqlB.SelectBySql(insertquery)
+
+		if len(*result) > 0 {
+			for _, re := range *result {
+				tmp := make(map[string]interface{}, 0)
+				tmp["name"] = re["name"]
+				tmp["id"] = re["name_id"]
+				tmp["area"] = re["area"]
+				tmp["city"] = re["city"]
+				tmp["buyerclass"] = re["buyerclass"]
+				sql := fmt.Sprintf(`select * from dws_f_ent_contact where name_id = '%v'`, re["name_id"])
+
+				counts := MysqlB.SelectBySql(sql)
+				if len(*counts) > 0 {
+					tmp["is_contact"] = true
+				} else {
+					tmp["is_contact"] = false
 				}
+				arrEs = append(arrEs, tmp)
 			}
-			buyerEsLock.Lock()
-			arrEs = append(arrEs, savetmp)
-			if len(arrEs) >= EsBulkSize {
-				tmps := arrEs
-				Es.BulkSave(config.Conf.DB.Es.IndexBuyer, tmps)
-				arrEs = []map[string]interface{}{}
-			}
-			buyerEsLock.Unlock()
-		}(tmp)
-		tmp = make(map[string]interface{})
+			total = total + len(arrEs)
+
+			Es.InsertOrUpdate(config.Conf.DB.Es.IndexBuyer, arrEs)
+		}
+
+		if len(arrEs) < rowsPerPage {
+			break
+		}
+		// 继续查询下一页
+		currentPage++
 	}
 
-	wg.Wait()
-	buyerEsLock.Lock()
-	if len(arrEs) > 0 {
-		tmps := arrEs
-		Es.BulkSave(config.Conf.DB.Es.IndexBuyer, tmps)
-		arrEs = []map[string]interface{}{}
+	log.Info("buyerOnce", zap.Int("结束,总数是:", total))
+}
+
+//buyerall 全量数据
+func buyerall() {
+	rowsPerPage := 10000
+	currentPage := 1
+	total := 0
+
+	for {
+		log.Info("buyerall", zap.Int("currentPage", currentPage))
+		arrEs := make([]map[string]interface{}, 0)
+		offset := (currentPage - 1) * rowsPerPage
+		query := fmt.Sprintf(`
+             SELECT
+                b.name, 
+                t.id,
+                t.name_id, 
+                c.area, 
+                c.city, 
+                class.name AS buyerclass
+               
+            FROM 
+                dws_f_ent_tags AS t 
+                LEFT JOIN code_buyerclass AS class ON class.code = t.labelvalues
+                LEFT JOIN dws_f_ent_baseinfo AS b ON b.name_id = t.name_id
+                LEFT JOIN code_area AS c ON b.city_code = c.code
+
+            ORDER BY t.id ASC
+			
+            LIMIT %d, %d;
+        `, offset, rowsPerPage)
+
+		result := MysqlB.SelectBySql(query)
+
+		if len(*result) > 0 {
+			for _, re := range *result {
+				tmp := make(map[string]interface{}, 0)
+				tmp["name"] = re["name"]
+				tmp["id"] = re["name_id"]
+				tmp["_id"] = re["name_id"]
+				tmp["area"] = re["area"]
+				tmp["city"] = re["city"]
+				tmp["buyerclass"] = re["buyerclass"]
+
+				sql := fmt.Sprintf(`select * from dws_f_ent_contact where name_id = '%v'`, re["name_id"])
+
+				counts := MysqlB.SelectBySql(sql)
+				if len(*counts) > 0 {
+					tmp["is_contact"] = true
+				} else {
+					tmp["is_contact"] = false
+				}
+				arrEs = append(arrEs, tmp)
+			}
+			total = total + len(arrEs)
+			//保存es
+			Es.BulkSave(config.Conf.DB.Es.IndexBuyer, arrEs)
+		}
+
+		// 如果本次查询返回的数据不足每页请求的数量,说明已经查询到最后一页
+		if len(*result) < rowsPerPage {
+			break
+		}
+
+		// 继续查询下一页
+		currentPage++
 	}
-	buyerEsLock.Unlock()
-	log.Info("buyer over!", zap.Int("总计", num_1))
+
+	log.Info("buyerall", zap.Int("结束,总数是:", total))
 }

+ 19 - 141
createEsIndex/common.toml

@@ -5,14 +5,14 @@
 
 [db]
 [db.mongoB]
-    addr = "127.0.0.1:27017"
-#    addr = "192.168.3.206:27002"    ## 测试环境
-    dbname = "wcc"
-#    dbname = "qfw_data"
+#    addr = "127.0.0.1:27017"
+    addr = "192.168.3.206:27002"    ## 测试环境
+#    dbname = "wcc"
+    dbname = "qfw_data"
     coll = "bidding"
     size = 15
-#    user = "root"
-#    password = "root"
+    user = "root"
+    password = "root"
 
 [db.mongoP]
     addr = "192.168.3.206:27002"
@@ -22,7 +22,7 @@
     user = "root"
     password = "root"
 
-[db.mongoQ]
+[db.mongoQ] ##  winner
     addr = "192.168.3.206:27002"
     dbname = "mixdata"
     coll = "qyxy_std"
@@ -30,6 +30,13 @@
     user = "root"
     password = "root"
 
+[db.mysqlB]  ## buyer 采购单位
+    addr = "192.168.3.14:4000"      ## 测试环境
+    dbname = "global_common_data"
+    username = "root"
+    password = "=PDT49#80Z!RVv52_z"
+
+
 [db.oss]
 #    endpoint = "oss-cn-beijing-internal.aliyuncs.com"## 正式环境
     endpoint = "oss-cn-beijing.aliyuncs.com"## 测试环境
@@ -37,20 +44,16 @@
     accesssecret = "Bk98FsbPYXcJe72n1bG3Ssf73acuNh"
     bucketname = "topjy"
 [db.es]
-    addr = "http://127.0.0.1:19805" ## 正常bidding 链接
+    addr = "http://192.168.3.149:9200"      ## 正常bidding 链接
     addrp = "http://172.17.145.178:9200" ## 采集使用的单机版地址
     username = "es_all"
     password = "TopJkO2E_d1x"
     size = 5
-    indexb = "bidding"
-    indextmp = "bidding_temporary" ## 临时索引,其他程序需要
-#    typeb = "bidding"
-    indexp = "projectset"
-#    typep = "projectset"
+    indexb = "bidding_v1"
+    indextmp = "bidding_temporary"       ## 临时索引,其他程序需要
+    indexp = "projectset_v1"
     indexwinner = "winner"
-#    typewinner = "winner"
-    indexbuyer = "buyer"
-#    typebuyer = "buyer"
+    indexbuyer = "buyer_v1"
 detailfilter = ["(招标网|千里马|采招网|招标采购导航网|招标与采购网|中国招投标网|中国采购与招标网|中国采购与招标|优质采)[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}(qianlima|zhaobiao|okcis|zbytb|infobidding|bidcenter|youzhicai|chinabidding|Chinabidding|CHINABIDDING)[a-z0-9.\\/\\/]{0,40}",
     "招标网[\\w\\W]{0,15}[http|https|htpps]?[a-z0-9:\\/\\/.]{0,20}zhaobiao[a-z0-9.\\/\\/]{0,40}",
     "千里马[\\w\\W]{0,15}[a-z0-9:\\/\\/.]{0,20}qianlima[a-z0-9.\\/\\/]{0,10}",
@@ -80,128 +83,3 @@ maxbackups = 10
 loglevel  = "debug"
 # text or json output
 format = "text"
-
-#[db.es.fieldes]
-#"_id" = ""
-#"buyerzipcode" = "string"
-#"winnertel" = "string"
-#"winnerperson" = "string"
-#"contractcode" = "string"
-#"winneraddr" = "string"
-#"agencyaddr" = "string"
-#"buyeraddr" = "string"
-#"signaturedate" = "int64"
-#"projectperiod" = "string"
-#"projectaddr" = "string"
-#"agencytel" = "string"
-#"agencyperson" = "string"
-#"buyerperson" = "string"
-#"agency" = "string"
-#"projectscope" = "string"
-#"projectcode" = "string"
-#"bidopentime" = "int64"
-#"supervisorrate" = "float64"
-#"buyertel" = "string"
-#"bidamount" = "float64"
-#"winner" = "string"
-#"buyer" = "string"
-#"budget" = "float64"
-#"projectname" = "string"
-#"bidstatus" = "string"
-#"buyerclass" = "string"
-"topscopeclass" = ""
-#"s_topscopeclass" = "string"
-#"s_subscopeclass" = "string"
-#"area" = "string"
-#"city" = "string"
-#"district" = "string"
-#"s_winner" = "string"
-#"title" = "string"
-#"detail" = "string"
-#"site" = "string"
-#"comeintime" = "int64"
-#"href" = "string"
-#"infoformat" = "int32"
-#"publishtime" = "int64"
-#"s_sha" = "string"
-#"spidercode" = "string"
-#"subtype" = "string"
-#"toptype" = "string"
-#"projectinfo" = "" ## 废弃
-#"purchasing" = "string"
-"purchasinglist" = ""
-#"channel" = "string"
-#"winnerorder" = ""
-#"project_scale" = "string"
-#"project_duration" = "int32"
-#"project_timeunit" = "string"
-#"project_startdate" = "int64"
-#"project_completedate" = "int64"
-#"payway" = "string"
-#"contract_guarantee" = "bool"
-#"bid_guarantee" = "bool"
-#"qualifies" = ""
-#"entidlist" = ""
-#"funds" = "string"
-#"review_experts" = "string"
-#"bidmethod" = "string"
-#"bidendtime" = "int64"
-#"bidopenaddress" = "string"
-#"docamount" = "float64"
-#"agencyrate" = "float64"
-#"agencyfee" = "float64"
-#"bidway" = "string"
-#"getdocmethod" = "string"
-#"china_bidding" = "string"
-#"purchasing_tag" = "string"
-#"multipackage" = "int32"
-#"isValidFile" = "bool"
-#"bid_field" = "string"
-#"bidstarttime" = "int64"
-#"docendtime" = "int64"
-#"docstarttime" = "int64"
-#"signendtime" = "int64"
-#"signstarttime" = "int64"
-#"issue_quota" = "float64"
-#"bidopen_shape" = "string"
-#"quote_mode" = "string"
-#"is_acquire_tender" = "bool"
-#"is_payment_deposit" = "bool"
-#"is_joint_bidding" = "bool"
-#"procurementlist" = ""
-#"object_type" = "string"  ##针对中国政府采购网,添加字段,区分货物、服务和工程
-#"subpackage" = "int64"
-#"package" = ""          ## 分包信息
-#[db.es.fieldprojectinfo] ## 作废不再使用
-#"approvecode" = "string"
-#"approvecontent" = "string"
-#"approvestatus" = "string"
-#"approvetime" = "string"
-#"approvedept" = "string"
-#"approvenumber" = "string"
-#"projecttype" = "string"
-#"approvecity" = "string"
-#[db.es.fieldpurchasinglist]
-#"itemname" = "string"
-#"item" = "string"
-#"brandname" = "string"
-#"model" = "string"
-#"unitname" = "string"
-#"number" = "float64"
-#"unitprice" = "float64"
-#"totalprice" = "float64"
-#[db.es.fieldprocurementlist]
-#"projectname" = "string"
-#"buyer" = "string"
-#"item" = "string"
-#"projectscope" = "string"
-#"expurasingtime" = "int64"
-#"totalprice" = "float64"
-#[db.es.fieldwinnerorder]
-#"sort" = "int"
-#"sortstr" = "string"
-#"entname" = "string"
-#[db.es.package] ## 分包新增字段
-#"winner" = "string"
-#"bidamount" = "float64"
-#"name" = "string"

+ 8 - 0
createEsIndex/config/conf.go

@@ -65,6 +65,14 @@ type db struct {
 	MongoQ mgo
 	Es     es
 	Oss    oss
+	MysqlB mysql
+}
+
+type mysql struct {
+	Addr     string
+	Dbname   string
+	Username string
+	Password string
 }
 
 //oss oss 阿里云配置

+ 37 - 0
createEsIndex/es_test.go

@@ -135,3 +135,40 @@ func TestGetIndexName(t *testing.T) {
 	fmt.Println("name ->", name)
 	fmt.Println(name)
 }
+
+func TestBuyer(t *testing.T) {
+	rowsPerPage := 1000
+	currentPage := 1
+
+	var total int
+	for {
+		fmt.Println("currentPage", currentPage)
+		arrEs := make([]map[string]interface{}, 0)
+		offset := (currentPage - 1) * rowsPerPage
+		query := fmt.Sprintf(`
+             SELECT * from goods
+			
+            LIMIT %d, %d;
+        `, offset, rowsPerPage)
+
+		result := MysqlB.SelectBySql(query)
+
+		if len(*result) > 0 {
+			for _, re := range *result {
+				arrEs = append(arrEs, re)
+			}
+
+		}
+
+		total = total + len(*result)
+		if len(*result) < rowsPerPage {
+			break
+		}
+
+		// 继续查询下一页
+		currentPage++
+	}
+
+	fmt.Println("over --------")
+	fmt.Println("total --------", total)
+}

+ 2 - 1
createEsIndex/go.mod

@@ -3,7 +3,7 @@ module esindex
 go 1.16
 
 require (
-	app.yhyue.com/data_processing/common_utils v0.0.0-20230427103005-4289580ee061
+	app.yhyue.com/data_processing/common_utils v0.0.0-20230615083858-55ea206e1f6e
 	github.com/BurntSushi/toml v1.2.0
 	github.com/aliyun/aliyun-oss-go-sdk v2.2.5+incompatible
 	github.com/olivere/elastic/v7 v7.0.32
@@ -11,4 +11,5 @@ require (
 	github.com/spf13/viper v1.15.0
 	go.mongodb.org/mongo-driver v1.10.2
 	go.uber.org/zap v1.23.0
+	gopkg.in/olivere/elastic.v2 v2.0.61 // indirect
 )

+ 10 - 0
createEsIndex/go.sum

@@ -1,5 +1,13 @@
+app.yhyue.com/data_processing/common_utils v0.0.0-20220830011833-76d58ef43f4f h1:5fUbVRwPM3oBsZgvG76Bia3I4SdwdBB6PvJ6B28Qkyc=
+app.yhyue.com/data_processing/common_utils v0.0.0-20220830011833-76d58ef43f4f/go.mod h1:9PlRUNzirlF/LL1W7fA7koCudxJe3uO5nshDWlCnGo8=
 app.yhyue.com/data_processing/common_utils v0.0.0-20230427103005-4289580ee061 h1:UaE1gor8fIhSBAXcZsNOQDMI5sD9LxGECXFh7Ik4rk4=
 app.yhyue.com/data_processing/common_utils v0.0.0-20230427103005-4289580ee061/go.mod h1:XMSY6tIzDnO/YQFjSb0OrOKl93ViGE0ejqcSCTlyHUs=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615030314-9b35edf4b4f7 h1:W+wt9hKHBZ42pXo75JUL4l0mymg1Ntsra2TnDCpk9MM=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615030314-9b35edf4b4f7/go.mod h1:XMSY6tIzDnO/YQFjSb0OrOKl93ViGE0ejqcSCTlyHUs=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615031256-1e18e5f863bd h1:PfT2Q5D2B2/Lpj0QxdOk1QSHpJeeaOiHMdcAS3tC1XY=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615031256-1e18e5f863bd/go.mod h1:XMSY6tIzDnO/YQFjSb0OrOKl93ViGE0ejqcSCTlyHUs=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615083858-55ea206e1f6e h1:jb995Ko1dqe6UnJVT5Gh6rBpAWgWEdtLhVDir4tyysk=
+app.yhyue.com/data_processing/common_utils v0.0.0-20230615083858-55ea206e1f6e/go.mod h1:XMSY6tIzDnO/YQFjSb0OrOKl93ViGE0ejqcSCTlyHUs=
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
@@ -473,6 +481,7 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V
 github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
 github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -1352,6 +1361,7 @@ gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3
 gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
 gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
 gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
+gopkg.in/olivere/elastic.v2 v2.0.61/go.mod h1:CTVyl1gckiFw1aLZYxC00g3f9jnHmhoOKcWF7W3c6n4=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

+ 18 - 5
createEsIndex/init.go

@@ -5,6 +5,7 @@ import (
 	"app.yhyue.com/data_processing/common_utils/elastic"
 	"app.yhyue.com/data_processing/common_utils/log"
 	"app.yhyue.com/data_processing/common_utils/mongodb"
+	"app.yhyue.com/data_processing/common_utils/mysqldb"
 	"context"
 	"esindex/config"
 	"fmt"
@@ -66,6 +67,14 @@ func InitMgo() {
 		Password:    config.Conf.DB.MongoQ.Password,
 	}
 	MgoQ.InitPool()
+
+	MysqlB = &mysqldb.Mysql{
+		Address:  config.Conf.DB.MysqlB.Addr,
+		DBName:   config.Conf.DB.MysqlB.Dbname,
+		UserName: config.Conf.DB.MysqlB.Username,
+		PassWord: config.Conf.DB.MysqlB.Password,
+	}
+	MysqlB.Init()
 }
 
 func InitEs() {
@@ -215,13 +224,17 @@ func GetIndexName(client *es7.Client, name string) (string, error) {
 	res, err := client.Aliases().Alias(name).Do(context.Background())
 	if err != nil {
 		// 错误处理
-		return "", err
+		if err.(*es7.Error).Status != 404 && err.(*es7.Error).Details != nil {
+			return "", err
+		}
 	}
 
-	for k, v := range res.Indices {
-		for _, vv := range v.Aliases {
-			if vv.AliasName == name {
-				return k, nil
+	if res != nil {
+		for k, v := range res.Indices {
+			for _, vv := range v.Aliases {
+				if vv.AliasName == name {
+					return k, nil
+				}
 			}
 		}
 	}

+ 14 - 4
createEsIndex/main.go

@@ -5,6 +5,7 @@ import (
 	"app.yhyue.com/data_processing/common_utils/elastic"
 	"app.yhyue.com/data_processing/common_utils/log"
 	"app.yhyue.com/data_processing/common_utils/mongodb"
+	"app.yhyue.com/data_processing/common_utils/mysqldb"
 	"app.yhyue.com/data_processing/common_utils/udp"
 	"encoding/json"
 	"esindex/config"
@@ -20,9 +21,10 @@ import (
 )
 
 var (
-	MgoB *mongodb.MongodbSim
-	MgoP *mongodb.MongodbSim
-	MgoQ *mongodb.MongodbSim
+	MgoB   *mongodb.MongodbSim
+	MgoP   *mongodb.MongodbSim
+	MgoQ   *mongodb.MongodbSim
+	MysqlB *mysqldb.Mysql
 
 	Es, Es1 *elastic.Elastic
 
@@ -178,6 +180,14 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					}()
 					biddingDelByExtracttype(data, mapInfo)
 				}()
+			case "buyer_all": //buyer 全量数据
+				pool <- true
+				go func() {
+					defer func() {
+						<-pool
+					}()
+					buyerall()
+				}()
 			default:
 				pool <- true
 				go func() {
@@ -211,7 +221,7 @@ func task_winneres() {
 }
 func task_buyeres() {
 	log.Info("定时任务,buyeres")
-	buyerEsTaskOnce()
+	buyerOnce()
 }
 
 type UdpNode struct {