Jianghan 2 роки тому
батько
коміт
c0a0c9cfe5

+ 13 - 3
README.md

@@ -8,7 +8,7 @@
 
 
 ## field_sync 数据处理流程-数据索引之前字段同步及处理
-+ 抽取字段同步到bidding表
++ 抽取字段同步到bidding表  
 + 剑鱼关键词处理  
 + isValidFile 附件有效字段  
 + entidlist 中标单位id字段  
@@ -16,7 +16,14 @@
 
 ## processing_ids 数据处理流程-id段保存
 + 定时5分钟,保存id段
-+ 保存id段(dataprocess=0,updatetime)—>招标分类(dataprocess=1,updatetime)—>标的物识别(dataprocess=2,updatetime)—>抽取(dataprocess=3,updatetime)—>字段清理(dataprocess=4,updatetime)—>业主分类(dataprocess=5,updatetime)—>判重(dataprocess=6,updatetime)—>bidding表字段同步(dataprocess=7,updatetime)
+  + 保存id段(dataprocess=0,updatetime)—>
+  + 招标分类(dataprocess=1,updatetime)—>
+  + 标的物识别(dataprocess=2,updatetime)—>
+  + 抽取(dataprocess=3,updatetime)—>
+  + 字段清理(dataprocess=4,updatetime)—>
+  + 业主分类(dataprocess=5,updatetime)—>
+  + 判重(dataprocess=6,updatetime)—>
+  + bidding表字段同步(dataprocess=7,updatetime)
 
 ## data_tidb 数据处理流程-数据同步到tidb库(bidding、projectset)  
 + bidding数据  
@@ -31,4 +38,7 @@
   + 基础信息
 
 ## monitor 数据异常监控
-+ bidding_file数据累计异常监控(累计数据量>10000条)
++ bidding_file数据累计异常监控(累计数据量>10000条)  
+
+## data_fusion 数据融合-基于判重数据进行的信息融合
++ 

BIN
data_fusion/bin/data_fusion


+ 72 - 0
data_fusion/common.toml

@@ -0,0 +1,72 @@
+[serve]
+thread = 10
+fields = ["agency", "agencytel", "agencyperson", "bidendtime", "budget", "bidamount", "buyer", "buyerclass", "buyerperson", "buyertel", "buyeraddr", "bidway",
+    "district", "projectcode", "projectscope", "s_winner", "winnertel", "winnerperson", "publishtime", "signaturedate", "docstarttime", "package", "attachments",
+    "project_timeunit", "project_completedate", "project_duration", "project_startdate", "winnerorder", "purchasinglist", "qualifies", "contractcode", "bidopentime",
+    "topscopeclass", "subscopeclass", "toptype", "subtype"]
+
+[serve.weight]
+projectname = 3
+projectcode = 1
+budget = 2
+bidamount = 2
+buyer = 3
+buyerclass = 1
+buyerperson = 1
+buyertel = 1
+agency = 3
+agencyperson = 1
+agencytel = 1
+toptype = 1
+subtype = 1
+topscopeclass = 1
+subscopeclass = 1
+s_winner = 3
+bidopentime = 1
+contractcode = 3
+qualifies = 1
+attachments = 2
+package = 1
+purchasinglist = 1
+winnerorder = 1
+
+[db]
+[db.mongo]
+addr = "192.168.3.207:27092"
+dbname = "wjh"
+coll = "bidding"
+size = 15
+user = ""
+password = ""
+[db.mongoS]
+addr = "192.168.3.207:27092"
+dbname = "wjh"
+coll = "spider_compete"
+size = 15
+user = ""
+password = ""
+[db.redis]
+addr = "fusion_id=192.168.3.207:1679"
+db = 7
+
+[mail]
+send = false
+to = "wangjianghan@topnet.net.cn"
+api = "http://172.17.145.179:19281/_send/_mail"
+
+# 日志
+[log]
+# 日志路径,为空将输出控制台
+logpath = ""
+# log size (M)
+maxsize = 10
+# compress log
+compress = true
+# log save  time (day)
+maxage =  7
+# save total log file total
+maxbackups = 10
+# log level
+loglevel  = "debug"
+# text or json output
+format = "text"

+ 84 - 0
data_fusion/config/conf.go

@@ -0,0 +1,84 @@
+package config
+
+import (
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/BurntSushi/toml"
+)
+
+var (
+	// Conf holds the configuration decoded by Init; it stays nil until Init has run.
+	Conf *conf
+)
+
+// Init decodes the TOML file at path conf into the package-level Conf.
+//
+// If the file cannot be read or decoded, Init reports the failure on stderr
+// and terminates the process with exit status 1; it never returns an error.
+func Init(conf string) {
+	if _, err := toml.DecodeFile(conf, &Conf); err != nil {
+		// Diagnostics go to stderr, newline-terminated and naming the file,
+		// so they are not lost in or mistaken for regular program output.
+		fmt.Fprintf(os.Stderr, "load config %q: %v\n", conf, err)
+		os.Exit(1)
+	}
+}
+
+type conf struct { // root of the decoded configuration; one field per top-level table in common.toml
+	Serve serve // [serve]
+	DB    db    // [db]
+	Mail  mail  // [mail]
+	Log   log   // [log]
+}
+
+type serve struct { // fusion settings ([serve])
+	Thread int            // capacity of the channel main uses to cap concurrent workers
+	Fields []string       // field names mergeTmp may copy from the supplementary record
+	Weight map[string]int // score getWeight adds for each field that is present ([serve.weight])
+}
+
+type mail struct { // alert mail settings ([mail]); no code visible in this change reads them
+	Send bool   // send
+	To   string // to
+	Api  string // api
+}
+
+// log mirrors the [log] table; InitLog forwards every field to the logging package.
+type log struct {
+	LogPath    string // log file path; per common.toml an empty value logs to the console
+	MaxSize    int    // log size in MB, per common.toml
+	Compress   bool   // whether logs are compressed
+	MaxAge     int    // days a log is kept, per common.toml
+	MaxBackups int    // number of log files kept, per common.toml
+	LogLevel   string // log level, e.g. "debug"
+	Format     string // "text" or "json", per common.toml
+}
+
+type db struct { // datastore settings ([db])
+	Mongo  mgo   // [db.mongo]; InitMgo builds MgoB from it
+	MongoS mgo   // [db.mongoS]; InitMgo builds MgoS from it
+	Redis  redis // [db.redis]
+}
+
+type mgo struct { // one MongoDB connection description
+	Addr     string // passed to MongodbSim.MongodbAddr
+	Dbname   string // passed to MongodbSim.DbName
+	Coll     string // collection name; initData reads MongoS.Coll
+	Size     int    // passed to MongodbSim.Size
+	User     string // passed to MongodbSim.UserName
+	Password string // passed to MongodbSim.Password
+}
+
+type redis struct { // arguments main's init hands to redis.InitRedis1
+	Addr string // common.toml uses the form "fusion_id=host:port"
+	Db   int    // database index
+}
+
+type duration struct { // wraps time.Duration so it can be decoded from text
+	time.Duration
+}
+
+// UnmarshalText parses text such as "10s" with time.ParseDuration and stores the result in d.Duration.
+func (d *duration) UnmarshalText(text []byte) error {
+	var err error
+	d.Duration, err = time.ParseDuration(string(text))
+	return err
+}

+ 104 - 0
data_fusion/config/conf_test.go

@@ -0,0 +1,104 @@
+package config
+
+import (
+	"io/ioutil"
+	"os"
+	"testing"
+)
+
+// confs is the TOML fixture TestInit writes to disk and loads through Init.
+//
+// serve.weight is declared as a plain table ([serve.weight]) because the
+// serve.Weight field is a map[string]int; an array of tables
+// ([[serve.weight]]) cannot be decoded into a map and makes Init exit.
+// Tables that have no counterpart in the conf struct (db.mongo1, db.es, udp)
+// are left in place; the decoder is expected to leave them undecoded.
+var confs = `# log
+[log]
+logpath = ""
+maxsize = 10
+compress = true
+maxage =  7
+maxbackups = 10
+loglevel  = "info"
+format = "text"
+
+[serve]
+[serve.weight]
+projectname = 3
+projectcode = 1
+budget = 2
+bidamount = 2
+buyer = 3
+buyerclass = 1
+buyerperson = 1
+buyertel = 1
+agency = 3
+agencyperson = 1
+agencytel = 1
+toptype = 1
+subtype = 1
+topscopeclass = 1
+subscopeclass = 1
+s_winner = 3
+bidopentime = 1
+contractcode = 3
+qualifies = 1
+attachments = 2
+package = 1
+purchasinglist = 1
+winnerorder = 1
+
+[db]
+[db.mongo]
+addr = "192.168.3.207:27092"
+dbname = "qfw"
+size = 10
+user = ""
+password = ""
+[db.mongo1]
+addr = "192.168.3.207:27092"
+dbname = "wjh"
+size = 5
+user = ""
+password = ""
+
+[db.es]
+addr = "http://192.168.3.206:9800"
+size = 5
+indexm = "medical_institution_v1"
+typem = "medical_institution"
+indexs = "supplier_product_v1"
+types = "supplier_product"
+
+
+[udp]
+locport = ":1787"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1783
+stype = "bidding"
+remark = "索引"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1782
+stype = ""
+remark = ""
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1680
+stype = "subject"
+remark = "医疗数据"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1681
+stype = ""
+remark = ""
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1970
+stype = ""
+remark = ""
+
+`
+
+// TestInit writes the confs fixture to a uniquely named temporary file,
+// loads it through Init and checks that the configuration was populated.
+func TestInit(t *testing.T) {
+	// A generated name avoids clashing with other runs or users sharing /tmp.
+	f, err := ioutil.TempFile("", "crocodile-*.toml")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	testfile := f.Name()
+	// Deferred so the file is removed even when an assertion fails.
+	defer os.Remove(testfile)
+
+	if _, err := f.WriteString(confs); err != nil {
+		t.Fatalf("write temp config: %v", err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatalf("close temp config: %v", err)
+	}
+
+	Init(testfile)
+	if Conf == nil {
+		t.Fatal("Conf is nil after Init")
+	}
+	t.Logf("%+v", Conf.DB.Mongo.Addr)
+}

+ 9 - 0
data_fusion/go.mod

@@ -0,0 +1,9 @@
+module data_fusion
+
+go 1.16
+
+require (
+	app.yhyue.com/data_processing/common_utils v0.0.0-20221205033056-885644941005 // indirect
+	github.com/BurntSushi/toml v1.2.1 // indirect
+	go.uber.org/zap v1.24.0 // indirect
+)

+ 114 - 0
data_fusion/go.sum

@@ -0,0 +1,114 @@
+app.yhyue.com/data_processing/common_utils v0.0.0-20221205033056-885644941005 h1:AEEi+8ao9pTVqPIh6uVvjxBby/i43fFj7DwVo+feDAE=
+app.yhyue.com/data_processing/common_utils v0.0.0-20221205033056-885644941005/go.mod h1:9PlRUNzirlF/LL1W7fA7koCudxJe3uO5nshDWlCnGo8=
+github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
+github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
+github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dchest/captcha v1.0.0 h1:vw+bm/qMFvTgcjQlYVTuQBJkarm5R0YSsDKhm1HZI2o=
+github.com/dchest/captcha v1.0.0/go.mod h1:7zoElIawLp7GUMLcj54K9kbw+jEyvz2K0FDdRRYhvWo=
+github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
+github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
+github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomodule/redigo v1.8.9 h1:Sl3u+2BI/kk+VEatbj0scLdrFhjPmbxOc1myhDP41ws=
+github.com/gomodule/redigo v1.8.9/go.mod h1:7ArFNvsTjH8GMMzB4uy1snslv2BwmginuMs06a1uzZE=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
+github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0=
+github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
+github.com/nsqio/go-nsq v1.1.0/go.mod h1:vKq36oyeVXgsS5Q8YEO7WghqidAVXQlcFxzQbQTuDEY=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
+github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
+github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
+github.com/xdg-go/scram v1.1.1 h1:VOMT+81stJgXW3CpHyqHN3AXDYIMsx56mEFrB37Mb/E=
+github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
+github.com/xdg-go/stringprep v1.0.3 h1:kdwGpVNwPFtjs98xCGkHjQtGKh86rDcRZN17QEMCOIs=
+github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
+github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+go.mongodb.org/mongo-driver v1.10.1 h1:NujsPveKwHaWuKUer/ceo9DzEe7HIj1SlJ6uvXZG0S4=
+go.mongodb.org/mongo-driver v1.10.1/go.mod h1:z4XpeoU6w+9Vht+jAFyLgVrD+jGSQQe0+CBWFHNiHt8=
+go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
+go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI=
+go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
+go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
+go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
+go.uber.org/zap v1.22.0 h1:Zcye5DUgBloQ9BaT4qc9BnjOFog5TvBSAGkJ3Nf70c0=
+go.uber.org/zap v1.22.0/go.mod h1:H4siCOZOrAolnUPJEkfaSjDqyP+BDS0DdDWzwcgt3+U=
+go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
+go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY=
+golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw=
+gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
+gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
+gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
+gopkg.in/olivere/elastic.v2 v2.0.61/go.mod h1:CTVyl1gckiFw1aLZYxC00g3f9jnHmhoOKcWF7W3c6n4=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

+ 69 - 0
data_fusion/init.go

@@ -0,0 +1,69 @@
+package main
+
+import (
+	util "app.yhyue.com/data_processing/common_utils"
+	"app.yhyue.com/data_processing/common_utils/log"
+	"app.yhyue.com/data_processing/common_utils/mongodb"
+	"data_fusion/config"
+	"fmt"
+	"go.uber.org/zap"
+	"os"
+)
+
+var (
+	MgoB, MgoS *mongodb.MongodbSim // built by InitMgo from config DB.Mongo and DB.MongoS respectively
+
+	CompeteSite []string // competitor sites; filled by initData from the "site" field
+
+	updatePool chan []map[string]interface{} // drained by updateMethod1 into bulk updates of bidding_fusion
+	updateSp   chan bool                     // limits how many of those bulk updates run at once
+	recordPool chan []map[string]interface{} // drained by updateMethod into bulk upserts of bidding_fusion_record
+	recordSp   chan bool                     // limits how many of those bulk upserts run at once
+)
+
+// initData reads every document of the collection named by
+// config.Conf.DB.MongoS.Coll through MgoS, appends each document's "site"
+// value to CompeteSite, and logs how many sites are now held.
+func initData() {
+	rows, _ := MgoS.Find(config.Conf.DB.MongoS.Coll, nil, nil, nil, false, -1, -1)
+	if rows != nil {
+		// Ranging over an empty result is a no-op, so no length check is needed.
+		for i := range *rows {
+			CompeteSite = append(CompeteSite, util.ObjToString((*rows)[i]["site"]))
+		}
+	}
+	log.Info("initData", zap.Int("CompeteSite", len(CompeteSite)))
+}
+
+// InitMgo creates the two MongoDB handles used by this program and
+// initialises their pools: MgoB from config DB.Mongo, then MgoS from
+// config DB.MongoS.
+func InitMgo() {
+	bidding, compete := config.Conf.DB.Mongo, config.Conf.DB.MongoS
+
+	MgoB = &mongodb.MongodbSim{
+		MongodbAddr: bidding.Addr,
+		DbName:      bidding.Dbname,
+		Size:        bidding.Size,
+		UserName:    bidding.User,
+		Password:    bidding.Password,
+	}
+	MgoB.InitPool()
+
+	MgoS = &mongodb.MongodbSim{
+		MongodbAddr: compete.Addr,
+		DbName:      compete.Dbname,
+		Size:        compete.Size,
+		UserName:    compete.User,
+		Password:    compete.Password,
+	}
+	MgoS.InitPool()
+}
+
+// InitLog configures the shared logger from config.Conf.Log.
+// If the logger cannot be initialised it prints the error and exits with
+// status 1.
+func InitLog() {
+	c := config.Conf.Log
+
+	if err := log.InitLog(
+		log.Path(c.LogPath),
+		log.Level(c.LogLevel),
+		log.Compress(c.Compress),
+		log.MaxSize(c.MaxSize),
+		log.MaxBackups(c.MaxBackups),
+		log.MaxAge(c.MaxAge),
+		log.Format(c.Format),
+	); err != nil {
+		fmt.Printf("InitLog failed: %v\n", err)
+		os.Exit(1)
+	}
+}

+ 307 - 0
data_fusion/main.go

@@ -0,0 +1,307 @@
+package main
+
+import (
+	util "app.yhyue.com/data_processing/common_utils"
+	"app.yhyue.com/data_processing/common_utils/log"
+	"app.yhyue.com/data_processing/common_utils/mongodb"
+	"app.yhyue.com/data_processing/common_utils/redis"
+	"data_fusion/config"
+	"fmt"
+	"go.uber.org/zap"
+	"reflect"
+	"strings"
+	"sync"
+	"time"
+)
+
+func init() { // loads config, then sets up logging, MongoDB, redis, the site list and the write queues
+	config.Init("./common.toml") // relative path; everything below reads config.Conf
+	InitLog()                    // reads config.Conf.Log
+	InitMgo()                    // creates MgoB and MgoS
+	redis.InitRedis1(config.Conf.DB.Redis.Addr, config.Conf.DB.Redis.Db)
+
+	initData() // queries through MgoS, so it has to follow InitMgo
+
+	updatePool = make(chan []map[string]interface{}, 5000) // queue read by updateMethod1
+	updateSp = make(chan bool, 5)                          // up to 5 bulk updates in flight
+	recordPool = make(chan []map[string]interface{}, 5000) // queue read by updateMethod
+	recordSp = make(chan bool, 5)                          // up to 5 bulk upserts in flight
+}
+
+// main scans the bidding_fusion collection and fuses every document flagged
+// repeat == 1 with the group named by its repeat_id.
+//
+// For each such document the heavier of the two records (see getWeight) is
+// treated as the template and the other as the supplement; the fields the
+// template lacks (see mergeTmp) are queued on recordPool, together with the
+// ids and weights involved, for upsert into bidding_fusion_record. The redis
+// entry "fusion_id"/repeat_id keeps the ids of the group joined by "-", with
+// the current template id first.
+//
+// At most config.Conf.Serve.Thread documents are processed concurrently.
+// After the scan main waits for the workers and then blocks forever so the
+// background writers started here keep flushing.
+//
+// NOTE(review): the query pins one hard-coded _id, which looks like a
+// debugging leftover — confirm before running against the full collection.
+// NOTE(review): workers sharing a repeat_id read and rewrite the same redis
+// entry without coordination — confirm that this cannot happen or is benign.
+func main() {
+	go updateMethod()
+	go updateMethod1()
+
+	sess := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(sess)
+
+	ch := make(chan bool, config.Conf.Serve.Thread)
+	wg := &sync.WaitGroup{}
+
+	q := map[string]interface{}{"_id": mongodb.StringTOBsonId("639751bb063a7b816e026aa1")}
+	it := sess.DB(config.Conf.DB.Mongo.Dbname).C("bidding_fusion").Find(q).Select(nil).Iter()
+	count := 0
+	for tmp := make(map[string]interface{}); it.Next(tmp); count++ {
+		if count%2000 == 0 {
+			log.Info("main", zap.Int("current:", count))
+		}
+		if repeat := util.IntAll(tmp["repeat"]); repeat != 1 {
+			continue
+		}
+		ch <- true
+		wg.Add(1)
+		go func(tmp map[string]interface{}) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			repeatId := util.ObjToString(tmp["repeat_id"])
+			if str := redis.GetStr("fusion_id", repeatId); str != "" {
+				// The group already exists: its first id is the current template.
+				mid := strings.Split(str, "-")[0]
+				tmp1, _ := MgoB.FindById("bidding_fusion", mid, nil)
+				if tmp1 == nil || len(*tmp1) == 0 {
+					// Without the template there is nothing to compare against;
+					// skipping avoids a nil dereference and a record keyed on nothing.
+					log.Info("main", zap.String("template not found", mid))
+					return
+				}
+				w, s := getWeight(tmp)
+				w1, s1 := getWeight(*tmp1)
+				util.Debug(w, s, w1, s1)
+				var update map[string]interface{}
+				if w > w1 {
+					// The new document outweighs the template and replaces it.
+					update = mergeTmp(tmp, *tmp1)
+					//if len(update) > 0 {
+					//	updatePool <- []map[string]interface{}{
+					//		{"_id": tmp["_id"]},
+					//		{"$set": update},
+					//	}
+					//}
+					record := make(map[string]interface{})
+					record["$set"] = map[string]interface{}{
+						"template_id":     mongodb.BsonIdToSId(tmp["_id"]),
+						"template_weight": w,
+					}
+					update1 := util.DeepCopy(update).(map[string]interface{})
+					update1["infoid"] = mongodb.BsonIdToSId(tmp["_id"])
+					update1["weight"] = w
+					if w == 0 {
+						update1["remark"] = s
+					}
+					record["$push"] = map[string]interface{}{
+						"ids":    mongodb.BsonIdToSId(tmp["_id"]),
+						"record": update1,
+					}
+					recordPool <- []map[string]interface{}{
+						{"_id": mongodb.StringTOBsonId(repeatId)},
+						record,
+					}
+					redis.PutCKV("fusion_id", repeatId, fmt.Sprintf("%s-%s", mongodb.BsonIdToSId(tmp["_id"]), str))
+				} else {
+					// The template stays; the new document only supplements it.
+					update = mergeTmp(*tmp1, tmp)
+					//if len(update) > 0 {
+					//	updatePool <- []map[string]interface{}{
+					//		{"_id": (*tmp1)["_id"]},
+					//		{"$set": update},
+					//	}
+					//}
+					record := make(map[string]interface{})
+					record["$set"] = map[string]interface{}{
+						"template_weight": w1,
+					}
+					update1 := util.DeepCopy(update).(map[string]interface{})
+					update1["infoid"] = mongodb.BsonIdToSId(tmp["_id"])
+					update1["weight"] = w
+					if w == 0 {
+						update1["remark"] = s
+					}
+					record["$push"] = map[string]interface{}{
+						"ids":    mongodb.BsonIdToSId(tmp["_id"]),
+						"record": update1,
+					}
+					recordPool <- []map[string]interface{}{
+						{"_id": mongodb.StringTOBsonId(repeatId)},
+						record,
+					}
+					redis.PutCKV("fusion_id", repeatId, fmt.Sprintf("%s-%s", str, mongodb.BsonIdToSId(tmp["_id"])))
+				}
+			} else {
+				// First document seen for this group: compare it with the
+				// document whose id is repeat_id.
+				tmp1, _ := MgoB.FindById("bidding_fusion", repeatId, nil)
+				if tmp1 == nil || len(*tmp1) == 0 {
+					log.Info("main", zap.String("repeat_id not found", repeatId))
+					return
+				}
+				w, s := getWeight(tmp)
+				w1, s1 := getWeight(*tmp1)
+				var update map[string]interface{}
+				if w > w1 {
+					update = mergeTmp(tmp, *tmp1)
+					//if len(update) > 0 {
+					//	updatePool <- []map[string]interface{}{
+					//		{"_id": tmp["_id"]},
+					//		{"$set": update},
+					//	}
+					//}
+					record := make(map[string]interface{})
+					record["_id"] = (*tmp1)["_id"]
+					record["template_id"] = mongodb.BsonIdToSId(tmp["_id"])
+					record["template_weight"] = w
+					record["ids"] = []string{mongodb.BsonIdToSId(tmp["_id"]), mongodb.BsonIdToSId(repeatId)}
+					var recordlist []map[string]interface{}
+					recordlist = append(recordlist, map[string]interface{}{"infoid": mongodb.BsonIdToSId(tmp["_id"]), "weight": w})
+					update1 := util.DeepCopy(update).(map[string]interface{})
+					update1["infoid"] = mongodb.BsonIdToSId((*tmp1)["_id"])
+					update1["weight"] = w1
+					if w1 == 0 {
+						update1["remark"] = s1
+					}
+					recordlist = append(recordlist, update1)
+					record["record"] = recordlist
+					recordPool <- []map[string]interface{}{
+						{"_id": (*tmp1)["_id"]},
+						{"$set": record},
+					}
+					redis.PutCKV("fusion_id", repeatId, fmt.Sprintf("%s-%s", mongodb.BsonIdToSId(tmp["_id"]), mongodb.BsonIdToSId((*tmp1)["_id"])))
+				} else {
+					update = mergeTmp(*tmp1, tmp)
+					//if len(update) > 0 {
+					//	updatePool <- []map[string]interface{}{
+					//		{"_id": (*tmp1)["_id"]},
+					//		{"$set": update},
+					//	}
+					//}
+					record := make(map[string]interface{})
+					record["_id"] = (*tmp1)["_id"]
+					record["template_id"] = mongodb.BsonIdToSId((*tmp1)["_id"])
+					record["template_weight"] = w1
+					record["ids"] = []string{mongodb.BsonIdToSId(tmp["_id"]), mongodb.BsonIdToSId((*tmp1)["_id"])}
+					var recordlist []map[string]interface{}
+					recordlist = append(recordlist, map[string]interface{}{"infoid": mongodb.BsonIdToSId((*tmp1)["_id"]), "weight": w1})
+					update1 := util.DeepCopy(update).(map[string]interface{})
+					update1["infoid"] = mongodb.BsonIdToSId(tmp["_id"])
+					update1["weight"] = w
+					if w == 0 {
+						update1["remark"] = s
+					}
+					recordlist = append(recordlist, update1)
+					record["record"] = recordlist
+					recordPool <- []map[string]interface{}{
+						{"_id": (*tmp1)["_id"]},
+						{"$set": record},
+					}
+					redis.PutCKV("fusion_id", repeatId, fmt.Sprintf("%s-%s", mongodb.BsonIdToSId((*tmp1)["_id"]), mongodb.BsonIdToSId(tmp["_id"])))
+				}
+			}
+
+		}(tmp)
+		// Each worker owns the map it was handed; the iterator gets a fresh one.
+		tmp = map[string]interface{}{}
+	}
+
+	// Let every in-flight worker queue its record before idling.
+	wg.Wait()
+
+	// Block forever: the background writers flush on a timer and must stay alive.
+	c := make(chan bool, 1)
+	<-c
+}
+
+// getWeight scores a record by summing the configured weight
+// (config.Conf.Serve.Weight) of every field that carries a value.
+//
+// A field counts when it is present and non-nil, except that a string must be
+// non-empty and a float64 must be greater than zero. Two conditions force a
+// weight of 0 and return the reason as the second result: a publishtime that
+// is not positive, and a site listed in CompeteSite. Otherwise the second
+// result is empty.
+func getWeight(tmp map[string]interface{}) (int, string) {
+	if util.IntAll(tmp["publishtime"]) <= 0 {
+		return 0, "发布时间小于0"
+	}
+	if BinarySearch(CompeteSite, util.ObjToString(tmp["site"])) > -1 {
+		return 0, "竞品网站数据"
+	}
+	w := 0
+	for k, v := range config.Conf.Serve.Weight {
+		val := tmp[k]
+		if val == nil {
+			continue
+		}
+		util.Debug(k)
+		// A type switch states the intent directly and avoids comparing
+		// reflected type names as strings.
+		switch x := val.(type) {
+		case string:
+			if x == "" {
+				continue
+			}
+		case float64:
+			// Written as !(x > 0) so NaN is rejected like any non-positive value.
+			if !(x > 0) {
+				continue
+			}
+		}
+		w += v
+	}
+	return w, ""
+}
+
+// mergeTmp returns the values that the template record tmp is missing and the
+// supplementary record tmp1 can provide.
+//
+// Only the fields listed in config.Conf.Serve.Fields are considered. A field
+// is copied when tmp has no value for it and tmp1 holds a usable one: empty
+// strings and empty []interface{} slices are skipped, because they would add
+// a key without adding information. Neither input map is modified.
+//
+// @Author J 2023/1/3 11:31
+func mergeTmp(tmp map[string]interface{}, tmp1 map[string]interface{}) map[string]interface{} {
+	update := make(map[string]interface{})
+	for _, field := range config.Conf.Serve.Fields {
+		val := tmp1[field]
+		if tmp[field] != nil || val == nil {
+			continue
+		}
+		switch reflect.TypeOf(val).String() {
+		case "string":
+			if util.ObjToString(val) == "" {
+				continue
+			}
+		case "[]interface {}":
+			if len(val.([]interface{})) == 0 {
+				continue
+			}
+		}
+		update[field] = val
+	}
+	return update
+}
+
+func updateMethod() {
+	arru := make([][]map[string]interface{}, 500)
+	indexu := 0
+	for {
+		select {
+		case v := <-recordPool:
+			arru[indexu] = v
+			indexu++
+			if indexu == 500 {
+				recordSp <- true
+				go func(arru [][]map[string]interface{}) {
+					defer func() {
+						<-recordSp
+					}()
+					MgoB.UpSertBulk("bidding_fusion_record", arru...)
+				}(arru)
+				arru = make([][]map[string]interface{}, 500)
+				indexu = 0
+			}
+		case <-time.After(1000 * time.Millisecond):
+			if indexu > 0 {
+				recordSp <- true
+				go func(arru [][]map[string]interface{}) {
+					defer func() {
+						<-recordSp
+					}()
+					MgoB.UpSertBulk("bidding_fusion_record", arru...)
+				}(arru[:indexu])
+				arru = make([][]map[string]interface{}, 500)
+				indexu = 0
+			}
+		}
+	}
+}
+
+func updateMethod1() {
+	arru := make([][]map[string]interface{}, 500)
+	indexu := 0
+	for {
+		select {
+		case v := <-updatePool:
+			arru[indexu] = v
+			indexu++
+			if indexu == 500 {
+				updateSp <- true
+				go func(arru [][]map[string]interface{}) {
+					defer func() {
+						<-updateSp
+					}()
+					MgoB.UpdateBulk("bidding_fusion", arru...)
+				}(arru)
+				arru = make([][]map[string]interface{}, 500)
+				indexu = 0
+			}
+		case <-time.After(1000 * time.Millisecond):
+			if indexu > 0 {
+				updateSp <- true
+				go func(arru [][]map[string]interface{}) {
+					defer func() {
+						<-updateSp
+					}()
+					MgoB.UpdateBulk("bidding_fusion", arru...)
+				}(arru[:indexu])
+				arru = make([][]map[string]interface{}, 500)
+				indexu = 0
+			}
+		}
+	}
+}

+ 19 - 0
data_fusion/tools.go

@@ -0,0 +1,19 @@
+package main
+
+import "sort"
+
+// BinarySearch reports the index of k in s, or -1 when k is absent.
+//
+// s is sorted in place if it is not already in ascending order, so the
+// returned index refers to the sorted slice. Input that is already sorted is
+// only read, which keeps repeated lookups on the same slice from paying for a
+// sort on every call. When k occurs more than once the index of its first
+// occurrence is returned.
+//
+// Because an unsorted slice is modified, callers that search a shared slice
+// from several goroutines should sort it once beforehand.
+func BinarySearch(s []string, k string) int {
+	if !sort.StringsAreSorted(s) {
+		sort.Strings(s)
+	}
+	if i := sort.SearchStrings(s, k); i < len(s) && s[i] == k {
+		return i
+	}
+	return -1
+}

+ 20 - 6
field_py/main.go

@@ -28,6 +28,9 @@ var (
 	updateSp     chan bool
 	updateEsPool chan []map[string]interface{}
 	updateEsSp   chan bool
+
+	UdpChan      = make(chan map[string]interface{}, 500)
+	SingleThread = make(chan bool, 1)
 )
 
 func init() {
@@ -59,8 +62,20 @@ func main() {
 	go checkMapJob()
 	go updateMethod()
 
-	ch := make(chan bool, 1)
-	<-ch
+	for {
+		mapinfo, ok := <-UdpChan
+		if !ok {
+			continue
+		}
+		SingleThread <- true
+		go func(m map[string]interface{}) {
+			defer func() {
+				<-SingleThread
+			}()
+			log.Info("start dispose ...", zap.Any("key", mapinfo["key"]))
+			getIntention(m)
+		}(mapinfo)
+	}
 }
 func InitMgo() {
 	MgoB = &mongodb.MongodbSim{
@@ -93,13 +108,12 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 		} else {
 			//udp成功回写
 			if k := util.ObjToString(mapInfo["key"]); k != "" {
-				UdpClient.WriteUdp([]byte(k), udp.OP_NOOP, ra)
+				go UdpClient.WriteUdp([]byte(k), udp.OP_NOOP, ra)
 			} else {
 				k = fmt.Sprintf("%s-%s-%s", gtid, lteid, util.ObjToString(mapInfo["stype"]))
-				UdpClient.WriteUdp([]byte(k), udp.OP_NOOP, ra)
+				go UdpClient.WriteUdp([]byte(k), udp.OP_NOOP, ra)
 			}
-			log.Info("start dispose ...")
-			getIntention(gtid, lteid, mapInfo)
+			UdpChan <- mapInfo
 		}
 	case udp.OP_NOOP: //下个节点回应
 		ok := string(data)

+ 13 - 11
field_py/task.go

@@ -28,8 +28,10 @@ var IpDialErrNum, IpGetErrNum, ExtractDialErrNum = int64(0), int64(0), int64(0)
 
 //@Description 处理字段procurementlist、review_experts
 // @Author J 2022/8/31 14:57
-func getIntention(gtid, lteid string, mapinfo map[string]interface{}) {
+func getIntention(mapinfo map[string]interface{}) {
 	defer util.Catch()
+	gtid, _ := mapinfo["gtid"].(string)
+	lteid, _ := mapinfo["lteid"].(string)
 
 	MgoB.Update("bidding_processing_ids", bson.M{"gtid": gtid}, bson.M{"$set": bson.M{"dataprocess": 2, "updatetime": time.Now().Unix()}}, false, false)
 
@@ -120,7 +122,7 @@ func taskA(tmp map[string]interface{}, gtid, lteid string) map[string]interface{
 		return nil
 	}
 	//处理数据
-	result, err := rpcGetFieldP(string(reqStr))
+	result, err := rpcGetFieldP(string(reqStr), id)
 	if err != nil { //保存处理异常信息
 		ErrorInfoCache <- map[string]interface{}{
 			"err":        err.Error(),
@@ -153,7 +155,7 @@ func taskB(tmp map[string]interface{}, gtid, lteid string) map[string]interface{
 		return nil
 	}
 	//处理数据
-	result, err := rpcGetFieldR(string(reqStr))
+	result, err := rpcGetFieldR(string(reqStr), id)
 	if err != nil { //保存处理异常信息
 		ErrorInfoCache <- map[string]interface{}{
 			"err":        err.Error(),
@@ -168,7 +170,7 @@ func taskB(tmp map[string]interface{}, gtid, lteid string) map[string]interface{
 	return result
 }
 
-func rpcGetFieldP(reqStr string) (map[string]interface{}, error) {
+func rpcGetFieldP(reqStr, id string) (map[string]interface{}, error) {
 	defer util.Catch()
 	//获取ip、port服务
 	ipConn, ipErr := grpc.Dial(config.Conf.Serve.GrpcAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
@@ -211,7 +213,7 @@ func rpcGetFieldP(reqStr string) (map[string]interface{}, error) {
 	req := &proto.GoodsRequest{
 		Contents: reqStr,
 	}
-	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*1)
+	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*2)
 	defer cancel()
 	resp, err := client.GoodsExtract(ctx, req)
 	if err != nil {
@@ -226,14 +228,14 @@ func rpcGetFieldP(reqStr string) (map[string]interface{}, error) {
 	if err != nil {
 		return nil, err
 	}
-	if time.Since(start).Minutes() > 5 {
+	if time.Since(start).Minutes() > 2 {
 		// the Python field-recognition RPC took longer than 2 minutes
-		log.Info("rpcGetFieldP 字段识别超过5min", zap.Any("serve", "goods_service"), zap.Any("reqStr", reqStr), zap.Any("ip+port", addr))
+		log.Info("rpcGetFieldP 字段识别超过2min", zap.Any("serve", "goods_service"), zap.String("id", id), zap.Any("ip+port", addr))
 	}
 	return result, nil
 }
 
-func rpcGetFieldR(reqStr string) (map[string]interface{}, error) {
+func rpcGetFieldR(reqStr, id string) (map[string]interface{}, error) {
 	defer util.Catch()
 	//获取ip、port服务
 	ipConn, ipErr := grpc.Dial(config.Conf.Serve.GrpcAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
@@ -276,7 +278,7 @@ func rpcGetFieldR(reqStr string) (map[string]interface{}, error) {
 	req := &proto.ContentRequest{
 		Contents: reqStr,
 	}
-	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*1)
+	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*2)
 	defer cancel()
 	resp, err := client.Extract(ctx, req)
 	if err != nil {
@@ -286,9 +288,9 @@ func rpcGetFieldR(reqStr string) (map[string]interface{}, error) {
 	if json.Unmarshal([]byte(resp.Results), &result) != nil {
 		return nil, errors.New("Json Unmarshal Error")
 	}
-	if time.Since(start).Minutes() > 5 {
+	if time.Since(start).Minutes() > 2 {
 		// py接口字段识别超过2分钟
-		log.Info("rpcGetFieldR 字段识别超过5min", zap.Any("serve", "extract_expert_service"), zap.Any("reqStr", reqStr), zap.Any("ip+port", addr))
+		log.Info("rpcGetFieldR 字段识别超过2min", zap.Any("serve", "extract_expert_service"), zap.Any("id", id), zap.Any("ip+port", addr))
 	}
 	return result, nil
 }

+ 2 - 1
field_sync/common.toml

@@ -7,7 +7,8 @@ fields = ["buyerzipcode", "winnertel", "winnerperson", "contractcode", "winnerad
     "area", "city", "district", "s_winner", "toptype", "subtype", "subscopeclass", "s_subscopeclass", "dataging", "winnerorder", "project_scale",
     "project_duration", "project_timeunit", "project_startdate", "project_completedate", "payway", "contract_guarantee", "bid_guarantee", "qualifies",
     "funds", "review_experts", "bidmethod", "bidendtime", "bidopenaddress", "docamount", "bidway", "agencyrate", "agencyfee", "getdocmethod", "purchasing_tag",
-    "package", "history_updatetime"
+    "package", "history_updatetime", "total_investment", "owner", "projecttype", "project_person", "project_phone", "approvedept", "construction_area",
+    "floor_area"
 ]
 
 [udp]

+ 30 - 1
field_sync/config/conf_test.go

@@ -42,12 +42,41 @@ typem = "medical_institution"
 indexs = "supplier_product_v1"
 types = "supplier_product"
 
+
+[udp]
+locport = ":1787"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1783
+stype = "bidding"
+remark = "索引"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1782
+stype = ""
+remark = ""
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1680
+stype = "subject"
+remark = "医疗数据"
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1681
+stype = ""
+remark = ""
+[[udp.next]]
+addr = "127.0.0.1"
+port = 1970
+stype = ""
+remark = ""
+
 `
 
 func TestInit(t *testing.T) {
 	testfile := "/tmp/crocodile.toml"
 	ioutil.WriteFile(testfile, []byte(confs), 0644)
 	Init(testfile)
-	t.Logf("%+v", Conf.Serve.GrpcAddr)
+	t.Logf("%+v", Conf.Udp.Next)
 	os.Remove(testfile)
 }

+ 2 - 11
field_sync/task.go

@@ -30,7 +30,7 @@ func biddingTask(data []byte, mapInfo map[string]interface{}) {
 	if stype == "bidding" {
 		uq := bson.M{"gtid": bson.M{"$gte": util.ObjToString(mapInfo["gtid"])},
 			"lteid": bson.M{"$lte": util.ObjToString(mapInfo["lteid"])}}
-		MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 6, "updatetime": time.Now().Unix()}}, false, false)
+		MgoB.Update("bidding_processing_ids", uq, bson.M{"$set": bson.M{"dataprocess": 7, "updatetime": time.Now().Unix()}}, false, true)
 	}
 	// 领域标签处理的数据 id段
 	if stype == "bidding_history" {
@@ -71,7 +71,7 @@ func biddingTask(data []byte, mapInfo map[string]interface{}) {
 	count, _ := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(&q).Count()
 	log.Info("bidding表", zap.Int64("同步总数:", count))
 	c := 0
-	if count < 200000 {
+	if count < 500000 {
 		var res []map[string]interface{}
 		result := biddingConn.DB(MgoB.DbName).C(config.Conf.DB.MongoB.Coll).Find(q).Select(map[string]interface{}{
 			"contenthtml": 0,
@@ -495,10 +495,6 @@ func taskinfo(id string) {
 			extractMap["entidlist"] = cid
 		}
 		MgoE.UpdateById(config.Conf.DB.MongoE.Coll, id, map[string]interface{}{"$set": extractMap})
-		//updateExtPool <- []map[string]interface{}{
-		//	{"_id": mongodb.StringTOBsonId(id)},
-		//	{"$set": extractMap},
-		//}
 	}
 
 	// 附件有效字段
@@ -511,11 +507,6 @@ func taskinfo(id string) {
 	}
 	if len(update) > 0 {
 		MgoB.UpdateById(config.Conf.DB.MongoB.Coll, id, map[string]interface{}{"$set": update})
-		//updateBidPool <- []map[string]interface{}{{
-		//	"_id": mongodb.StringTOBsonId(id),
-		//},
-		//	{"$set": update},
-		//}
 	}
 
 	mapinfo := map[string]interface{}{