123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480 |
- package main
- import (
- "context"
- "encoding/json"
- "fmt"
- "github.com/garyburd/redigo/redis"
- "go.mongodb.org/mongo-driver/bson"
- "go.mongodb.org/mongo-driver/bson/primitive"
- "go.mongodb.org/mongo-driver/mongo"
- "go.mongodb.org/mongo-driver/mongo/options"
- "log"
- mu "mfw/util"
- "net"
- elastic "qfw/common/src/qfw/util/elastic"
- "qfw/util"
- "regexp"
- "sort"
- "strings"
- "time"
- )
- var
- (
- Config = make(map[string]string)
- Fields []string
- SourceClient, FClient *mongo.Client
- RedisPool redis.Pool
- Addrs = make(map[string]interface{}, 0) //省市县
- udpclient mu.UdpClient //udp对象
- ElasticClientIndex, ElasticClientType string
- Reg_xing = regexp.MustCompile(`\*{1,}`)
- Reg_person = regexp.MustCompile("[\u4E00-\u9FA5\\s]+")
- Reg_tel = regexp.MustCompile(`^[0-9\-\s]*$`)
- )
- /**
- 新增
- 初始化
- */
- func init() {
- log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
- util.ReadConfig(&Config)
- log.Println(Config)
- Fields = []string{"_id", "contact", "partners", "business_scope", "company_address", "capital",
- "establish_date", "legal_person", "company_type", "district", "city", "province", "area_code", "credit_no",
- "company_name", "history_name", "topscopeclass", "wechat_accounts", "alias", "website", "report_websites"}
- var err error
- //mongo init
- SourceClient, err = mongo.NewClient(options.Client().ApplyURI("mongodb://" + Config["mgoinit"]).SetMaxPoolSize(20))
- if err != nil {
- log.Fatalln(err)
- }
- c1 := context.Background()
- err = SourceClient.Connect(c1)
- //defer SourceClient.Disconnect(c1)
- if err != nil {
- log.Fatalln(err)
- }
- FClient, err = mongo.NewClient(options.Client().ApplyURI("mongodb://" + Config["mgourl"]).SetMaxPoolSize(20))
- if err != nil {
- log.Fatalln(err)
- }
- cc := context.Background()
- err = FClient.Connect(cc)
- //defer FClient.Disconnect(cc)
- if err != nil {
- log.Fatalln(err)
- }
- //加载省市县代码
- cursor2, err := FClient.Database(Config["mgodb_extract_kf"]).Collection("address").Find(cc, bson.M{},
- options.Find().SetProjection(bson.M{"province": 1, "code": 1, "city": 1, "district": 1}))
- defer cursor2.Close(cc)
- defer FClient.Connect(cc)
- if err != nil {
- log.Fatalln(err)
- }
- for cursor2.Next(cc) {
- tmp := make(map[string]interface{})
- if err := cursor2.Decode(&tmp); err != nil {
- log.Println(err)
- continue
- } else {
- code := tmp["code"]
- if code != nil && strings.TrimSpace(code.(string)) != "" {
- Addrs[fmt.Sprint(code)] = tmp
- }
- }
- }
- log.Println(len(Addrs))
- //es.NewClient(es.SetURL(addrs...), es.SetMaxRetries(2), es.SetSniff(false))
- //es init
- elastic.InitElasticSize(Config["elasticsearch"], 10)
- //esConn := elastic.GetEsConn()
- //defer elastic.DestoryEsConn(esConn)
- //log.Println(esConn.Index().Index(Config["elasticsearch_index"]).Type(Config["elasticsearch_type"]).Id("123").BodyJson(map[string]interface{}{"testname":"六盘水市钟山开发区亿农科贸有限公司"}).Refresh(true).Do())
- //log.Println(esConn.Delete().Index(Config["elasticsearch_index"]).Type(Config["elasticsearch_type"]).Id("123").Refresh(true).Do())
- //if ESclient, err = elastic.NewClient(elastic.SetURL(Config["elasticsearch"]), elastic.SetHealthcheckTimeout(time.Minute)); err != nil {
- // log.Println(Config["elasticsearch"])
- // log.Fatalln("ElasticClient err:", err)
- //} else {
- // ElasticClientIndex = Config["elasticsearch_index"]
- // ElasticClientType = Config["elasticsearch_type"]
- //}
- //redis
- RedisPool = redis.Pool{
- MaxIdle: 10,
- IdleTimeout: 240 * time.Second,
- Dial: func() (redis.Conn, error) {
- conn, e := redis.Dial("tcp", Config["redis"])
- if e != nil {
- return conn, e
- }
- _, err = conn.Do("SELECT", "1")
- if err != nil {
- return nil, err
- }
- return conn, nil
- },}
- c := RedisPool.Get()
- if _, err := c.Do("PING"); err != nil {
- log.Fatalln(err)
- }
- defer c.Close()
- }
- func main() {
- //udp
- updport := Config["udpport"]
- udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
- udpclient.Listen(processUdpMsg)
- log.Println("Udp服务监听", updport)
- go TimedTask() //定时任务
- c := make(chan int, 1)
- <-c
- }
- func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
- log.Println(act, string(data), ra)
- switch act {
- case mu.OP_TYPE_DATA: //上个节点的数据
- //从表中开始处理生成企业数据
- tmp := new(map[string]interface{})
- err := json.Unmarshal(data, &tmp)
- if err != nil {
- log.Println("err:", err)
- udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
- return
- } else if tmp != nil {
- udpclient.WriteUdp([]byte("ok,run"), mu.OP_NOOP, ra)
- go task(tmp)
- }
- case mu.OP_NOOP: //下个节点回应
- log.Println("发送成功", string(data))
- }
- }
- func task(mapinfo *map[string]interface{}) {
- defer util.Catch()
- gtid, lteid := util.ObjToString((*mapinfo)["gtid"]), util.ObjToString((*mapinfo)["lteid"])
- if gtid == "" || lteid == "" {
- log.Println(gtid, lteid, "参数错误")
- return
- }
- GId, err := primitive.ObjectIDFromHex(gtid)
- LtId, err2 := primitive.ObjectIDFromHex(lteid)
- if err != nil || err2 != nil {
- log.Println(gtid, lteid, "转换_id错误")
- return
- }
- //udp的id区间查询bidding 中标人 中标联系人 中标联系电话
- // topscopeclass项目类型-industry行业类型&&topscopeclass联系人项目类型
- // (area地区-province省份 city城市-city城市 district区县-district区县)
- // winneraddr-company_address企业地址
- cursor, err := SourceClient.Database(Config["mgodb_bidding"]).Collection(Config["mgodb_mgoinit_c"]).Find(context.TODO(), bson.M{
- "_id": bson.M{
- "$gte": GId,
- "$lte": LtId,
- },
- }, options.Find().SetProjection(bson.M{"winner": 1, "winnertel": 1, "winnerperson": 1,
- "topscopeclass": 1, "winneraddr": 1}))
- if err != nil {
- log.Println(err)
- return
- }
- for cursor.Next(context.TODO()) {
- tmp := map[string]interface{}{}
- if err := cursor.Decode(&tmp); err == nil {
- if tmp["winner"] == nil || tmp["winner"] == "" {
- continue
- }
- //redis查询是否存在
- rdb := RedisPool.Get()
- defer rdb.Close()
- if reply, err := redis.String(rdb.Do("GET", tmp["winner"])); err != nil {
- //redis不存在存到临时表,定时任务处理
- FClient.Database(Config["mgodb_extract_kf"]).Collection("winner_new").InsertOne(context.TODO(), tmp)
- //log.Println(tmp, err)
- continue
- } else {
- //redis存在
- //log.Println(reply)
- //reply = "5e0316b998a9abaf6535df3d"
- id, err := primitive.ObjectIDFromHex(reply)
- if err != nil {
- log.Println("get redis id err:", err, tmp)
- continue
- }
- //拿到合并后的qyk
- oldTmp := make(map[string]interface{})
- err = FClient.Database(Config["mgodb_extract_kf"]).Collection(Config["mgo_qyk_c"]).
- FindOne(context.TODO(), bson.M{"_id": id}).Decode(&oldTmp)
- if err != nil {
- log.Println("qyk id err:", err, id)
- continue
- }
- //比较合并
- //行业类型
- tmpTopscopeclass := []string{}
- tmpTopscopeclassMap := make(map[string]bool)
- log.Println(tmp["_id"])
- if oldTmp["industry"] == nil {
- //log.Println(reflect.ValueOf(tmp["topscopeclass"]))
- if v, ok := tmp["topscopeclass"].(primitive.A); ok {
- for _, vv := range v {
- if vvv, ok := vv.(string); ok && len(vvv) > 1 {
- tmpTopscopeclassMap[vvv[:len(vvv)-1]] = true
- }
- }
- for k := range tmpTopscopeclassMap {
- tmpTopscopeclass = append(tmpTopscopeclass, k)
- }
- }
- } else {
- if v, ok := oldTmp["industry"].(primitive.A); ok {
- for _, vv := range v {
- if vvv, ok := vv.(string); ok {
- tmpTopscopeclassMap[vvv] = true
- }
- }
- }
- if v, ok := tmp["topscopeclass"].(primitive.A); ok {
- for _, vv := range v {
- if vvv, ok := vv.(string); ok && len(vvv) > 1 {
- tmpTopscopeclassMap[vvv[:len(vvv)-1]] = true
- }
- }
- for k := range tmpTopscopeclassMap {
- tmpTopscopeclass = append(tmpTopscopeclass, k)
- }
- }
- }
- sort.Strings(tmpTopscopeclass)
- oldTmp["industry"] = tmpTopscopeclass
- esId := oldTmp["_id"].(primitive.ObjectID).Hex()
- //更新行业类型
- if tmp["winnerperson"] == nil || tmp["winnerperson"] == "" || Reg_xing.MatchString(util.ObjToString(tmp["winnerperson"])) {
- oldTmp["updatatime"] = time.Now().Unix()
- //mongo更新
- FClient.Database(Config["mgodb_extract_kf"]).Collection(Config["mgo_qyk_c"]).
- UpdateOne(context.TODO(), bson.M{"_id": oldTmp["_id"]}, bson.M{"$set": oldTmp})
- //es更新
- delete(oldTmp, "_id")
- esConn := elastic.GetEsConn()
- defer elastic.DestoryEsConn(esConn)
- esConn.Update().Index(Config["elasticsearch_index"]).Type(Config["elasticsearch_type"]).Id(esId).Doc(oldTmp).Refresh(true).Do()
- //log.Println( err2,err3)
- continue
- }
- //联系方式合并
- var tmpperson, winnertel string
- tmpperson = tmp["winnerperson"].(string)
- if tmp["winnertel"] == nil || tmp["winnertel"]==""{
- winnertel = ""
- }else {
- if Reg_xing.MatchString(util.ObjToString(tmp["winnertel"]))||!Reg_tel.MatchString(util.ObjToString(tmp["winnertel"])){
- winnertel = ""
- }else {
- winnertel = util.ObjToString(tmp["winnertel"])
- }
- }
- contactMaps := make([]interface{}, 0)
- if oldTmp["contact"] == nil {
- tmpContact := make(map[string]interface{})
- tmpContact["contact_person"] = tmpperson
- tmpContact["contact_type"] = "项目联系人"
- tmpContact["phone"] = winnertel
- tmpContact["topscopeclass"] = strings.Join(tmpTopscopeclass, ";")
- tmpContact["updatetime"] = time.Now().Unix()
- contactMaps = append(contactMaps, tmpContact)
- } else {
- //对比前四项,相等丢弃
- if v, ok := oldTmp["contact"].(primitive.A); ok {
- var isNotUpdate bool
- for _, vv := range v {
- if vvv, ok := vv.(map[string]interface{}); ok {
- if vvv["contact_person"] == tmpperson && vvv["contact_type"] == "项目联系人" &&
- vvv["phone"] == winnertel && vvv["topscopeclass"] == strings.Join(tmpTopscopeclass, ";") {
- isNotUpdate = true
- vvv["updatetime"] = time.Now().Unix()
- }
- contactMaps = append(contactMaps, vvv)
- }
- }
- if !isNotUpdate {
- vvv := make(map[string]interface{})
- vvv["contact_person"] = tmp["winnerperson"]
- vvv["contact_type"] = "项目联系人"
- vvv["phone"] = winnertel
- vvv["topscopeclass"] = strings.Join(tmpTopscopeclass, ";")
- vvv["updatetime"] = time.Now().Unix()
- contactMaps = append(contactMaps, vvv)
- }
- }
- }
- oldTmp["contact"] = contactMaps
- //mongo更新
- oldTmp["updatatime"] = time.Now().Unix()
- FClient.Database(Config["mgodb_extract_kf"]).Collection(Config["mgo_qyk_c"]).
- UpdateOne(context.TODO(), bson.M{"_id": oldTmp["_id"]}, bson.M{"$set": oldTmp})
- //es更新
- delete(oldTmp, "_id")
- esConn := elastic.GetEsConn()
- defer elastic.DestoryEsConn(esConn)
- esConn.Update().Index(Config["elasticsearch_index"]).Type(Config["elasticsearch_type"]).Id(esId).Doc(oldTmp).Refresh(true).Do()
- //log.Println( err2,err3)
- }
- } else {
- log.Println(tmp)
- continue
- }
- }
- defer cursor.Close(context.TODO())
- //tmps := make([]interface{}, 0)
- //num, snum := 0, 0
- //for k := range keys {
- // //if num == 6 {
- // // return
- // //}
- // tmp := make(map[string]interface{})
- // err := Client.Database("enterprise").Collection("qyxy").FindOne(context.TODO(), bson.M{"company_name": k}).Decode(&tmp)
- // if err != nil {
- // //log.Println(k, err)
- // continue
- // }
- // if tmp["credit_no"] != nil {
- // if credit_no, ok := tmp["credit_no"].(string); ok && strings.TrimSpace(credit_no) != "" &&
- // len(strings.TrimSpace(credit_no)) > 8 {
- // dataNo := strings.TrimSpace(credit_no)[2:8]
- // if Addrs[dataNo] != nil {
- // if v, ok := Addrs[dataNo].(map[string]interface{}); ok {
- // if tmp["province"] == nil || tmp["province"] == "" {
- // tmp["province"] = v["province"]
- // }
- // tmp["city"] = v["city"]
- // tmp["district"] = v["district"]
- //
- // }
- // }
- // }
- // }
- // contacts := make([]map[string]interface{}, 0)
- // contact := make(map[string]interface{}, 0)
- // if tmp["legal_person"] != nil {
- // contact["contact_person"] = tmp["legal_person"] //联系人
- // } else {
- // contact["contact_person"] = "" //联系人
- // }
- // contact["contact_type"] = "法定代表人" //法定代表人
- // //log.Println(1)
- // if tmp["annual_reports"] != nil {
- // bytes, err := json.Marshal(tmp["annual_reports"])
- // if err != nil {
- // log.Println("annual_reports err:", err)
- // }
- // //log.Println(2, string(bytes))
- // phonetmp := make([]map[string]interface{}, 0)
- // err = json.Unmarshal(bytes, &phonetmp)
- // if err != nil {
- // log.Println("Unmarshal err:", err)
- // }
- // //log.Println(44, err)
- // for _, vv := range phonetmp {
- // if vv["company_phone"] != nil {
- // if vv["company_phone"] == "" {
- // continue
- // } else {
- // contact["phone"] = vv["company_phone"] //联系电话
- // break
- // }
- // } else {
- // contact["phone"] = "" //联系电话
- // }
- //
- // }
- // }
- // //log.Println(k, contact["phone"], tmp["_id"])
- // //time.Sleep(10 * time.Second)
- // if contact["phone"] == nil {
- // contact["phone"] = "" //联系电话
- // }
- // contact["topscopeclass"] = "企业公示" //项目类型
- // contact["updatetime"] = time.Now().Unix() //更新时间
- // contacts = append(contacts, contact)
- // tmp["contact"] = contacts
- //
- // savetmp := make(map[string]interface{}, 0)
- // //字段处理
- // for _, sk := range Fields {
- // if sk == "establish_date" { //成立日期
- // if tmp[sk] != nil {
- // savetmp[sk] = tmp[sk].(primitive.DateTime).Time().UTC().Unix()
- // continue
- // }
- // } else if sk == "capital" { //注册资本
- // //log.Println(sk, tmp[sk])
- // savetmp[sk] = ObjToMoney([]interface{}{tmp[sk], ""})[0]
- // continue
- // } else if sk == "partners" { //股东及出资信息
- // //log.Println(sk, tmp[sk], )
- // //fmt.Println(reflect.TypeOf(tmp[sk]))
- // if tmp[sk] != nil {
- // if ppms, ok := tmp[sk].(primitive.A); ok {
- // for i, _ := range ppms {
- // if ppms[i].(map[string]interface{})["stock_type"] != nil {
- // ppms[i].(map[string]interface{})["stock_type"] = "企业公示"
- // }
- // delete(ppms[i].(map[string]interface{}), "identify_type")
- // }
- // savetmp[sk] = ppms
- // continue
- // }
- // }
- // } else if sk == "_id" { //_id备份企业库
- // savetmp["tmp"+sk] = tmp[sk]
- // continue
- // }
- // if tmp[sk] == nil && sk != "history_name" && sk != "establish_date" && sk != "capital" && sk != "partners" && sk != "contact" && sk != "wechat_accounts" {
- // savetmp[sk] = ""
- // } else {
- // if sk == "wechat_accounts" { //微信公众号
- // if savetmp[sk] == nil {
- // //TODO 微信公众号取值未确认
- // savetmp[sk] = []string{}
- // }
- // continue
- // } else if sk == "website" { //网址
- // //TODO 网址取值未确认
- // continue
- // }
- // savetmp[sk] = tmp[sk]
- // }
- // }
- // savetmp["alias"] = "" //别名
- // tmps = append(tmps, savetmp)
- // num++
- // snum++
- // if snum >= 300 {
- // _, err := Client.Database("extract_v3").Collection("enterprise_qyxy").InsertMany(context.TODO(), tmps)
- // if err != nil {
- // log.Println("save:", err)
- // continue
- // } else {
- // log.Println(num)
- // tmps = []interface{}{}
- // snum = 0
- // }
- // }
- //}
- //if len(tmps) > 0 {
- // result, err := Client.Database("extract_v3").Collection("enterprise_qyxy").InsertMany(context.TODO(), tmps)
- // if err != nil {
- // log.Println("save over:", err)
- // } else {
- // log.Println("last save num:", len(result.InsertedIDs))
- // }
- //}
- }
|