|
@@ -0,0 +1,169 @@
|
|
|
|
+package main
|
|
|
|
+
|
|
|
|
+import (
|
|
|
|
+ "context"
|
|
|
|
+ "encoding/json"
|
|
|
|
+ "fmt"
|
|
|
|
+ "github.com/go-redis/redis/v8"
|
|
|
|
+ "go.mongodb.org/mongo-driver/bson"
|
|
|
|
+ "go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
|
+ "go.mongodb.org/mongo-driver/mongo"
|
|
|
|
+ "go.mongodb.org/mongo-driver/mongo/options"
|
|
|
|
+ "log"
|
|
|
|
+ "strings"
|
|
|
|
+)
|
|
|
|
+
|
|
|
|
// Company is the per-company snapshot that processCompanies serializes with
// encoding/json and stores as a Redis hash value. It carries no json tags, so
// the JSON keys are the Go field names.
type Company struct {
	// CreditNo is the company's credit number; it is also used as the Redis
	// hash field under which the snapshot is stored.
	CreditNo string `bson:"credit_no"`
	// CompanyName is the company's display name.
	CompanyName string `bson:"company_name"`
	// ContactName, Phone and Email are the contact details taken from one of
	// the company's annual reports.
	ContactName, Phone, Email string
}
|
|
|
|
+
|
|
|
|
// MatchResult is the document processCompanies inserts into the match
// collection when two different companies share at least two contact fields.
type MatchResult struct {
	// CompanyA / CreditNoA identify the company currently being processed.
	CompanyA  string `bson:"company_a"`
	CreditNoA string `bson:"credit_no_a"`

	// CompanyB / CreditNoB identify the previously seen company read back
	// from Redis.
	CompanyB  string `bson:"company_b"`
	CreditNoB string `bson:"credit_no_b"`

	// MatchedFields lists which of "contact", "phone" and "email" were equal.
	MatchedFields []string `bson:"matched_fields"`
}
|
|
|
|
+
|
|
|
|
// ctx is the package-wide context handed to every MongoDB and Redis call in
// this file. It is the background context, so it carries no deadline and is
// never cancelled.
var ctx context.Context = context.Background()
|
|
|
|
+
|
|
|
|
+func initMongo(uri string) (*mongo.Collection, *mongo.Collection) {
|
|
|
|
+ clientOpts := options.Client().ApplyURI(uri)
|
|
|
|
+ client, err := mongo.Connect(ctx, clientOpts)
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Fatal(err)
|
|
|
|
+ }
|
|
|
|
+ db := client.Database("mixdata")
|
|
|
|
+ return db.Collection("qyxy_std"), db.Collection("wcc")
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func initRedis(addr, password string) *redis.Client {
|
|
|
|
+ return redis.NewClient(&redis.Options{
|
|
|
|
+ Addr: addr,
|
|
|
|
+ Password: password,
|
|
|
|
+ DB: 0,
|
|
|
|
+ })
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
// buildRedisKey returns the Redis hash keys under which a company is indexed.
// One key is produced for every pair of non-empty values, always in this
// order:
//
//	"cp:<contact>|<phone>"
//	"ce:<contact>|<email>"
//	"pe:<phone>|<email>"
//
// When fewer than two of the three values are non-empty the result is nil.
func buildRedisKey(contact, phone, email string) []string {
	pairs := [...]struct {
		prefix      string
		left, right string
	}{
		{"cp:", contact, phone},
		{"ce:", contact, email},
		{"pe:", phone, email},
	}

	var keys []string
	for _, p := range pairs {
		if p.left == "" || p.right == "" {
			continue
		}
		keys = append(keys, p.prefix+p.left+"|"+p.right)
	}
	return keys
}
|
|
|
|
+
|
|
|
|
+func processCompanies(redisCli *redis.Client, coll *mongo.Collection, matchColl *mongo.Collection) {
|
|
|
|
+ cur, err := coll.Find(ctx, bson.M{})
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Fatal(err)
|
|
|
|
+ }
|
|
|
|
+ defer cur.Close(ctx)
|
|
|
|
+
|
|
|
|
+ for cur.Next(ctx) {
|
|
|
|
+ var doc bson.M
|
|
|
|
+ if err := cur.Decode(&doc); err != nil {
|
|
|
|
+ log.Println("decode error:", err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ creditNo := doc["credit_no"].(string)
|
|
|
|
+ companyName := doc["company_name"].(string)
|
|
|
|
+ annualRaw, ok := doc["annual_reports"]
|
|
|
|
+ if !ok {
|
|
|
|
+ continue // annual_reports 不存在,跳过
|
|
|
|
+ }
|
|
|
|
+ annualReports, ok := annualRaw.(primitive.A)
|
|
|
|
+ if !ok || len(annualReports) == 0 {
|
|
|
|
+ continue // annual_reports 存在但不是数组或为空
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 抽取联系人、电话、邮箱
|
|
|
|
+ type keySet struct {
|
|
|
|
+ Contact string
|
|
|
|
+ Phone string
|
|
|
|
+ Email string
|
|
|
|
+ }
|
|
|
|
+ keySetMap := make(map[string]keySet)
|
|
|
|
+
|
|
|
|
+ for _, item := range annualReports {
|
|
|
|
+ report, ok := item.(bson.M)
|
|
|
|
+ if !ok {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ contact := strings.TrimSpace(fmt.Sprint(report["operator_name"]))
|
|
|
|
+ phone := strings.TrimSpace(fmt.Sprint(report["company_phone"]))
|
|
|
|
+ email := strings.TrimSpace(fmt.Sprint(report["company_email"]))
|
|
|
|
+ key := contact + "|" + phone + "|" + email
|
|
|
|
+ keySetMap[key] = keySet{contact, phone, email}
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for _, ks := range keySetMap {
|
|
|
|
+ if ks.Contact == "" && ks.Phone == "" && ks.Email == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ keys := buildRedisKey(ks.Contact, ks.Phone, ks.Email)
|
|
|
|
+ for _, redisKey := range keys {
|
|
|
|
+ // 查 Redis 中是否有相同的 Key 存在的其他企业
|
|
|
|
+ fields, err := redisCli.HGetAll(ctx, redisKey).Result()
|
|
|
|
+ if err != nil {
|
|
|
|
+ log.Println("redis get error:", err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for _, otherStr := range fields {
|
|
|
|
+ var other Company
|
|
|
|
+ if err := json.Unmarshal([]byte(otherStr), &other); err != nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ // 忽略自身
|
|
|
|
+ if other.CreditNo == creditNo {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ matchedFields := []string{}
|
|
|
|
+ if ks.Contact != "" && ks.Contact == other.ContactName {
|
|
|
|
+ matchedFields = append(matchedFields, "contact")
|
|
|
|
+ }
|
|
|
|
+ if ks.Phone != "" && ks.Phone == other.Phone {
|
|
|
|
+ matchedFields = append(matchedFields, "phone")
|
|
|
|
+ }
|
|
|
|
+ if ks.Email != "" && ks.Email == other.Email {
|
|
|
|
+ matchedFields = append(matchedFields, "email")
|
|
|
|
+ }
|
|
|
|
+ if len(matchedFields) >= 2 {
|
|
|
|
+ match := MatchResult{
|
|
|
|
+ CompanyA: companyName,
|
|
|
|
+ CreditNoA: creditNo,
|
|
|
|
+ CompanyB: other.CompanyName,
|
|
|
|
+ CreditNoB: other.CreditNo,
|
|
|
|
+ MatchedFields: matchedFields,
|
|
|
|
+ }
|
|
|
|
+ _, _ = matchColl.InsertOne(ctx, match)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 存入当前公司信息(防止重复,使用 credit_no 作为 field)
|
|
|
|
+ comp := Company{
|
|
|
|
+ CreditNo: creditNo,
|
|
|
|
+ CompanyName: companyName,
|
|
|
|
+ ContactName: ks.Contact,
|
|
|
|
+ Phone: ks.Phone,
|
|
|
|
+ Email: ks.Email,
|
|
|
|
+ }
|
|
|
|
+ compStr, _ := json.Marshal(comp)
|
|
|
|
+ _ = redisCli.HSet(ctx, redisKey, creditNo, compStr).Err()
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|