wcc há 3 meses atrás
pai
commit
795ec49652
10 ficheiros alterados com 367 adições e 107 exclusões
  1. 3 0
      graph/go.mod
  2. 6 0
      graph/go.sum
  3. BIN
      graph/graph-http
  4. 13 0
      graph/graph_test.go
  5. 3 5
      graph/main.go
  6. 157 0
      graph/multi_deep.go
  7. 15 1
      graph/readme.txt
  8. 0 97
      graph/templates/graph.html
  9. 1 4
      graph/utils.go
  10. 169 0
      graph/yisi.go

+ 3 - 0
graph/go.mod

@@ -13,9 +13,11 @@ require (
 	github.com/andybalholm/cascadia v1.3.1 // indirect
 	github.com/bytedance/sonic v1.11.6 // indirect
 	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cespare/xxhash/v2 v2.1.2 // indirect
 	github.com/cloudwego/base64x v0.1.4 // indirect
 	github.com/cloudwego/iasm v0.2.0 // indirect
 	github.com/dchest/captcha v1.0.0 // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect
 	github.com/gin-gonic/gin v1.10.0 // indirect
@@ -23,6 +25,7 @@ require (
 	github.com/go-playground/locales v0.14.1 // indirect
 	github.com/go-playground/universal-translator v0.18.1 // indirect
 	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/go-redis/redis/v8 v8.11.5 // indirect
 	github.com/goccy/go-json v0.10.2 // indirect
 	github.com/golang/snappy v0.0.1 // indirect
 	github.com/josharian/intern v1.0.0 // indirect

+ 6 - 0
graph/go.sum

@@ -12,6 +12,8 @@ github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1
 github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
 github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
+github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
 github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
@@ -23,6 +25,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dchest/captcha v1.0.0 h1:vw+bm/qMFvTgcjQlYVTuQBJkarm5R0YSsDKhm1HZI2o=
 github.com/dchest/captcha v1.0.0/go.mod h1:7zoElIawLp7GUMLcj54K9kbw+jEyvz2K0FDdRRYhvWo=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/elastic/go-elasticsearch/v7 v7.17.10/go.mod h1:OJ4wdbtDNk5g503kvlHLyErCgQwwzmDtaFC4XyOxXA4=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -46,6 +50,8 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
 github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
 github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
 github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI=
+github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
 github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=

BIN
graph/graph-http


+ 13 - 0
graph/graph_test.go

@@ -37,6 +37,19 @@ func TestCheckLegalRelationships(t *testing.T) {
 	log.Println(has, result, err)
 }
 
+// TestCheckLegalRelationshipsByStep runs CheckLegalRelationshipsByStep with a
+// depth of 8 and a step limit of 1 through a client built from HostList,
+// UserName and PassWord, and logs whatever relationship paths come back.
+func TestCheckLegalRelationshipsByStep(t *testing.T) {
+	client, err := NewNebulaClient(HostList, UserName, PassWord)
+	if err != nil {
+		// t.Fatalf rather than log.Fatal: log.Fatal calls os.Exit, which kills
+		// the whole test binary and skips deferred cleanup.
+		t.Fatalf("连接失败: %v", err)
+	}
+	defer client.Close()
+
+	names := []string{"河南剑鱼数字科技有限公司", "河南拓普计算机网络工程有限公司", "上海元藩投资有限公司"}
+	// Alternative, larger input set kept for manual runs:
+	//names := []string{"万达集团股份有限公司", "万达石化有限公司", "山东万达电缆有限公司", "山东万达化工有限公司", "山东万达热电有限公司", "山东万达进出口有限公司", "山东耐斯特炭黑有限公司", "山东万达宝通轮胎有限公司", "山东明宇化学有限公司", "大连万达集团股份有限公司", "大连万达(上海)金融集团有限公司", "大连万达集团咨询服务有限公司", "北京万达足球俱乐部有限公司", "北京红舸科技文化有限公司", "北京万达文化产业集团有限公司"}
+	has, result, err := client.CheckLegalRelationshipsByStep(names, 8, 1)
+	if err != nil {
+		t.Fatalf("CheckLegalRelationshipsByStep: %v", err)
+	}
+	t.Log(has, result)
+}
+
 func TestFetchLegalByVid(t *testing.T) {
 	session, pool, err := ConnectToNebula(HostList, UserName, PassWord)
 	if err != nil {

+ 3 - 5
graph/main.go

@@ -158,8 +158,6 @@ func main() {
 	//3、改造方法,使用连接池,避免session过去//
 	// 初始化 Gin 路由
 	r := gin.Default()
-	// 加载模板文件(你可以自定义路径)
-	r.LoadHTMLGlob("templates/*")
 
 	client, err := NewNebulaClient(HostList, UserName, PassWord)
 	if err != nil {
@@ -168,9 +166,9 @@ func main() {
 	defer client.Close()
 	// 注册 POST 接口
 	// 提供 HTML 页面
-	r.GET("/legal/graph", func(c *gin.Context) {
-		c.HTML(http.StatusOK, "graph.html", nil)
-	})
+	//r.GET("/legal/graph", func(c *gin.Context) {
+	//	c.HTML(http.StatusOK, "graph.html", nil)
+	//})
 	r.POST("/check-relations", func(c *gin.Context) {
 		var req CheckRequest
 		if err := c.ShouldBindJSON(&req); err != nil {

+ 157 - 0
graph/multi_deep.go

@@ -0,0 +1,157 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"strings"
+)
+
+// ngqlStringEscaper backslash-escapes the characters that would otherwise end
+// or alter a double-quoted nGQL string literal.
+var ngqlStringEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)
+
+// escapeNGQLString returns s in a form that is safe to place between double
+// quotes in an nGQL statement.
+func escapeNGQLString(s string) string {
+	return ngqlStringEscaper.Replace(s)
+}
+
+// CheckLegalRelationshipsByStep checks, layer by layer, whether the given
+// companies are connected to each other in the graph.
+//
+// For every start point of the current layer it runs a MATCH query of at most
+// stepLimit hops whose end vertex must carry one of the other names, and
+// repeats with the next layer until deep hops in total have been covered.
+// Collected paths that isSubPath reports as contained in another collected
+// path (checked in both directions) are left out of the result.
+//
+// It returns true together with the textual path descriptions when at least
+// one path remains, and false with a nil slice otherwise. The order of the
+// returned descriptions is not defined because they are gathered from a map.
+// An error is returned when fewer than two names are given or when stepLimit
+// is not positive; failed queries are logged and skipped.
+func (c *NebulaClient) CheckLegalRelationshipsByStep(names []string, deep, stepLimit int) (bool, []string, error) {
+	if len(names) < 2 {
+		return false, nil, fmt.Errorf("企业数量不足,至少需要两个")
+	}
+	if stepLimit < 1 {
+		// A non-positive step would never advance the layer loop below.
+		return false, nil, fmt.Errorf("stepLimit 必须大于 0,当前为 %d", stepLimit)
+	}
+
+	visited := make(map[string]bool)
+	pathMap := map[string][]string{}  // path key -> node names along the path
+	pathStrMap := map[string]string{} // path key -> printable path description
+	nameByVid := map[string]string{}  // vertex id -> name, filled on successful lookups only
+	currentLayer := names             // start points of the layer being processed
+
+	// lookupName resolves a vertex id to its company name; it returns "" when
+	// the lookup fails, which is what the path description then shows.
+	lookupName := func(vid string) string {
+		if name, ok := nameByVid[vid]; ok {
+			return name
+		}
+		lea, err := getLegalByVid(c.session, vid)
+		if err != nil || lea == nil {
+			return ""
+		}
+		nameByVid[vid] = lea.Name
+		return lea.Name
+	}
+
+	for totalStep := 1; totalStep <= deep; totalStep += stepLimit {
+		// The last layer may have fewer than stepLimit hops left.
+		thisStep := min(stepLimit, deep-totalStep+1)
+		var nextLayer []string
+
+		for _, start := range currentLayer {
+			// Every requested company except the start itself is a valid end
+			// vertex. The names are quoted so that the IN list holds string
+			// literals.
+			targets := make([]string, 0, len(names))
+			for _, t := range names {
+				if t != start {
+					targets = append(targets, `"`+escapeNGQLString(t)+`"`)
+				}
+			}
+			targetList := strings.Join(targets, ", ")
+
+			query := fmt.Sprintf(`
+USE %s;
+MATCH p=(a:Legal{name:"%s"})-[*1..%d]-(b:Legal)
+WHERE b.Legal.name IN [%s]
+RETURN p LIMIT 20
+`, Table_Space, escapeNGQLString(start), thisStep, targetList)
+
+			resp, err := c.ExecuteWithReconnect(query)
+			if err != nil || resp == nil || !resp.IsSucceed() {
+				// resp can be nil here, so only read its message when present.
+				errMsg := ""
+				if resp != nil {
+					errMsg = resp.GetErrorMsg()
+				}
+				log.Printf("查询失败: %v, %s\n", err, errMsg)
+				continue
+			}
+
+			for _, row := range resp.GetRows() {
+				if len(row.Values) == 0 || !row.Values[0].IsSetPVal() {
+					continue
+				}
+
+				path := row.Values[0].GetPVal()
+				var namesInPath []string
+				var builder strings.Builder
+
+				// Start vertex of the path.
+				startName := ""
+				if src := path.Src; src != nil && src.Vid != nil && src.Vid.IsSetSVal() {
+					startName = lookupName(string(src.Vid.GetSVal()))
+				}
+				namesInPath = append(namesInPath, startName)
+				builder.WriteString(startName)
+
+				// Each step contributes an arrow for its direction and the
+				// name of the vertex it leads to.
+				for _, step := range path.Steps {
+					dstName := ""
+					if step.Dst != nil && step.Dst.Vid != nil && step.Dst.Vid.IsSetSVal() {
+						dstName = lookupName(string(step.Dst.Vid.GetSVal()))
+					}
+					switch {
+					case step.Type > 0:
+						builder.WriteString(" → ")
+					case step.Type < 0:
+						builder.WriteString(" ← ")
+					default:
+						builder.WriteString(" - ")
+					}
+					builder.WriteString(dstName)
+					namesInPath = append(namesInPath, dstName)
+				}
+
+				// Remember the path under its key; a later path with the same
+				// key overwrites the earlier one.
+				key := generatePathKey(namesInPath)
+				pathMap[key] = namesInPath
+				pathStrMap[key] = builder.String()
+
+				// Queue the end of the path as a start point for the next layer.
+				// NOTE(review): the query already restricts the end vertex to
+				// names, so this only fires when the resolved name is not in
+				// names (e.g. an empty name after a failed lookup) — confirm
+				// the intended expansion rule.
+				last := namesInPath[len(namesInPath)-1]
+				if !visited[last] && !isInList(names, last) {
+					visited[last] = true
+					nextLayer = append(nextLayer, last)
+				}
+			}
+		}
+
+		currentLayer = nextLayer
+	}
+
+	// Drop every path that is contained in another one, in either direction.
+	var result []string
+	for k1, nodes1 := range pathMap {
+		shouldAdd := true
+		for k2, nodes2 := range pathMap {
+			if k1 == k2 {
+				continue
+			}
+			if isSubPath(nodes1, nodes2) || isSubPath(reverseSlice(nodes1), nodes2) {
+				shouldAdd = false
+				break
+			}
+		}
+		if shouldAdd {
+			result = append(result, pathStrMap[k1])
+		}
+	}
+
+	if len(result) == 0 {
+		return false, nil, nil
+	}
+	return true, result, nil
+}
+
+// isInList reports whether target occurs anywhere in arr.
+func isInList(arr []string, target string) bool {
+	found := false
+	for i := 0; i < len(arr); i++ {
+		if arr[i] == target {
+			found = true
+			break
+		}
+	}
+	return found
+}
+
+// min returns the smaller of the two integers a and b.
+func min(a, b int) int {
+	smaller := b
+	if a < b {
+		smaller = a
+	}
+	return smaller
+}

+ 15 - 1
graph/readme.txt

@@ -1,3 +1,17 @@
 处理 EquityGraph,
 处理凭安企业数据,分为 投资关系和疑似有关系
-目的;为了找出 相关的企业信息
+目的:为了找出相关的企业信息
+
+
+目标:
+    1、实现企业之间的投资关系
+    2、企业之间的疑似关系:需要依赖企业的联系人、联系电话、邮箱三个字段,其中两个相同就认为存在疑似关系
+    3、企业高管是否存在关系:需要判断 employees 是否存在重复
+
+
+
+ 数据:
+    1、点,Legal,属性有:name、code、type;代表企业
+    2、边:目前主要设计三种,投资、疑似以及董事 三个关系
+
+

+ 0 - 97
graph/templates/graph.html

@@ -1,97 +0,0 @@
-<!DOCTYPE html>
-<html lang="zh-CN">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>ECharts 关系图示例</title>
-    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.0/dist/echarts.min.js"></script>
-</head>
-<body>
-<div id="main" style="width: 1200px; height: 800px;"></div>
-<script type="text/javascript">
-    var myChart = echarts.init(document.getElementById('main'));
-
-    var option = {
-        title: {
-            text: '投资关系图'
-        },
-        tooltip: {},
-        animationDurationUpdate: 1500,
-        animationEasingUpdate: 'quinticInOut',
-        series: [
-            {
-                type: 'graph',
-                layout: 'none',
-                symbolSize: 50,
-                roam: true,
-                label: {
-                    show: true
-                },
-                edgeSymbol: ['circle', 'arrow'],
-                edgeSymbolSize: [4, 10],
-                edgeLabel: {
-                    fontSize: 20
-                },
-                data: [
-                    {
-                        name: '北京剑鱼信息技术有限公司',
-                        x: 300,
-                        y: 300
-                    },
-                    {
-                        name: '北京拓普丰联信息科技股份有限公司',
-                        x: 800,
-                        y: 300
-                    },
-                    {
-                        name: '宁波隆华汇博源创业投资合伙企业(有限合伙)',
-                        x: 550,
-                        y: 100
-                    },
-                    {
-                        name: '上海元藩投资有限公司',
-                        x: 550,
-                        y: 500
-                    }
-                ],
-                links: [
-                    {
-                        source: '上海元藩投资有限公司',
-                        target: '宁波隆华汇博源创业投资合伙企业(有限合伙)',
-                        symbolSize: [5, 20],
-                        label: {
-                            show: true
-                        },
-                        lineStyle: {
-                            width: 5,
-                            curveness: 0.2
-                        }
-                    },
-                    {
-                        source: '宁波隆华汇博源创业投资合伙企业(有限合伙)',
-                        target: '北京拓普丰联信息科技股份有限公司',
-                        label: {
-                            show: true
-                        },
-                        lineStyle: {
-                            curveness: 0.2
-                        }
-                    },
-                    {
-                        source: '北京拓普丰联信息科技股份有限公司',
-                        target: '北京剑鱼信息技术有限公司'
-                    }
-                ],
-                lineStyle: {
-                    opacity: 0.9,
-                    width: 2,
-                    curveness: 0
-                }
-            }
-        ]
-    };
-
-    myChart.setOption(option);
-</script>
-</body>
-</html>

+ 1 - 4
graph/utils.go

@@ -1060,7 +1060,7 @@ RETURN p LIMIT 1
 	return false, nil, nil
 }
 
-// CheckLegalRelationships CheckLegalRelationships
+// CheckLegalRelationships 判断提供的企业名单,是否存在投资关系
 func (c *NebulaClient) CheckLegalRelationships(names []string, deep, stype int) (bool, []string, error) {
 	if len(names) < 2 {
 		return false, nil, fmt.Errorf("企业数量不足,至少需要两个")
@@ -1191,6 +1191,3 @@ RETURN p LIMIT 1
 	}
 	return false, nil, nil
 }
-
-// ---//
-// 1. 数据结构定义

+ 169 - 0
graph/yisi.go

@@ -0,0 +1,169 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"github.com/go-redis/redis/v8"
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"log"
+	"strings"
+)
+
+// Company is the per-company record that processCompanies serialises to JSON
+// and stores in Redis, keyed by credit number, for later comparison.
+type Company struct {
+	CreditNo    string `bson:"credit_no"`    // credit number; used as the Redis hash field and to skip self-matches
+	CompanyName string `bson:"company_name"` // company name
+	ContactName string // contact person taken from an annual report (no bson tag)
+	Phone       string // phone number taken from an annual report (no bson tag)
+	Email       string // e-mail address taken from an annual report (no bson tag)
+}
+
+// MatchResult is the document processCompanies inserts into the match
+// collection when two companies share at least two contact fields.
+type MatchResult struct {
+	CompanyA      string   `bson:"company_a"`      // name of the company being processed
+	CreditNoA     string   `bson:"credit_no_a"`    // credit number of the company being processed
+	CompanyB      string   `bson:"company_b"`      // name of the previously stored company it matched
+	CreditNoB     string   `bson:"credit_no_b"`    // credit number of the previously stored company
+	MatchedFields []string `bson:"matched_fields"` // which of "contact", "phone", "email" were equal
+}
+
+// ctx is the package-wide background context passed to every MongoDB and
+// Redis call in this file; it carries no deadline or cancellation.
+var ctx = context.Background()
+
+// initMongo connects to the MongoDB deployment at uri and returns the
+// "qyxy_std" and "wcc" collections of the "mixdata" database, in that order.
+//
+// The process is terminated via log.Fatal when the connection cannot be set
+// up or the server does not answer a ping. The underlying client is not
+// returned and is never disconnected by this function.
+func initMongo(uri string) (*mongo.Collection, *mongo.Collection) {
+	client, err := mongo.Connect(ctx, options.Client().ApplyURI(uri))
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Ping so that an unreachable server is reported here rather than on the
+	// first query; a nil read preference uses the client's default.
+	if err := client.Ping(ctx, nil); err != nil {
+		log.Fatal(err)
+	}
+	db := client.Database("mixdata")
+	return db.Collection("qyxy_std"), db.Collection("wcc")
+}
+
+// initRedis builds a Redis client for the server at addr, authenticating with
+// password and selecting database 0.
+func initRedis(addr, password string) *redis.Client {
+	opts := &redis.Options{
+		Addr:     addr,
+		Password: password,
+		DB:       0,
+	}
+	return redis.NewClient(opts)
+}
+
+// buildRedisKey returns one Redis key for every pair of non-empty values among
+// contact, phone and email: "cp:" for contact+phone, "ce:" for contact+email
+// and "pe:" for phone+email, always in that order. A pair with an empty member
+// produces no key, so the result is nil when fewer than two values are set.
+func buildRedisKey(contact, phone, email string) []string {
+	pairs := [...]struct {
+		prefix, first, second string
+	}{
+		{"cp:", contact, phone},
+		{"ce:", contact, email},
+		{"pe:", phone, email},
+	}
+
+	var keys []string
+	for _, p := range pairs {
+		if p.first == "" || p.second == "" {
+			continue
+		}
+		keys = append(keys, p.prefix+p.first+"|"+p.second)
+	}
+	return keys
+}
+
+// bsonString returns the whitespace-trimmed string form of doc[key]. A missing
+// or nil value yields "" instead of the "<nil>" text fmt.Sprint would produce.
+func bsonString(doc bson.M, key string) string {
+	switch v := doc[key].(type) {
+	case nil:
+		return ""
+	case string:
+		return strings.TrimSpace(v)
+	default:
+		return strings.TrimSpace(fmt.Sprint(v))
+	}
+}
+
+// processCompanies scans every document of coll and looks for companies that
+// share contact details.
+//
+// For each document it collects the distinct (operator_name, company_phone,
+// company_email) triples found in its annual_reports array. Each triple is
+// turned into pair keys by buildRedisKey; for every key the companies already
+// stored in that Redis hash are compared with the current one, and a
+// MatchResult is inserted into matchColl when at least two of the three fields
+// are equal. The current company is then stored in the hash under its credit
+// number so that later documents can match against it.
+//
+// Documents without a string credit_no or without a non-empty annual_reports
+// array are skipped. Errors on individual documents or Redis/Mongo calls are
+// logged and processing continues; a failure to open the cursor terminates the
+// process via log.Fatal.
+func processCompanies(redisCli *redis.Client, coll *mongo.Collection, matchColl *mongo.Collection) {
+	cur, err := coll.Find(ctx, bson.M{})
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer cur.Close(ctx)
+
+	// keySet is one distinct combination of contact details of a company.
+	type keySet struct {
+		Contact string
+		Phone   string
+		Email   string
+	}
+
+	for cur.Next(ctx) {
+		var doc bson.M
+		if err := cur.Decode(&doc); err != nil {
+			log.Println("decode error:", err)
+			continue
+		}
+
+		// credit_no identifies the company everywhere below, so a document
+		// without it cannot be processed; a bare type assertion would panic.
+		creditNo, ok := doc["credit_no"].(string)
+		if !ok {
+			log.Println("skip document: credit_no missing or not a string")
+			continue
+		}
+		// A missing company name only leaves the name in the result empty.
+		companyName, _ := doc["company_name"].(string)
+
+		annualReports, ok := doc["annual_reports"].(primitive.A)
+		if !ok || len(annualReports) == 0 {
+			continue // annual_reports is absent, not an array, or empty
+		}
+
+		// Collect the distinct contact/phone/e-mail triples of this company.
+		keySetMap := make(map[string]keySet)
+		for _, item := range annualReports {
+			report, ok := item.(bson.M)
+			if !ok {
+				continue
+			}
+			contact := bsonString(report, "operator_name")
+			phone := bsonString(report, "company_phone")
+			email := bsonString(report, "company_email")
+			key := contact + "|" + phone + "|" + email
+			keySetMap[key] = keySet{contact, phone, email}
+		}
+
+		for _, ks := range keySetMap {
+			if ks.Contact == "" && ks.Phone == "" && ks.Email == "" {
+				continue
+			}
+
+			// The record stored for this company is the same for every key.
+			compStr, err := json.Marshal(Company{
+				CreditNo:    creditNo,
+				CompanyName: companyName,
+				ContactName: ks.Contact,
+				Phone:       ks.Phone,
+				Email:       ks.Email,
+			})
+			if err != nil {
+				log.Println("marshal company error:", err)
+				continue
+			}
+
+			for _, redisKey := range buildRedisKey(ks.Contact, ks.Phone, ks.Email) {
+				// Look up the companies already registered under this key.
+				fields, err := redisCli.HGetAll(ctx, redisKey).Result()
+				if err != nil {
+					log.Println("redis get error:", err)
+					continue
+				}
+
+				for _, otherStr := range fields {
+					var other Company
+					if err := json.Unmarshal([]byte(otherStr), &other); err != nil {
+						continue
+					}
+					// Ignore the entry this company stored itself.
+					if other.CreditNo == creditNo {
+						continue
+					}
+
+					var matchedFields []string
+					if ks.Contact != "" && ks.Contact == other.ContactName {
+						matchedFields = append(matchedFields, "contact")
+					}
+					if ks.Phone != "" && ks.Phone == other.Phone {
+						matchedFields = append(matchedFields, "phone")
+					}
+					if ks.Email != "" && ks.Email == other.Email {
+						matchedFields = append(matchedFields, "email")
+					}
+					if len(matchedFields) < 2 {
+						continue
+					}
+
+					match := MatchResult{
+						CompanyA:      companyName,
+						CreditNoA:     creditNo,
+						CompanyB:      other.CompanyName,
+						CreditNoB:     other.CreditNo,
+						MatchedFields: matchedFields,
+					}
+					if _, err := matchColl.InsertOne(ctx, match); err != nil {
+						log.Println("insert match error:", err)
+					}
+				}
+
+				// Register this company under the key; using credit_no as the
+				// hash field keeps one entry per company.
+				if err := redisCli.HSet(ctx, redisKey, creditNo, compStr).Err(); err != nil {
+					log.Println("redis set error:", err)
+				}
+			}
+		}
+	}
+
+	// Next returns false on both exhaustion and failure; report the latter.
+	if err := cur.Err(); err != nil {
+		log.Println("cursor error:", err)
+	}
+}