main.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/gin-gonic/gin"
  7. "github.com/olivere/elastic/v7"
  8. nebula "github.com/vesoft-inc/nebula-go/v3"
  9. "io"
  10. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  11. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  12. "log"
  13. "net/http"
  14. "regexp"
  15. "strconv"
  16. "strings"
  17. )
  // Package-level configuration and shared handles.
  var (
  	// re matches an optionally signed integer or decimal number.
  	re = regexp.MustCompile(`[-+]?[0-9]*\.?[0-9]+`)
  	// HostList holds the Nebula Graph server addresses passed to ConnectToNebula.
  	HostList = []nebula.HostAddress{{Host: "114.116.213.97", Port: 9669}}
  	//HostList = []nebula.HostAddress{{Host: "127.0.0.1", Port: 9669}}
  	// UserName and PassWord are the Nebula Graph credentials.
  	// NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
  	UserName = "root"
  	PassWord = "jianyu@123"
  	// Mgo181 is the shared MongoDB handle. It is not initialised in the visible code
  	// (presumably done by InitMgo — confirm).
  	Mgo181 *mongodb.MongodbSim
  	//Table_Space = "legal_profile"
  	// Table_Space is the Nebula graph space selected with USE before each statement.
  	Table_Space = "legal_profile"
  	// WorkerCount and BatchSize are not referenced in the visible code;
  	// presumably batch-import tuning knobs — confirm against the rest of the file.
  	WorkerCount = 5
  	BatchSize = 100
  )
  // Legal is the struct representing a company (legal entity) vertex.
  type Legal struct {
  	Id string
  	Name string
  	Code string
  	Type string // type: enterprise, public institution, or government department
  	State string // status: valid/invalid; NOT the registration statuses 存续/在营/开业/在册
  }
  // Invest is the struct representing an investment-relationship edge between companies.
  type Invest struct {
  	// FromCode is the source vertex ID of the edge (the investor).
  	// The visible callers fill it with a company name, not a code.
  	FromCode string
  	// ToCode is the destination vertex ID of the edge (the invested company).
  	ToCode string
  	Amount float64
  	Ratio float64
  }
  // InsertJob bundles company vertices with investment edges.
  // It is not referenced in the visible code; presumably a unit of batched insert work — confirm.
  type InsertJob struct {
  	Companies []Legal
  	Relations []Invest
  }
  // InvestVertex is a graph vertex produced by the shareholding traversal in getInvByLevel.
  type InvestVertex struct { // vertex
  	id string
  	company_id string
  	company_name string
  	credit_no string
  }
  // InvestEdge is a shareholding edge produced by the traversal in getInvByLevel:
  // the shareholder (stock_*) holds a stake in the company (company_*).
  type InvestEdge struct { // edge
  	company_id string
  	company_name string
  	stock_id string
  	stock_name string
  	stock_rate float64
  	stock_amount float64
  	stock_level int // traversal level at which the edge was found
  	stock_type int // 0 = corporate shareholder, 1 = natural-person shareholder
  }
  65. // ConnectToNebula 封装数据库连接函数
  66. func ConnectToNebula(hosts []nebula.HostAddress, username, password string) (*nebula.Session, *nebula.ConnectionPool, error) {
  67. // 创建连接池配置
  68. config := nebula.GetDefaultConf()
  69. config.UseHTTP2 = false
  70. config.HandshakeKey = ""
  71. // 初始化连接池
  72. pool, err := nebula.NewConnectionPool(hosts, config, nebula.DefaultLogger{})
  73. if err != nil {
  74. return nil, nil, err
  75. }
  76. // 获取会话
  77. session, err := pool.GetSession(username, password)
  78. if err != nil {
  79. pool.Close()
  80. return nil, nil, err
  81. }
  82. return session, pool, nil
  83. }
  // CheckRequest is the JSON body accepted by the POST /check-relations endpoint.
  type CheckRequest struct {
  	Names []string `json:"names"`
  	Deep int `json:"deep"`
  	Stype int `json:"stype"` // 0: simple mode, return as soon as a match is found; 1: match against all data
  }
  89. func main() {
  90. //InitMgo()
  91. //dda()
  92. //dealCompanyBase22()
  93. //dealCompanyBase() //迭代company_base 处理企业数据
  94. //batchDealGraph() // 迭代es 处理企业数据;
  95. //
  96. //log.Println("数据处理完毕!!!!!!!")
  97. //return
  98. //封装对外提供的HTTP
  99. session, pool, err := ConnectToNebula(HostList, UserName, PassWord)
  100. if err != nil {
  101. log.Fatalf("Failed to connect to Nebula Graph: %v", err)
  102. }
  103. defer pool.Close()
  104. defer session.Release()
  105. // 初始化 Gin 路由
  106. r := gin.Default()
  107. // 注册 POST 接口
  108. r.POST("/check-relations", func(c *gin.Context) {
  109. var req CheckRequest
  110. if err := c.ShouldBindJSON(&req); err != nil {
  111. c.JSON(http.StatusBadRequest, gin.H{"error": "请求参数无效"})
  112. return
  113. }
  114. _, results, err := CheckLegalRelationships(session, req.Names, req.Deep, req.Stype)
  115. if err != nil {
  116. c.JSON(http.StatusInternalServerError, gin.H{"error": "查询失败", "details": err.Error()})
  117. return
  118. }
  119. c.JSON(http.StatusOK, results)
  120. })
  121. // 启动服务
  122. r.Run(":8080")
  123. }
  124. func dda() {
  125. name := "北京拓普丰联信息科技股份有限公司"
  126. rea, resb := GetInvByLevel(name, 5, 0, false)
  127. // 调用封装的连接函数
  128. session, pool, err := ConnectToNebula(HostList, UserName, PassWord)
  129. if err != nil {
  130. log.Fatalf("Failed to connect to Nebula Graph: %v", err)
  131. }
  132. defer pool.Close()
  133. defer session.Release()
  134. for _, v := range rea {
  135. d := Legal{
  136. Id: v.company_id,
  137. Name: v.company_name,
  138. Code: v.credit_no,
  139. Type: "企业",
  140. }
  141. res, err := InsertCompany(session, d)
  142. if err != nil {
  143. log.Println(err, res)
  144. }
  145. }
  146. for _, v := range resb {
  147. d := Invest{
  148. FromCode: v.stock_name,
  149. ToCode: v.company_name,
  150. Amount: v.stock_amount,
  151. Ratio: v.stock_rate,
  152. }
  153. err := InsertInvestRel(session, d)
  154. if err != nil {
  155. log.Println(err, d)
  156. }
  157. }
  158. }
  159. // getQyxytData 获取企业数据
  160. func getQyxytData() {
  161. // 调用封装的连接函数
  162. session, pool, err := ConnectToNebula(HostList, UserName, PassWord)
  163. if err != nil {
  164. log.Fatalf("Failed to connect to Nebula Graph: %v", err)
  165. }
  166. defer pool.Close()
  167. defer session.Release()
  168. url := "http://172.17.4.184:19908"
  169. //url := "http://127.0.0.1:19908"
  170. username := "jybid"
  171. password := "Top2023_JEB01i@31"
  172. index := "qyxy" //索引名称
  173. // 创建 Elasticsearch 客户端
  174. client, err := elastic.NewClient(
  175. elastic.SetURL(url),
  176. elastic.SetBasicAuth(username, password),
  177. elastic.SetSniff(false),
  178. )
  179. if err != nil {
  180. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  181. }
  182. //---------------------------//
  183. //query := elastic.NewBoolQuery()
  184. //query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
  185. //query.Must(elastic.NewTermQuery("company_city", "北京市"))
  186. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  187. query := elastic.NewBoolQuery().
  188. //北京,天津,河北,上海,江苏,浙江,安徽
  189. //Must(elastic.NewTermQuery("area", "北京市")).
  190. //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  191. MustNot(
  192. elastic.NewTermQuery("company_type", "个体工商户"),
  193. ).
  194. //Must(elastic.NewTermQuery("company_name", "河南拓普计算机网络工程有限公司"))
  195. Must(elastic.NewTermsQuery("company_area", "河南"))
  196. ctx := context.Background()
  197. //开始滚动搜索
  198. scrollID := ""
  199. scroll := "10m"
  200. searchSource := elastic.NewSearchSource().
  201. Query(query).
  202. Size(10000).
  203. Sort("_doc", true) //升序排序
  204. //Sort("_doc", false) //降序排序
  205. searchService := client.Scroll(index).
  206. Size(10000).
  207. Scroll(scroll).
  208. SearchSource(searchSource)
  209. res, err := searchService.Do(ctx)
  210. if err != nil {
  211. if err == io.EOF {
  212. fmt.Println("没有数据")
  213. } else {
  214. panic(err)
  215. }
  216. }
  217. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  218. fmt.Println("总数是:", res.TotalHits())
  219. total := 0
  220. for len(res.Hits.Hits) > 0 {
  221. for _, hit := range res.Hits.Hits {
  222. var doc map[string]interface{}
  223. err := json.Unmarshal(hit.Source, &doc)
  224. if err != nil {
  225. log.Printf("解析文档失败:%s", err)
  226. continue
  227. }
  228. company1 := Legal{
  229. Name: util.ObjToString(doc["company_name"]),
  230. Code: util.ObjToString(doc["credit_no"]),
  231. Type: "企业",
  232. }
  233. /**
  234. 1.stock_name_id 为空,直接跳过
  235. 2.stock_name 为空,直接跳过
  236. 3.stock_name 含有 已除名/不适宜/待清理/拟吊销 ,直接跳过
  237. 4.stock_name 不含中文,跳过
  238. */
  239. if util.ObjToString(doc["company_name"]) == "" || util.ObjToString(doc["credit_no"]) == "" {
  240. continue
  241. }
  242. if strings.Contains(util.ObjToString(doc["company_name"]), "已除名") {
  243. continue
  244. }
  245. res1, err1 := InsertCompany(session, company1)
  246. if err != nil {
  247. log.Println("InsertCompany err", res1, err1)
  248. }
  249. //边
  250. if partners, ok := doc["partners"].([]interface{}); ok {
  251. for _, partner := range partners {
  252. if da, ok := partner.(map[string]interface{}); ok {
  253. if util.ObjToString(da["stock_type"]) == "企业法人" {
  254. if util.ObjToString(da["stock_name"]) == "" || util.ObjToString(da["identify_no"]) == "" {
  255. continue
  256. } else {
  257. company2 := Legal{
  258. Name: util.ObjToString(da["stock_name"]),
  259. Code: util.ObjToString(da["identify_no"]),
  260. Type: "企业",
  261. }
  262. res2, err2 := InsertCompany(session, company2)
  263. if err2 != nil {
  264. log.Println("InsertCompany err", res2, err2)
  265. }
  266. //
  267. if err1 != nil || err2 != nil {
  268. continue
  269. }
  270. where := map[string]interface{}{
  271. "company_name": util.ObjToString(doc["company_name"]),
  272. "stock_name": util.ObjToString(da["stock_name"]),
  273. }
  274. ddd, _ := Mgo181.FindOne("company_partner", where)
  275. if len(*ddd) > 0 {
  276. par := *ddd
  277. amount := ParseStockCapital(util.ObjToString(par["stock_capital"]))
  278. investRel := Invest{FromCode: util.ObjToString(da["stock_name"]), ToCode: util.ObjToString(doc["company_name"]), Ratio: util.Float64All(par["stock_proportion"]), Amount: amount}
  279. err = InsertInvestRel(session, investRel)
  280. if err != nil {
  281. log.Println("InsertInvestRel", err, investRel)
  282. }
  283. }
  284. }
  285. }
  286. }
  287. }
  288. }
  289. }
  290. total = total + len(res.Hits.Hits)
  291. scrollID = res.ScrollId
  292. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  293. log.Println("current count:", total)
  294. if err != nil {
  295. if err == io.EOF {
  296. // 滚动到最后一批数据,退出循环
  297. break
  298. }
  299. log.Println("滚动搜索失败:", err, res)
  300. break // 处理错误时退出循环
  301. }
  302. }
  303. // 在循环外调用 ClearScroll
  304. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  305. if err != nil {
  306. log.Printf("清理滚动搜索失败:%s", err)
  307. }
  308. log.Println("结束~~~~~~~~~~~~~~~")
  309. }
  310. // dealCompanyPartner 处理企业投资关系
  311. func dealCompanyPartner() {
  312. // 调用封装的连接函数
  313. session, pool, err := ConnectToNebula(HostList, UserName, PassWord)
  314. if err != nil {
  315. log.Fatalf("Failed to connect to Nebula Graph: %v", err)
  316. }
  317. defer pool.Close()
  318. defer session.Release()
  319. //log.Println("session", session)
  320. defer util.Catch()
  321. sess := Mgo181.GetMgoConn()
  322. defer Mgo181.DestoryMongoConn(sess)
  323. it := sess.DB("mixdata").C("company_partner").Find(nil).Select(nil).Iter()
  324. count := 0
  325. //realNum := 0
  326. for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
  327. if count%10000 == 0 {
  328. log.Println("current:", count, tmp["stock_name"], tmp["company_name"])
  329. }
  330. //个人企业跳过
  331. if util.IntAll(tmp["is_personal"]) == 1 {
  332. continue
  333. }
  334. if util.IntAll(tmp["use_flag"]) > 0 {
  335. continue
  336. }
  337. company1 := Legal{
  338. Name: util.ObjToString(tmp["stock_name"]),
  339. Code: util.ObjToString(tmp["stock_name_id"]),
  340. }
  341. /**
  342. 1.stock_name_id 为空,直接跳过
  343. 2.stock_name 为空,直接跳过
  344. 3.stock_name 含有 已除名/不适宜/待清理/拟吊销 ,直接跳过
  345. 4.stock_name 不含中文,跳过
  346. */
  347. company2 := Legal{
  348. Name: util.ObjToString(tmp["company_name"]),
  349. Code: util.ObjToString(tmp["company_id"]),
  350. }
  351. res1, err1 := InsertCompany(session, company1)
  352. if err != nil {
  353. log.Println("InsertCompany err", res1, err1)
  354. }
  355. res2, err2 := InsertCompany(session, company2)
  356. if err != nil {
  357. log.Println("InsertCompany err", res2, err2)
  358. }
  359. if err1 != nil || err2 != nil {
  360. continue
  361. }
  362. //边
  363. amount := ParseStockCapital(util.ObjToString(tmp["stock_capital"]))
  364. investRel := Invest{FromCode: res1, ToCode: res2, Ratio: util.Float64All(tmp["stock_proportion"]), Amount: amount}
  365. err = InsertInvestRel(session, investRel)
  366. if err != nil {
  367. log.Println("InsertInvestRel", err, investRel)
  368. }
  369. }
  370. }
  371. // InsertCompany 插入公司节点的方法
  372. func InsertCompany(session *nebula.Session, company Legal) (string, error) {
  373. // 构建插入公司节点的查询
  374. //insertCompanyStmt := `
  375. // USE ` + Table_Space + `;
  376. // INSERT VERTEX company(company_id,name) VALUES "%s":("%s", "%s");
  377. //`
  378. //insertCompanyStmt = fmt.Sprintf(insertCompanyStmt, inv.id, inv.company_id, inv.company_name)
  379. query := fmt.Sprintf(`
  380. USE `+Table_Space+`;
  381. INSERT VERTEX Legal(name, code, type, state ) VALUES "%s":("%s", "%s", "%s", "%s")
  382. `, company.Name, company.Name, company.Code, company.Type, company.State)
  383. // 执行查询
  384. result, err := session.Execute(query)
  385. if err != nil {
  386. log.Println("InsertCompany", result)
  387. return "", err
  388. }
  389. // 打印返回结果
  390. //fmt.Println("Insert Company Result:", result)
  391. // 返回节点ID(通常可以通过返回的结果中的 "_vid" 字段获取)
  392. return company.Name, nil
  393. }
  394. // InsertInvestRel 插入投资关系边的方法
  395. func InsertInvestRel(session *nebula.Session, investRel Invest) error {
  396. // 构建插入投资关系边的查询
  397. query := fmt.Sprintf(`
  398. USE `+Table_Space+`;
  399. INSERT EDGE Invest(amount, ratio) VALUES "%s"->"%s":(%f, %f)
  400. `, investRel.FromCode, investRel.ToCode, investRel.Amount, investRel.Ratio)
  401. // 执行查询
  402. result, err := session.Execute(query)
  403. if err != nil {
  404. log.Println("InsertInvestRel", result)
  405. return err
  406. }
  407. // 打印返回结果
  408. //fmt.Println("Insert InvestRel Result:", result)
  409. return nil
  410. }
  // stockCapitalNumberRe extracts the first run of digits and dots from a
  // capital string. It is compiled once instead of on every call.
  var stockCapitalNumberRe = regexp.MustCompile(`[\d.]+`)

  // ParseStockCapital converts a free-text capital amount such as "100万元",
  // "5000元" or "10万美元" into a number expressed in 万元 (10,000 RMB).
  //
  // Rules:
  //   - no recognisable number, or an unparsable one such as "1.2.3": 0
  //   - USD amounts (contain "美元"): converted at a fixed rate of 7.0 RMB/USD,
  //     whether given in 万美元 or in plain 美元
  //   - RMB amounts (contain "元" or "人民币"): divided by 10,000 unless they
  //     already carry "万"
  //   - anything else (a bare number, or "万" without a currency): taken as 万元
  func ParseStockCapital(raw string) float64 {
  	const exchangeRateUSD = 7.0

  	raw = strings.TrimSpace(raw)
  	digits := stockCapitalNumberRe.FindString(raw)
  	if digits == "" {
  		return 0
  	}
  	value, err := strconv.ParseFloat(digits, 64)
  	if err != nil {
  		return 0
  	}

  	inTenThousands := strings.Contains(raw, "万")
  	switch {
  	case strings.Contains(raw, "美元"):
  		// Must be tested before the generic "元" case, which it also matches.
  		if !inTenThousands {
  			value /= 10000
  		}
  		value *= exchangeRateUSD
  	case strings.Contains(raw, "元") || strings.Contains(raw, "人民币"):
  		if !inTenThousands {
  			value /= 10000
  		}
  	}
  	return value
  }
  438. /*
  439. 根据公司名称和层级向上挖掘,获取顶点和边;
  440. maxLevel 挖掘层级数量;
  441. direction 0:双向挖掘 -1:向上挖掘 1:向下挖掘
  442. person true:保留自然人股东 false:不保留自然人股东
  443. */
  444. func GetInvByLevel(company_name string, maxLevel int, direction int, person bool) (map[string]InvestVertex, []InvestEdge) {
  445. verter := map[string]InvestVertex{}
  446. edges := []InvestEdge{}
  447. if direction == 0 {
  448. v1, e1 := getInvByLevel(company_name, maxLevel, 1, person)
  449. v2, e2 := getInvByLevel(company_name, maxLevel, -1, person)
  450. for k, v := range v1 {
  451. verter[k] = v
  452. }
  453. for k, v := range v2 {
  454. verter[k] = v
  455. }
  456. edges = append(edges, e1...)
  457. edges = append(edges, e2...)
  458. } else {
  459. verter, edges = getInvByLevel(company_name, maxLevel, direction, person)
  460. }
  461. return verter, edges
  462. }
  463. func getInvByLevel(company_name string, maxLevel int, direction int, person bool) (map[string]InvestVertex, []InvestEdge) {
  464. data, _ := Mgo181.FindOne("company_base", map[string]interface{}{
  465. "company_name": company_name,
  466. })
  467. company_id := fmt.Sprint((*data)["company_id"])
  468. credit_no := fmt.Sprint((*data)["credit_no"])
  469. var edges = []InvestEdge{} //记录边
  470. var verter = map[string]InvestVertex{} //有效顶点
  471. // 初始化队列和访问记录
  472. type node struct {
  473. companyID, companyName, creditNo string
  474. level int
  475. }
  476. queue := []node{{companyID: company_id, companyName: company_name, creditNo: credit_no, level: 1}}
  477. visited := make(map[string]bool)
  478. for len(queue) > 0 {
  479. current := queue[0]
  480. if _, ok := verter[current.companyID]; !ok {
  481. verter[current.companyID] = InvestVertex{
  482. id: current.companyID,
  483. company_id: current.companyID,
  484. company_name: current.companyName,
  485. credit_no: current.creditNo,
  486. }
  487. }
  488. queue = queue[1:]
  489. if visited[current.companyID] || current.level > maxLevel { // 防止重复处理和超过最大层级
  490. continue
  491. }
  492. visited[current.companyID] = true
  493. query := map[string]interface{}{"company_id": current.companyID}
  494. if direction > 0 {
  495. query = map[string]interface{}{"stock_name_id": current.companyID}
  496. }
  497. partners, _ := Mgo181.Find("company_partner", query, nil, nil, false, -1, -1)
  498. // 处理股东数据
  499. for _, p := range *partners {
  500. //log.Println(direction, p)
  501. if fmt.Sprint(p["is_history"]) == "1" {
  502. continue
  503. }
  504. // 构建投资关系
  505. inv := InvestEdge{
  506. company_id: fmt.Sprint(p["company_id"]),
  507. company_name: fmt.Sprint(p["company_name"]),
  508. stock_id: fmt.Sprint(p["stock_name_id"]),
  509. stock_name: fmt.Sprint(p["stock_name"]),
  510. stock_rate: convertStockCapitalToFloat(fmt.Sprint(p["stock_proportion"])),
  511. stock_amount: convertStockCapitalToFloat(fmt.Sprint(p["stock_capital"])),
  512. stock_level: current.level,
  513. stock_type: 0, // 默认机构股东
  514. }
  515. edges = append(edges, inv)
  516. // 根据股东类型是否继续挖掘
  517. if fmt.Sprint(p["stock_type"]) == "自然人股东" || convertStockCapitalToFloat(fmt.Sprint(p["is_personal"])) > 0 {
  518. inv.stock_type = 1
  519. if _, ok := verter[inv.stock_id]; !ok && person {
  520. verter[inv.stock_id] = InvestVertex{
  521. id: inv.stock_id,
  522. company_id: inv.stock_id,
  523. company_name: inv.stock_name,
  524. }
  525. }
  526. } else {
  527. where1 := map[string]interface{}{
  528. "company_name": inv.company_name,
  529. }
  530. where2 := map[string]interface{}{
  531. "company_name": inv.stock_name,
  532. }
  533. company, _ := Mgo181.FindOne("company_base", where1)
  534. stock, _ := Mgo181.FindOne("company_base", where2)
  535. // 机构股东加入队列继续穿透
  536. if direction > 0 { //向下挖掘
  537. if !visited[inv.company_id] {
  538. queue = append(queue, node{
  539. companyID: inv.company_id,
  540. companyName: inv.company_name,
  541. creditNo: util.ObjToString((*company)["credit_no"]),
  542. level: current.level + 1,
  543. })
  544. }
  545. } else { //向上挖掘
  546. if !visited[inv.stock_id] {
  547. queue = append(queue, node{
  548. companyID: inv.stock_id,
  549. companyName: inv.stock_name,
  550. creditNo: util.ObjToString((*stock)["credit_no"]),
  551. level: current.level + 1,
  552. })
  553. }
  554. }
  555. }
  556. }
  557. //log.Printf("已处理层级%d,当前队列深度%d", current.level, len(queue))
  558. }
  559. return verter, edges
  560. }
  561. func convertStockCapitalToFloat(str string) float64 {
  562. // 查找匹配的数字
  563. match := re.FindString(str)
  564. if match == "" {
  565. return 0
  566. }
  567. // 将匹配到的数字字符串转换为浮点数
  568. result, err := strconv.ParseFloat(match, 64)
  569. if err != nil {
  570. return 0
  571. }
  572. return result
  573. }