main.go 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. "gorm.io/driver/mysql"
  8. "gorm.io/gorm"
  9. "io"
  10. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  11. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  12. "log"
  13. "strings"
  14. "sync"
  15. "unicode/utf8"
  16. )
  17. func main() {
  18. /**
  19. getProjectData click 是一起使用的,统计获取中标企业信息
  20. */
  21. //getProjectDataFromEs() //1.拉取项目中标成交数据
  22. //click() //2.处理项目数据,写入clickhouse
  23. //click2()
  24. //dealData()
  25. //getProject()
  26. //getQyLimitData()
  27. //getBiddingData()
  28. //getQyxytData()
  29. //getTidb()
  30. //getEntInfo() //法人库数据
  31. //getBuyerData()
  32. //mgoBidding()
  33. //log.Println("开启第二轮")
  34. //mgoBidding()
  35. //updateMgoEntInfoBuyer()
  36. getZhiMa()
  37. log.Println("over ------------------ over")
  38. }
  39. // getBiddingData 获取标讯数据
  40. func getBiddingData() {
  41. //url := "http://172.17.4.184:19908"
  42. url := "http://127.0.0.1:19908"
  43. username := "jybid"
  44. password := "Top2023_JEB01i@31"
  45. index := "bidding" //索引名称
  46. //index := "projectset" //索引名称
  47. // 创建 Elasticsearch 客户端
  48. client, err := elastic.NewClient(
  49. elastic.SetURL(url),
  50. elastic.SetBasicAuth(username, password),
  51. elastic.SetSniff(false),
  52. )
  53. if err != nil {
  54. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  55. }
  56. //85 抽取库
  57. //Mgo := &mongodb.MongodbSim{
  58. // //MongodbAddr: "127.0.0.1:27080",
  59. // MongodbAddr: "172.17.4.85:27080",
  60. // DbName: "top",
  61. // Size: 10,
  62. // //Direct: true,
  63. //}
  64. //Mgo.InitPool()
  65. MgoB := &mongodb.MongodbSim{
  66. //MongodbAddr: "172.17.189.140:27080",
  67. MongodbAddr: "127.0.0.1:27083",
  68. Size: 10,
  69. DbName: "qfw",
  70. UserName: "SJZY_RWbid_ES",
  71. Password: "SJZY@B4i4D5e6S",
  72. Direct: true,
  73. }
  74. MgoB.InitPool()
  75. //2023年01-01 2023-10-01,,1-3季度
  76. //2024-1 - 2024-4;1704038400-1711900800
  77. //2023-10-1 2024-1-1;1696089600-1704038400
  78. //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
  79. //rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1696089600).Lt(1704038400)
  80. query := elastic.NewBoolQuery().
  81. Must(elastic.NewTermQuery("toptype", "结果")).
  82. Must(elastic.NewTermQuery("subtype", "招标"))
  83. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  84. //query := elastic.NewBoolQuery().
  85. // //北京,天津,河北,上海,江苏,浙江,安徽
  86. // //Must(elastic.NewTermQuery("area", "北京市")).
  87. // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  88. // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
  89. // Must(rangeQuery)
  90. ctx := context.Background()
  91. //开始滚动搜索
  92. scrollID := ""
  93. scroll := "10m"
  94. searchSource := elastic.NewSearchSource().
  95. Query(query).
  96. Size(10000).
  97. Sort("_doc", true) //升序排序
  98. //Sort("_doc", false) //降序排序
  99. searchService := client.Scroll(index).
  100. Size(10000).
  101. Scroll(scroll).
  102. SearchSource(searchSource)
  103. res, err := searchService.Do(ctx)
  104. if err != nil {
  105. if err == io.EOF {
  106. fmt.Println("没有数据")
  107. } else {
  108. panic(err)
  109. }
  110. }
  111. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  112. fmt.Println("总数是:", res.TotalHits())
  113. total := 0
  114. for len(res.Hits.Hits) > 0 {
  115. for _, hit := range res.Hits.Hits {
  116. var doc map[string]interface{}
  117. err := json.Unmarshal(hit.Source, &doc)
  118. if err != nil {
  119. log.Printf("解析文档失败:%s", err)
  120. continue
  121. }
  122. delete(doc, "filetext")
  123. delete(doc, "detail")
  124. //存入新表
  125. err = MgoB.InsertOrUpdate("qfw", "wcc_subtype_err_0429", doc)
  126. if err != nil {
  127. log.Println("error", doc["id"])
  128. }
  129. // 处理查询结果
  130. //area := util.ObjToString(doc["area"])
  131. //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
  132. //if !IsInStringArray(area, areas) {
  133. // continue
  134. //}
  135. //projectName := util.ObjToString(doc["projectname"])
  136. //if strings.Contains(projectName, "非政府") {
  137. // continue
  138. //}
  139. //buyerclass := util.ObjToString(doc["buyerclass"])
  140. //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
  141. // continue
  142. //}
  143. ////存入新表
  144. //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
  145. //if err != nil {
  146. // log.Println("error", doc["id"])
  147. //}
  148. }
  149. total = total + len(res.Hits.Hits)
  150. scrollID = res.ScrollId
  151. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  152. log.Println("current count:", total)
  153. if err != nil {
  154. if err == io.EOF {
  155. // 滚动到最后一批数据,退出循环
  156. break
  157. }
  158. log.Println("滚动搜索失败:", err, res)
  159. break // 处理错误时退出循环
  160. }
  161. }
  162. // 在循环外调用 ClearScroll
  163. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  164. if err != nil {
  165. log.Printf("清理滚动搜索失败:%s", err)
  166. }
  167. fmt.Println("结束~~~~~~~~~~~~~~~")
  168. }
  169. // getProjectDataFromEs 获取项目 中标成交数据
  170. func getProjectDataFromEs() {
  171. url := "http://172.17.4.184:19908"
  172. //url := "http://127.0.0.1:19908"
  173. username := "jybid"
  174. password := "Top2023_JEB01i@31"
  175. index := "projectset" //索引名称
  176. // 创建 Elasticsearch 客户端
  177. client, err := elastic.NewClient(
  178. elastic.SetURL(url),
  179. elastic.SetBasicAuth(username, password),
  180. elastic.SetSniff(false),
  181. )
  182. if err != nil {
  183. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  184. }
  185. //85 抽取库
  186. Mgo := &mongodb.MongodbSim{
  187. //MongodbAddr: "127.0.0.1:27080",
  188. MongodbAddr: "172.17.4.85:27080",
  189. DbName: "top",
  190. Size: 10,
  191. //Direct: true,
  192. }
  193. Mgo.InitPool()
  194. //MgoB := &mongodb.MongodbSim{
  195. // MongodbAddr: "172.17.189.140:27080",
  196. // //MongodbAddr: "127.0.0.1:27083",
  197. // Size: 10,
  198. // DbName: "qfw",
  199. // UserName: "SJZY_RWbid_ES",
  200. // Password: "SJZY@B4i4D5e6S",
  201. // //Direct: true,
  202. //}
  203. //MgoB.InitPool()
  204. //2023年01-01 2023-10-01,,1-3季度
  205. //2024-1 - 2024-4;1704038400-1711900800
  206. //2023-10-1 2024-1-1;1696089600-1704038400
  207. //areaTermsQuery := elastic.NewTermsQuery("area", "江苏", "安徽", "上海", "天津", "河北", "浙江", "天津市", "上海市", "河北省", "安徽省", "江苏省", "浙江省", "北京", "北京市")
  208. rangeQuery := elastic.NewRangeQuery("firsttime").Gte(1711900800).Lt(1719763200) //2024年4-7月
  209. query := elastic.NewBoolQuery().
  210. //Must(areaTermsQuery).
  211. Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  212. Must(rangeQuery)
  213. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  214. //query := elastic.NewBoolQuery().
  215. // //北京,天津,河北,上海,江苏,浙江,安徽
  216. // //Must(elastic.NewTermQuery("area", "北京市")).
  217. // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  218. // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
  219. // Must(rangeQuery)
  220. ctx := context.Background()
  221. //开始滚动搜索
  222. scrollID := ""
  223. scroll := "10m"
  224. searchSource := elastic.NewSearchSource().
  225. Query(query).
  226. Size(10000).
  227. Sort("_doc", true) //升序排序
  228. //Sort("_doc", false) //降序排序
  229. searchService := client.Scroll(index).
  230. Size(10000).
  231. Scroll(scroll).
  232. SearchSource(searchSource)
  233. res, err := searchService.Do(ctx)
  234. if err != nil {
  235. if err == io.EOF {
  236. fmt.Println("没有数据")
  237. } else {
  238. panic(err)
  239. }
  240. }
  241. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  242. fmt.Println("总数是:", res.TotalHits())
  243. total := 0
  244. for len(res.Hits.Hits) > 0 {
  245. for _, hit := range res.Hits.Hits {
  246. var doc map[string]interface{}
  247. err := json.Unmarshal(hit.Source, &doc)
  248. if err != nil {
  249. log.Printf("解析文档失败:%s", err)
  250. continue
  251. }
  252. delete(doc, "filetext")
  253. delete(doc, "detail")
  254. sWinner := util.ObjToString(doc["s_winner"])
  255. winners := strings.Split(sWinner, ",")
  256. for _, v := range winners {
  257. insert := doc
  258. insert["s_winner"] = v
  259. //存入新表
  260. err = Mgo.InsertOrUpdate("top", "wcc_allcity_2024Q2", insert)
  261. if err != nil {
  262. log.Println("error", doc["id"])
  263. }
  264. }
  265. // 处理查询结果
  266. //area := util.ObjToString(doc["area"])
  267. //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
  268. //if !IsInStringArray(area, areas) {
  269. // continue
  270. //}
  271. //projectName := util.ObjToString(doc["projectname"])
  272. //if strings.Contains(projectName, "非政府") {
  273. // continue
  274. //}
  275. //buyerclass := util.ObjToString(doc["buyerclass"])
  276. //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
  277. // continue
  278. //}
  279. ////存入新表
  280. //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
  281. //if err != nil {
  282. // log.Println("error", doc["id"])
  283. //}
  284. }
  285. total = total + len(res.Hits.Hits)
  286. scrollID = res.ScrollId
  287. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  288. log.Println("current count:", total)
  289. if err != nil {
  290. if err == io.EOF {
  291. // 滚动到最后一批数据,退出循环
  292. break
  293. }
  294. log.Println("滚动搜索失败:", err, res)
  295. break // 处理错误时退出循环
  296. }
  297. }
  298. // 在循环外调用 ClearScroll
  299. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  300. if err != nil {
  301. log.Printf("清理滚动搜索失败:%s", err)
  302. }
  303. fmt.Println("结束~~~~~~~~~~~~~~~")
  304. }
  305. // getQyxytData 获取企业数据
  306. func getQyxytData() {
  307. url := "http://172.17.4.184:19908"
  308. //url := "http://127.0.0.1:19908"
  309. username := "jybid"
  310. password := "Top2023_JEB01i@31"
  311. index := "qyxy" //索引名称
  312. // 创建 Elasticsearch 客户端
  313. client, err := elastic.NewClient(
  314. elastic.SetURL(url),
  315. elastic.SetBasicAuth(username, password),
  316. elastic.SetSniff(false),
  317. )
  318. if err != nil {
  319. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  320. }
  321. //85 抽取库
  322. //Mgo := &mongodb.MongodbSim{
  323. // //MongodbAddr: "127.0.0.1:27080",
  324. // MongodbAddr: "172.17.4.85:27080",
  325. // DbName: "top",
  326. // Size: 10,
  327. // //Direct: true,
  328. //}
  329. //Mgo.InitPool()
  330. MgoB := &mongodb.MongodbSim{
  331. MongodbAddr: "172.17.189.140:27080",
  332. //MongodbAddr: "127.0.0.1:27083",
  333. Size: 10,
  334. DbName: "qfw",
  335. UserName: "SJZY_RWbid_ES",
  336. Password: "SJZY@B4i4D5e6S",
  337. //Direct: true,
  338. }
  339. MgoB.InitPool()
  340. //2023年01-01 2023-10-01,,1-3季度
  341. //2024-1 - 2024-4;1704038400-1711900800
  342. //2023-10-1 2024-1-1;1696089600-1704038400
  343. //城市范围
  344. //areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
  345. //rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
  346. //query := elastic.NewBoolQuery().
  347. // Must(areaTermsQuery).
  348. // Must(rangeQuery)
  349. //---------------------------//
  350. query := elastic.NewBoolQuery()
  351. query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
  352. query.Must(elastic.NewTermQuery("company_city", "北京市"))
  353. //rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
  354. //query := elastic.NewBoolQuery().
  355. // //北京,天津,河北,上海,江苏,浙江,安徽
  356. // //Must(elastic.NewTermQuery("area", "北京市")).
  357. // Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  358. // Must(elastic.NewTermsQuery("area", "北京", "上海", "江苏", "浙江", "广东")).
  359. // Must(rangeQuery)
  360. ctx := context.Background()
  361. //开始滚动搜索
  362. scrollID := ""
  363. scroll := "10m"
  364. searchSource := elastic.NewSearchSource().
  365. Query(query).
  366. Size(10000).
  367. Sort("_doc", true) //升序排序
  368. //Sort("_doc", false) //降序排序
  369. searchService := client.Scroll(index).
  370. Size(10000).
  371. Scroll(scroll).
  372. SearchSource(searchSource)
  373. res, err := searchService.Do(ctx)
  374. if err != nil {
  375. if err == io.EOF {
  376. fmt.Println("没有数据")
  377. } else {
  378. panic(err)
  379. }
  380. }
  381. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  382. fmt.Println("总数是:", res.TotalHits())
  383. total := 0
  384. for len(res.Hits.Hits) > 0 {
  385. for _, hit := range res.Hits.Hits {
  386. var doc map[string]interface{}
  387. err := json.Unmarshal(hit.Source, &doc)
  388. if err != nil {
  389. log.Printf("解析文档失败:%s", err)
  390. continue
  391. }
  392. if strings.Contains(util.ObjToString(doc["business_scope"]), "招投标代理") {
  393. //存入新表
  394. insert := map[string]interface{}{
  395. "company_name": doc["company_name"],
  396. "business_scope": doc["business_scope"],
  397. "employee_name": doc["employee_name"],
  398. "company_phone": doc["company_phone"],
  399. }
  400. err = MgoB.InsertOrUpdate("qfw", "wcc_2024_beijing_dailijigou", insert)
  401. if err != nil {
  402. log.Println("error", doc["id"])
  403. }
  404. }
  405. //sWinner := util.ObjToString(doc["s_winner"])
  406. //winners := strings.Split(sWinner, ",")
  407. //for _, v := range winners {
  408. // insert := doc
  409. // insert["s_winner"] = v
  410. // //存入新表
  411. // err = MgoB.InsertOrUpdate("qfw", "wcc_2024_pingdingshan", insert)
  412. // if err != nil {
  413. // log.Println("error", doc["id"])
  414. // }
  415. //}
  416. // 处理查询结果
  417. //area := util.ObjToString(doc["area"])
  418. //areas := []string{"北京", "上海", "广东", "江苏", "浙江"}
  419. //if !IsInStringArray(area, areas) {
  420. // continue
  421. //}
  422. //projectName := util.ObjToString(doc["projectname"])
  423. //if strings.Contains(projectName, "非政府") {
  424. // continue
  425. //}
  426. //buyerclass := util.ObjToString(doc["buyerclass"])
  427. //if buyerclass == "批发零售" || buyerclass == "住宿餐饮" || buyerclass == "信息技术" {
  428. // continue
  429. //}
  430. ////存入新表
  431. //err = Mgo.InsertOrUpdate("qfw", "wcc_bank_poc", doc)
  432. //if err != nil {
  433. // log.Println("error", doc["id"])
  434. //}
  435. }
  436. total = total + len(res.Hits.Hits)
  437. scrollID = res.ScrollId
  438. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  439. log.Println("current count:", total)
  440. if err != nil {
  441. if err == io.EOF {
  442. // 滚动到最后一批数据,退出循环
  443. break
  444. }
  445. log.Println("滚动搜索失败:", err, res)
  446. break // 处理错误时退出循环
  447. }
  448. }
  449. // 在循环外调用 ClearScroll
  450. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  451. if err != nil {
  452. log.Printf("清理滚动搜索失败:%s", err)
  453. }
  454. fmt.Println("结束~~~~~~~~~~~~~~~")
  455. }
  456. // getQyLimitData 获取qyxy 条件数据
  457. func getQyLimitData() {
  458. //url := "http://172.17.4.184:19908"
  459. url := "http://127.0.0.1:19908"
  460. username := "jybid"
  461. password := "Top2023_JEB01i@31"
  462. index := "qyxy" //索引名称
  463. // 创建 Elasticsearch 客户端
  464. client, err := elastic.NewClient(
  465. elastic.SetURL(url),
  466. elastic.SetBasicAuth(username, password),
  467. elastic.SetSniff(false),
  468. )
  469. if err != nil {
  470. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  471. }
  472. // 构建查询
  473. query := elastic.NewBoolQuery().
  474. Must(elastic.NewMatchQuery("company_area", "河南")).
  475. Must(elastic.NewMatchQuery("company_status", "存续")).
  476. MustNot(elastic.NewMatchQuery("company_type", "个体工商户"))
  477. // 执行查询
  478. searchResult, err := client.Search().Size(50).
  479. Index(index).
  480. Query(query).
  481. Do(context.Background())
  482. if err != nil {
  483. log.Fatalf("Error executing search: %s", err)
  484. }
  485. // 本地数据库
  486. MgoB := &mongodb.MongodbSim{
  487. MongodbAddr: "127.0.0.1:27017",
  488. Size: 10,
  489. DbName: "wcc",
  490. }
  491. MgoB.InitPool()
  492. for _, hit := range searchResult.Hits.Hits {
  493. var doc map[string]interface{}
  494. err := json.Unmarshal(hit.Source, &doc)
  495. if err != nil {
  496. log.Printf("解析文档失败:%s", err)
  497. continue
  498. }
  499. MgoB.SaveByOriID("wcc_henan_0428", doc)
  500. }
  501. }
  502. // getTidb 获取tidb 数据
  503. func getTidb() {
  504. MgoB := &mongodb.MongodbSim{
  505. MongodbAddr: "172.17.189.140:27080",
  506. //MongodbAddr: "127.0.0.1:27083",
  507. Size: 10,
  508. DbName: "qfw",
  509. UserName: "SJZY_RWbid_ES",
  510. Password: "SJZY@B4i4D5e6S",
  511. //Direct: true,
  512. }
  513. MgoB.InitPool()
  514. //tidb
  515. username := "datascbi"
  516. password := "Da#Bi20221111SC"
  517. //host := "127.0.0.1:4001"
  518. host := "172.17.162.25:4000"
  519. database := "global_common_data"
  520. dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
  521. // 连接到数据库
  522. db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
  523. if err != nil {
  524. log.Println("Failed to connect to database:", err)
  525. return
  526. }
  527. fmt.Println("Connected to the database!")
  528. defer util.Catch()
  529. sess := MgoB.GetMgoConn()
  530. defer MgoB.DestoryMongoConn(sess)
  531. it := sess.DB("qfw").C("wcc_2024_beijing_dailijigou").Find(nil).Select(nil).Iter()
  532. fmt.Println("taskRun 开始")
  533. count := 0
  534. for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
  535. if count%10000 == 0 {
  536. log.Println("current:", count)
  537. }
  538. companyName := util.ObjToString(tmp["company_name"])
  539. var baseInfo EnterpriseBaseInfo
  540. db.Where(&EnterpriseBaseInfo{Name: companyName}).First(&baseInfo)
  541. if baseInfo.ID > 0 {
  542. insert := map[string]interface{}{
  543. "company_name": companyName,
  544. "name_id": baseInfo.NameID,
  545. "business_scope": tmp["business_scope"],
  546. }
  547. MgoB.InsertOrUpdate("qfw", "wcc_beijing_daili_bidding", insert)
  548. }
  549. }
  550. log.Println("over")
  551. }
  552. // getEntInfo 获取法人库数据
  553. func getEntInfo() {
  554. url := "http://172.17.4.184:19908"
  555. //url := "http://127.0.0.1:19908"
  556. username := "jybid"
  557. password := "Top2023_JEB01i@31"
  558. index := "ent_info" //索引名称
  559. // 创建 Elasticsearch 客户端
  560. client, err := elastic.NewClient(
  561. elastic.SetURL(url),
  562. elastic.SetBasicAuth(username, password),
  563. elastic.SetSniff(false),
  564. )
  565. if err != nil {
  566. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  567. }
  568. query := elastic.NewBoolQuery().
  569. //北京,天津,河北,上海,江苏,浙江,安徽
  570. //Must(elastic.NewMatchQuery("company_name", "医院")).
  571. //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  572. Must(elastic.NewExistsQuery("tag_labels"))
  573. //Must(rangeQuery)
  574. ctx := context.Background()
  575. //开始滚动搜索
  576. scrollID := ""
  577. scroll := "10m"
  578. searchSource := elastic.NewSearchSource().
  579. Query(query).
  580. Size(10000).
  581. Sort("_doc", true) //升序排序
  582. //Sort("_doc", false) //降序排序
  583. searchService := client.Scroll(index).
  584. Size(10000).
  585. Scroll(scroll).
  586. SearchSource(searchSource)
  587. res, err := searchService.Do(ctx)
  588. if err != nil {
  589. if err == io.EOF {
  590. fmt.Println("没有数据")
  591. } else {
  592. panic(err)
  593. }
  594. }
  595. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  596. fmt.Println("总数是:", res.TotalHits())
  597. total := 0
  598. for len(res.Hits.Hits) > 0 {
  599. for _, hit := range res.Hits.Hits {
  600. var doc map[string]interface{}
  601. err = json.Unmarshal(hit.Source, &doc)
  602. if err != nil {
  603. log.Printf("解析文档失败:%s", err)
  604. continue
  605. }
  606. name := util.ObjToString(doc["company_name"])
  607. updateData := make(map[string]interface{})
  608. if tag_labels, ok := doc["tag_labels"].([]interface{}); ok {
  609. updateData["main_label"] = tag_labels[0]
  610. _, err = client.Update().
  611. Index(index).
  612. Id(util.ObjToString(doc["id"])).
  613. Doc(updateData).
  614. Do(context.Background())
  615. if err != nil {
  616. log.Println("更新失败", name, tag_labels, err)
  617. }
  618. }
  619. }
  620. total = total + len(res.Hits.Hits)
  621. scrollID = res.ScrollId
  622. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  623. log.Println("current count:", total)
  624. if err != nil {
  625. if err == io.EOF {
  626. // 滚动到最后一批数据,退出循环
  627. break
  628. }
  629. log.Println("滚动搜索失败:", err, res)
  630. break // 处理错误时退出循环
  631. }
  632. }
  633. // 在循环外调用 ClearScroll
  634. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  635. if err != nil {
  636. log.Printf("清理滚动搜索失败:%s", err)
  637. }
  638. fmt.Println("结束~~~~~~~~~~~~~~~")
  639. }
  640. // getBuyerData 获取采购单位数据
  641. func getBuyerData() {
  642. //key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
  643. //model := "glm-4-air"
  644. url := "http://172.17.4.184:19908"
  645. //url := "http://127.0.0.1:19908"
  646. username := "jybid"
  647. password := "Top2023_JEB01i@31"
  648. index := "buyer" //索引名称
  649. // 创建 Elasticsearch 客户端
  650. client, err := elastic.NewClient(
  651. elastic.SetURL(url),
  652. elastic.SetBasicAuth(username, password),
  653. elastic.SetSniff(false),
  654. )
  655. if err != nil {
  656. log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
  657. }
  658. MgoB := &mongodb.MongodbSim{
  659. MongodbAddr: "172.17.189.140:27080",
  660. //MongodbAddr: "127.0.0.1:27083",
  661. Size: 10,
  662. DbName: "qfw",
  663. UserName: "SJZY_RWbid_ES",
  664. Password: "SJZY@B4i4D5e6S",
  665. //Direct: true,
  666. }
  667. MgoB.InitPool()
  668. //query := elastic.NewBoolQuery().
  669. // //北京,天津,河北,上海,江苏,浙江,安徽
  670. // Must(elastic.NewMatchQuery("company_name", "医院")).
  671. // //Must(elastic.NewTermsQuery("subtype", "中标", "单一", "成交", "合同")).
  672. // Must(elastic.NewTermsQuery("tag_labels", "学校", "教育"))
  673. //Must(rangeQuery)
  674. ctx := context.Background()
  675. //开始滚动搜索
  676. scrollID := ""
  677. scroll := "10m"
  678. searchSource := elastic.NewSearchSource().
  679. //Query(query).
  680. Size(10000).
  681. Sort("_doc", true) //升序排序
  682. //Sort("_doc", false) //降序排序
  683. searchService := client.Scroll(index).
  684. Size(10000).
  685. Scroll(scroll).
  686. SearchSource(searchSource)
  687. res, err := searchService.Do(ctx)
  688. if err != nil {
  689. if err == io.EOF {
  690. fmt.Println("没有数据")
  691. } else {
  692. panic(err)
  693. }
  694. }
  695. //defer client.ClearScroll().ScrollId(scrollID).Do(ctx) // 在退出时清理资源
  696. fmt.Println("总数是:", res.TotalHits())
  697. total := 0
  698. for len(res.Hits.Hits) > 0 {
  699. for _, hit := range res.Hits.Hits {
  700. var doc map[string]interface{}
  701. err = json.Unmarshal(hit.Source, &doc)
  702. if err != nil {
  703. log.Printf("解析文档失败:%s", err)
  704. continue
  705. }
  706. //name := util.ObjToString(doc["buyer_name"])
  707. //ra := ZpAI(key, model, name)
  708. //if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
  709. // doc["national_top"] = ra["label1"]
  710. // doc["main_label"] = ra["label1"]
  711. //}
  712. //if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
  713. // doc["national_sub"] = ra["label2"]
  714. //}
  715. //if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
  716. // doc["national_subsub"] = ra["label3"]
  717. //}
  718. MgoB.Save("ent_info_buyer", doc)
  719. //time.Sleep(time.Microsecond)
  720. }
  721. total = total + len(res.Hits.Hits)
  722. scrollID = res.ScrollId
  723. res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
  724. log.Println("current count:", total)
  725. if err != nil {
  726. if err == io.EOF {
  727. // 滚动到最后一批数据,退出循环
  728. break
  729. }
  730. log.Println("滚动搜索失败:", err, res)
  731. break // 处理错误时退出循环
  732. }
  733. }
  734. // 在循环外调用 ClearScroll
  735. _, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
  736. if err != nil {
  737. log.Printf("清理滚动搜索失败:%s", err)
  738. }
  739. fmt.Println("结束~~~~~~~~~~~~~~~")
  740. }
  741. // mgoBidding mgoBidding 数据
  742. func mgoBidding() {
  743. MgoB := &mongodb.MongodbSim{
  744. MongodbAddr: "172.17.189.140:27080",
  745. //MongodbAddr: "127.0.0.1:27083",
  746. Size: 10,
  747. DbName: "qfw",
  748. UserName: "SJZY_RWbid_ES",
  749. Password: "SJZY@B4i4D5e6S",
  750. //Direct: true,
  751. }
  752. MgoB.InitPool()
  753. sess := MgoB.GetMgoConn()
  754. defer MgoB.DestoryMongoConn(sess)
  755. //181 凭安库
  756. MgoQY := &mongodb.MongodbSim{
  757. MongodbAddr: "172.17.4.181:27001",
  758. //MongodbAddr: "127.0.0.1:27001",
  759. DbName: "mixdata",
  760. Size: 10,
  761. UserName: "",
  762. Password: "",
  763. //Direct: true,
  764. }
  765. MgoQY.InitPool()
  766. where := map[string]interface{}{
  767. "qy_flag": 1,
  768. }
  769. query := sess.DB("qfw").C("ent_info_buyer").Find(where).Select(map[string]interface{}{
  770. "contenthtml": 0}).Iter()
  771. count := 0
  772. key := "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i"
  773. model := "glm-4-air"
  774. ch := make(chan bool, 10)
  775. wg := &sync.WaitGroup{}
  776. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  777. if count%100 == 0 {
  778. log.Println("current:", count, tmp["name"])
  779. }
  780. //存在就不再调用大模型
  781. //if _, ok := tmp["national_top"]; ok {
  782. // continue
  783. //}
  784. if utf8.RuneCountInString(util.ObjToString(tmp["name"])) < 4 {
  785. continue
  786. }
  787. ch <- true
  788. wg.Add(1)
  789. go func(tmp map[string]interface{}) {
  790. defer func() {
  791. <-ch
  792. wg.Done()
  793. }()
  794. //
  795. biddingID := mongodb.BsonIdToSId(tmp["_id"])
  796. update := make(map[string]interface{})
  797. name := util.ObjToString(tmp["name"])
  798. where2 := map[string]interface{}{
  799. "company_name": name,
  800. }
  801. data, _ := MgoQY.FindOne("company_base", where2)
  802. businessScope := util.ObjToString((*data)["business_scope"])
  803. ra := ZpAI1(key, model, name, businessScope)
  804. if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
  805. //update["national_top"] = ra["label1"]
  806. //update["main_label"] = ra["label1"]
  807. update["label1"] = ra["label1"]
  808. }
  809. if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
  810. //update["national_sub"] = ra["label2"]
  811. update["label2"] = ra["label2"]
  812. }
  813. if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
  814. //update["national_subsub"] = ra["label3"]
  815. update["label3"] = ra["label3"]
  816. }
  817. if len(update) > 0 {
  818. MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
  819. }
  820. }(tmp)
  821. tmp = map[string]interface{}{}
  822. }
  823. wg.Wait()
  824. log.Println("over 22222222222")
  825. //log.Println("开始第二轮迭代")
  826. //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  827. // if _, ok := tmp["national_top"]; ok {
  828. // continue
  829. // }
  830. // biddingID := mongodb.BsonIdToSId(tmp["_id"])
  831. // name := util.ObjToString(tmp["name"])
  832. // update := make(map[string]interface{})
  833. // ra := ZpAI(key, model, name)
  834. // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
  835. // update["national_top"] = ra["label1"]
  836. // update["main_label"] = ra["label1"]
  837. // }
  838. // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
  839. // update["national_sub"] = ra["label2"]
  840. // }
  841. // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
  842. // update["national_subsub"] = ra["label3"]
  843. // }
  844. // if count%1000 == 0 {
  845. // log.Println("current", count, name, ra["label1"], ra["label2"])
  846. // }
  847. //
  848. // if len(update) > 0 {
  849. // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
  850. // }
  851. // //time.Sleep(time.Microsecond)
  852. //}
  853. //
  854. //log.Println("开始第3轮迭代")
  855. //for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  856. // if _, ok := tmp["national_top"]; ok {
  857. // continue
  858. // }
  859. // biddingID := mongodb.BsonIdToSId(tmp["_id"])
  860. // name := util.ObjToString(tmp["name"])
  861. // update := make(map[string]interface{})
  862. // ra := ZpAI(key, model, name)
  863. // if util.ObjToString(ra["label1"]) != "" && !checkString(util.ObjToString(ra["label1"])) {
  864. // update["national_top"] = ra["label1"]
  865. // update["main_label"] = ra["label1"]
  866. // }
  867. // if util.ObjToString(ra["label2"]) != "" && !checkString(util.ObjToString(ra["label2"])) {
  868. // update["national_sub"] = ra["label2"]
  869. // }
  870. // if util.ObjToString(ra["label3"]) != "" && !checkString(util.ObjToString(ra["label3"])) {
  871. // update["national_subsub"] = ra["label3"]
  872. // }
  873. // if count%1000 == 0 {
  874. // log.Println("current", count, name, ra["label1"], ra["label2"])
  875. // }
  876. //
  877. // if len(update) > 0 {
  878. // MgoB.UpdateById("ent_info_buyer", biddingID, map[string]interface{}{"$set": update})
  879. // }
  880. // //time.Sleep(time.Microsecond)
  881. //}
  882. }
  883. // updateMgoEntInfoBuyer updateMgoEntInfoBuyer
  884. func updateMgoEntInfoBuyer() {
  885. MgoB := &mongodb.MongodbSim{
  886. MongodbAddr: "172.17.189.140:27080",
  887. //MongodbAddr: "127.0.0.1:27083",
  888. Size: 10,
  889. DbName: "qfw",
  890. UserName: "SJZY_RWbid_ES",
  891. Password: "SJZY@B4i4D5e6S",
  892. //Direct: true,
  893. }
  894. MgoB.InitPool()
  895. //181 凭安库
  896. MgoQY := &mongodb.MongodbSim{
  897. MongodbAddr: "172.17.4.181:27001",
  898. //MongodbAddr: "127.0.0.1:27001",
  899. DbName: "mixdata",
  900. Size: 10,
  901. UserName: "",
  902. Password: "",
  903. //Direct: true,
  904. }
  905. MgoQY.InitPool()
  906. sess := MgoB.GetMgoConn()
  907. defer MgoB.DestoryMongoConn(sess)
  908. query := sess.DB("qfw").C("ent_info_buyer").Find(nil).Select(map[string]interface{}{
  909. "contenthtml": 0}).Iter()
  910. count := 0
  911. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  912. if count%1000 == 0 {
  913. log.Println("current:", count, tmp["name"])
  914. }
  915. name := util.ObjToString(tmp["name"])
  916. where := map[string]interface{}{
  917. "company_name": name,
  918. }
  919. id := mongodb.BsonIdToSId(tmp["_id"])
  920. data, _ := MgoQY.FindOne("company_base", where)
  921. if data != nil && len(*data) > 0 {
  922. update := map[string]interface{}{
  923. "qy_flag": 1,
  924. "use_flag": (*data)["use_flag"],
  925. "company_type": (*data)["company_type"],
  926. "company_status": (*data)["company_status"],
  927. "credit_no": (*data)["credit_no"],
  928. "business_scope": (*data)["business_scope"],
  929. }
  930. MgoB.UpdateById("ent_info_buyer", id, map[string]interface{}{"$set": update})
  931. }
  932. }
  933. }