// task1.go 13 KB
  1. package main
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"strconv"
	"sync"
	"time"

	"fieldproject_common/config"

	es "github.com/olivere/elastic/v7"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
)
  14. var province_map = map[string]string{
  15. "BJ": "北京", "TJ": "天津", "SH": "上海", "CQ": "重庆", "HB": "河北", "SX": "山西", "NMG": "内蒙古", "LN": "辽宁", "JL": "吉林",
  16. "HLJ": "黑龙江", "JS": "江苏", "ZJ": "浙江", "AH": "安徽", "FJ": "福建", "JX": "江西", "SD": "山东", "HEN": "河南", "HUB": "湖北",
  17. "HUN": "湖南", "GD": "广东", "GX": "广西", "HAIN": "海南", "SC": "四川", "GZ": "贵州", "YN": "云南", "XZ": "西藏", "SAX": "陕西",
  18. "GS": "甘肃", "QH": "青海", "NX": "宁夏", "XJ": "新疆",
  19. }
  20. // @Description 标讯数据
  21. // @Author J 2022/9/7 11:42
  22. func taskBiddingData1() {
  23. go SaveMethod()
  24. sess := MongoTool.GetMgoConn()
  25. defer MongoTool.DestoryMongoConn(sess)
  26. ch := make(chan bool, 10)
  27. wg := &sync.WaitGroup{}
  28. //q := map[string]interface{}{"_id": mongodb.StringTOBsonId("5a8d7f4840d2d9bbe8962002")}
  29. query := sess.DB(config.Conf.DB.Mongo.Dbname).C("bidding").Find(nil).Iter()
  30. count := 0
  31. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  32. if count%20000 == 0 {
  33. log.Info(fmt.Sprintf("current --- %d", count))
  34. }
  35. ch <- true
  36. wg.Add(1)
  37. go func(tmp map[string]interface{}) {
  38. defer func() {
  39. <-ch
  40. wg.Done()
  41. }()
  42. if b := util.ObjToString(tmp["bid_field"]); b != "" {
  43. taskA(tmp)
  44. }
  45. }(tmp)
  46. tmp = make(map[string]interface{})
  47. }
  48. wg.Wait()
  49. log.Info(fmt.Sprintf("over --- %d", count))
  50. }
  51. // @Description 企业数据
  52. // @Author J 2022/8/23 09:08
  53. func taskCompanyData() {
  54. sess := MongoTool.GetMgoConn()
  55. defer MongoTool.DestoryMongoConn(sess)
  56. ch := make(chan bool, 10)
  57. wg := &sync.WaitGroup{}
  58. //log.Info(fmt.Sprintf("%d", MongoTool2.Count("zktest_mysql_company_info", nil)))
  59. var p1 []map[string]interface{}
  60. p1 = append(p1, map[string]interface{}{"$group": map[string]interface{}{"_id": "$company_id"}})
  61. query := sess.DB(config.Conf.DB.Mongo.Dbname).C("bidding_p_list_0907").Pipe(p1).Iter()
  62. //q := map[string]interface{}{"_id": mongodb.StringTOBsonId("61efb24b70f4a1409599badb")}
  63. //query := sess.DB(config.Conf.DB.Mongo2.Dbname).C("zktest_mysql_company_info").Find(nil).Iter()
  64. count := 0
  65. for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
  66. if count%5000 == 0 {
  67. log.Info(fmt.Sprintf("current --- %d", count))
  68. }
  69. ch <- true
  70. wg.Add(1)
  71. go func(tmp map[string]interface{}) {
  72. defer func() {
  73. <-ch
  74. wg.Done()
  75. }()
  76. //taskC(tmp)
  77. taskB(util.ObjToString(tmp["_id"]))
  78. //info, _ := MongoTool.FindOne("qyxy_std", map[string]interface{}{"_id": util.ObjToString(tmp["company_id"])})
  79. //if len(*info) > 0 {
  80. // savePool <- *info
  81. //}
  82. }(tmp)
  83. tmp = make(map[string]interface{})
  84. }
  85. wg.Wait()
  86. log.Info(fmt.Sprintf("over --- %d", count))
  87. }
  88. // @Description 标讯数据企业与产品
  89. // @Author J 2022/8/24 14:37
  90. func taskA(tmp map[string]interface{}) {
  91. if tmp["entidlist"] != nil {
  92. saveMap := make(map[string]interface{})
  93. for _, eid := range tmp["entidlist"].([]interface{}) {
  94. if util.ObjToString(eid) != "" && util.ObjToString(eid) != "-" {
  95. saveMap["company_id"] = eid
  96. if tmp["purchasinglist"] != nil {
  97. for _, p1 := range tmp["purchasinglist"].([]interface{}) {
  98. p2 := p1.(map[string]interface{})
  99. if p2["itemname"] != nil {
  100. saveMap["itemname"] = p2["itemname"]
  101. if p2["brandname"] != nil {
  102. saveMap["brand"] = p2["brandname"]
  103. }
  104. if p2["model"] != nil {
  105. saveMap["model"] = p2["model"]
  106. }
  107. MongoTool.Save("bidding_p_list_0907", saveMap)
  108. //savePool <- saveMap
  109. }
  110. }
  111. }
  112. }
  113. }
  114. }
  115. }
  116. var company_field = []string{"company_id", "company_name", "company_code", "credit_no", "org_code", "tax_code", "establish_date", "legal_person", "legal_person_caption",
  117. "company_status", "company_type", "authority", "issue_date", "operation_startdate", "operation_enddate", "capital", "company_address", "business_scope", "cancel_date",
  118. "cancel_reason", "revoke_date", "revoke_reason", "legal_person_type", "real_capital", "en_name", "list_code", "area_code", "employee_no", "company_phone", "company_email",
  119. "website", "sourcetype"}
  120. // @Description 医疗企业
  121. // @Author J 2022/8/24 14:37
  122. func taskB(tid string) {
  123. saveM := make(map[string]interface{})
  124. info, _ := MongoTool1.FindOneByField("company_base", map[string]interface{}{"company_id": tid}, "")
  125. if len(*info) == 0 {
  126. info, _ = MongoTool1.FindOneByField("special_enterprise", map[string]interface{}{"company_id": tid}, "")
  127. }
  128. if len(*info) > 0 {
  129. for _, v := range company_field {
  130. if (*info)[v] != nil {
  131. saveM[v] = (*info)[v]
  132. }
  133. if v == "area_code" {
  134. std, _ := MongoTool.FindOneByField("qyxy_std", map[string]interface{}{"_id": tid}, map[string]interface{}{"company_area": 1, "company_city": 1, "company_district": 1, "website_url": 1, "capital": 1})
  135. if len(*std) > 0 {
  136. m := make(map[string]interface{})
  137. for k, v := range map[string]string{"company_district": "district", "company_city": "city", "company_area": "area"} {
  138. if v1 := util.ObjToString((*std)[k]); v1 != "" {
  139. m = map[string]interface{}{v: v1}
  140. info := MysqlB.FindOne("code_area", m, "", "")
  141. if info != nil && len(*info) > 0 {
  142. saveM["area_code"] = (*info)["code"]
  143. break
  144. }
  145. }
  146. }
  147. if saveM["area_code"] == nil {
  148. saveM["area_code"] = "000000"
  149. }
  150. if (*std)["website_url"] != nil && len(util.ObjToString((*std)["website_url"])) <= 255 {
  151. saveM["website"] = (*std)["website_url"]
  152. }
  153. if (*std)["company_phone"] != nil {
  154. saveM["company_phone"] = (*std)["company_phone"]
  155. }
  156. if (*std)["company_email"] != nil {
  157. saveM["company_email"] = (*std)["company_email"]
  158. }
  159. if (*std)["capital"] != nil {
  160. saveM["capital"] = util.ObjToString((*std)["capital"])
  161. }
  162. }
  163. } else if v == "employee_no" {
  164. rep, _ := MongoTool1.Find("annual_report_base", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1)
  165. if len(*rep) > 0 && util.ObjToString((*rep)[0]["employee_no"]) != "" {
  166. i, err := strconv.Atoi(util.ObjToString((*rep)[0]["employee_no"]))
  167. if err != nil {
  168. saveM[v] = i
  169. }
  170. }
  171. }
  172. //else if v == "website" {
  173. // info, _ := MongoTool.Find("annual_report_website", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1)
  174. // if len(*info) > 0 && util.ObjToString((*info)[0]["website_url"]) != "" {
  175. // saveM[v] = util.ObjToString((*info)[0]["website_url"])
  176. // }
  177. //}
  178. }
  179. saveM["comeintime"] = time.Now()
  180. saveM["updatetime"] = time.Now()
  181. saveM["sourcetype"] = 3
  182. MysqlB.Insert("company_baseinfo", saveM)
  183. //savePool <- saveM
  184. } else {
  185. util.Debug("company_id err", tid)
  186. }
  187. }
  188. func taskB_1(tid string) {
  189. saveM := make(map[string]interface{})
  190. info, _ := MongoTool1.FindOneByField("company_base", map[string]interface{}{"company_id": tid}, "")
  191. if len(*info) == 0 {
  192. info, _ = MongoTool1.FindOneByField("special_enterprise", map[string]interface{}{"company_id": tid}, "")
  193. }
  194. if len(*info) > 0 {
  195. for _, v := range company_field {
  196. if (*info)[v] != nil {
  197. saveM[v] = (*info)[v]
  198. }
  199. if v == "area_code" {
  200. if a := util.ObjToString((*info)["province_short"]); a != "" {
  201. m := map[string]interface{}{"area": province_map[a]}
  202. info := MysqlB.FindOne("code_area", m, "", "")
  203. if info != nil && len(*info) > 0 {
  204. saveM["area_code"] = (*info)["code"]
  205. }
  206. }
  207. } else if (*info)["capital"] != nil {
  208. text := util.ObjToString((*info)["capital"])
  209. capital := ObjToMoney(text)
  210. capital = capital / 10000
  211. if capital != 0 {
  212. saveM["capital"] = fmt.Sprint(capital)
  213. }
  214. }
  215. }
  216. saveM["comeintime"] = time.Now()
  217. saveM["updatetime"] = time.Now()
  218. saveM["sourcetype"] = 1
  219. MysqlB.Insert("company_baseinfo", saveM)
  220. //savePool <- saveM
  221. } else {
  222. util.Debug("company_id err", tid)
  223. }
  224. }
  225. func taskC(tmp map[string]interface{}) {
  226. tid := util.ObjToString(tmp["company_id"])
  227. saveM := make(map[string]interface{})
  228. for _, v := range company_field {
  229. if tmp[v] != nil {
  230. saveM[v] = tmp[v]
  231. }
  232. if v == "area_code" {
  233. std, _ := MongoTool.FindOneByField("qyxy_std", map[string]interface{}{"_id": tid}, map[string]interface{}{"company_area": 1, "company_city": 1, "company_district": 1, "website_url": 1, "company_phone": 1, "company_email": 1, "capital": 1})
  234. if len(*std) > 0 {
  235. m := make(map[string]interface{})
  236. for k, v := range map[string]string{"company_district": "district", "company_city": "city", "company_area": "area"} {
  237. if v1 := util.ObjToString((*std)[k]); v1 != "" {
  238. m = map[string]interface{}{v: v1}
  239. info := MysqlB.FindOne("code_area", m, "", "")
  240. if info != nil && len(*info) > 0 {
  241. saveM["area_code"] = (*info)["code"]
  242. break
  243. }
  244. }
  245. }
  246. if saveM["area_code"] == nil {
  247. saveM["area_code"] = "000000"
  248. }
  249. if (*std)["website_url"] != nil && len(util.ObjToString((*std)["website_url"])) <= 255 {
  250. saveM["website"] = (*std)["website_url"]
  251. }
  252. if (*std)["company_phone"] != nil {
  253. saveM["company_phone"] = (*std)["company_phone"]
  254. }
  255. if (*std)["company_email"] != nil {
  256. saveM["company_email"] = (*std)["company_email"]
  257. }
  258. if (*std)["capital"] != nil {
  259. saveM["capital"] = fmt.Sprint((*std)["capital"])
  260. }
  261. }
  262. } else if v == "employee_no" {
  263. rep, _ := MongoTool1.Find("annual_report_base", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1)
  264. if len(*rep) > 0 && util.ObjToString((*rep)[0]["employee_no"]) != "" {
  265. i, err := strconv.Atoi(util.ObjToString((*rep)[0]["employee_no"]))
  266. if err != nil {
  267. saveM[v] = i
  268. }
  269. }
  270. }
  271. }
  272. saveM["comeintime"] = time.Now()
  273. saveM["updatetime"] = time.Now()
  274. saveM["sourcetype"] = 2
  275. //MysqlB.Insert("company_business_model", map[string]interface{}{"company_id": tmp["company_id"],
  276. // "business_model": util.IntAll(tmp["business_type"]), "company_field_code": "0101", "comeintime": time.Now()})
  277. MysqlB.Insert("company_baseinfo", saveM)
  278. }
  279. func taskBiddingData() {
  280. client := Es.GetEsConn()
  281. defer Es.DestoryEsConn(client)
  282. wg := &sync.WaitGroup{}
  283. //lock := &sync.Mutex{}
  284. esquery := `{"query":{"bool":{"must":[],"must_not":[{"constant_score":{"filter":{"missing":{"field":"bid_field"}}}}],"should":[{"match_all":{}}]}},"from":0,"size":10,"sort":[],"facets":{}}`
  285. q := es.NewRawStringQuery(esquery)
  286. util.Debug(Es.Count("bidding", q))
  287. countDocs := 0
  288. res, err := client.Scroll().Index("bidding").Query(q).Size(200).Do(context.TODO()) //查询一条获取游标
  289. if err == nil {
  290. taskInfoA(res, wg, &countDocs)
  291. scrollId := res.ScrollId
  292. for {
  293. searchResult, err := client.Scroll("1m").ScrollId(scrollId).Size(200).Do(context.TODO()) //查询
  294. if err != nil {
  295. util.Debug("Es Search Data Error:", err)
  296. break
  297. }
  298. taskInfoA(searchResult, wg, &countDocs)
  299. scrollId = searchResult.ScrollId
  300. }
  301. wg.Wait()
  302. util.Debug("over---", countDocs)
  303. _, _ = client.ClearScroll().ScrollId(scrollId).Do(context.TODO()) //清理游标
  304. } else {
  305. util.Debug(err)
  306. }
  307. c := make(chan bool, 1)
  308. <-c
  309. }
  310. func taskInfoA(searchResult *es.SearchResult, wg *sync.WaitGroup, countDocs *int) {
  311. for _, hit := range searchResult.Hits.Hits {
  312. //开始处理数据
  313. wg.Add(1)
  314. ChEs <- true
  315. go func(tmpHit *es.SearchHit) {
  316. defer func() {
  317. <-ChEs
  318. wg.Done()
  319. }()
  320. tmp := make(map[string]interface{})
  321. if json.Unmarshal(tmpHit.Source, &tmp) == nil {
  322. id := util.ObjToString(tmp["_id"])
  323. if tmp["entidlist"] != nil && tmp["purchasinglist"] != nil {
  324. for _, eid := range util.ObjArrToStringArr(tmp["entidlist"].([]interface{})) {
  325. if util.ObjToString(eid) != "" && util.ObjToString(eid) != "-" {
  326. saveMap := make(map[string]interface{})
  327. saveMap["infoid"] = id
  328. saveMap["company_id"] = eid
  329. for _, p1 := range tmp["purchasinglist"].([]interface{}) {
  330. p2 := p1.(map[string]interface{})
  331. if p2["itemname"] != nil {
  332. b := method3(id, util.ObjToString(p2["itemname"]))
  333. if b {
  334. saveMap["itemname"] = p2["itemname"]
  335. if p2["brandname"] != nil {
  336. saveMap["brand"] = p2["brandname"]
  337. }
  338. if p2["model"] != nil {
  339. saveMap["model"] = p2["model"]
  340. }
  341. MongoTool.Save("bidding_p_list_0907", saveMap)
  342. }
  343. }
  344. }
  345. }
  346. }
  347. }
  348. }
  349. }(hit)
  350. *countDocs += 1
  351. if *countDocs%20000 == 0 {
  352. util.Debug("Current:", *countDocs)
  353. }
  354. }
  355. }
  356. func method3(id, itemname string) bool {
  357. q := map[string]interface{}{"infoid": id, "itemname": itemname}
  358. info := MysqlB.FindOne("bid_purchasinginfo", q, "id", "")
  359. if info != nil && len(*info) > 0 {
  360. q1 := map[string]interface{}{"bid_purchasing_id": (*info)["id"]}
  361. info1 := MysqlB.FindOne("bid_purchasing_field_record", q1, "id", "")
  362. if info1 != nil && len(*info1) > 0 {
  363. return true
  364. }
  365. }
  366. return false
  367. }