package main import ( "context" "encoding/json" "fieldproject_common/config" "fmt" es "github.com/olivere/elastic/v7" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" "jygit.jydev.jianyu360.cn/data_processing/common_utils/log" "strconv" "sync" "time" ) var province_map = map[string]string{ "BJ": "北京", "TJ": "天津", "SH": "上海", "CQ": "重庆", "HB": "河北", "SX": "山西", "NMG": "内蒙古", "LN": "辽宁", "JL": "吉林", "HLJ": "黑龙江", "JS": "江苏", "ZJ": "浙江", "AH": "安徽", "FJ": "福建", "JX": "江西", "SD": "山东", "HEN": "河南", "HUB": "湖北", "HUN": "湖南", "GD": "广东", "GX": "广西", "HAIN": "海南", "SC": "四川", "GZ": "贵州", "YN": "云南", "XZ": "西藏", "SAX": "陕西", "GS": "甘肃", "QH": "青海", "NX": "宁夏", "XJ": "新疆", } // @Description 标讯数据 // @Author J 2022/9/7 11:42 func taskBiddingData1() { go SaveMethod() sess := MongoTool.GetMgoConn() defer MongoTool.DestoryMongoConn(sess) ch := make(chan bool, 10) wg := &sync.WaitGroup{} //q := map[string]interface{}{"_id": mongodb.StringTOBsonId("5a8d7f4840d2d9bbe8962002")} query := sess.DB(config.Conf.DB.Mongo.Dbname).C("bidding").Find(nil).Iter() count := 0 for tmp := make(map[string]interface{}); query.Next(tmp); count++ { if count%20000 == 0 { log.Info(fmt.Sprintf("current --- %d", count)) } ch <- true wg.Add(1) go func(tmp map[string]interface{}) { defer func() { <-ch wg.Done() }() if b := util.ObjToString(tmp["bid_field"]); b != "" { taskA(tmp) } }(tmp) tmp = make(map[string]interface{}) } wg.Wait() log.Info(fmt.Sprintf("over --- %d", count)) } // @Description 企业数据 // @Author J 2022/8/23 09:08 func taskCompanyData() { sess := MongoTool.GetMgoConn() defer MongoTool.DestoryMongoConn(sess) ch := make(chan bool, 10) wg := &sync.WaitGroup{} //log.Info(fmt.Sprintf("%d", MongoTool2.Count("zktest_mysql_company_info", nil))) var p1 []map[string]interface{} p1 = append(p1, map[string]interface{}{"$group": map[string]interface{}{"_id": "$company_id"}}) query := sess.DB(config.Conf.DB.Mongo.Dbname).C("bidding_p_list_0907").Pipe(p1).Iter() //q := map[string]interface{}{"_id": mongodb.StringTOBsonId("61efb24b70f4a1409599badb")} //query := sess.DB(config.Conf.DB.Mongo2.Dbname).C("zktest_mysql_company_info").Find(nil).Iter() count := 0 for tmp := make(map[string]interface{}); query.Next(tmp); count++ { if count%5000 == 0 { log.Info(fmt.Sprintf("current --- %d", count)) } ch <- true wg.Add(1) go func(tmp map[string]interface{}) { defer func() { <-ch wg.Done() }() //taskC(tmp) taskB(util.ObjToString(tmp["_id"])) //info, _ := MongoTool.FindOne("qyxy_std", map[string]interface{}{"_id": util.ObjToString(tmp["company_id"])}) //if len(*info) > 0 { // savePool <- *info //} }(tmp) tmp = make(map[string]interface{}) } wg.Wait() log.Info(fmt.Sprintf("over --- %d", count)) } // @Description 标讯数据企业与产品 // @Author J 2022/8/24 14:37 func taskA(tmp map[string]interface{}) { if tmp["entidlist"] != nil { saveMap := make(map[string]interface{}) for _, eid := range tmp["entidlist"].([]interface{}) { if util.ObjToString(eid) != "" && util.ObjToString(eid) != "-" { saveMap["company_id"] = eid if tmp["purchasinglist"] != nil { for _, p1 := range tmp["purchasinglist"].([]interface{}) { p2 := p1.(map[string]interface{}) if p2["itemname"] != nil { saveMap["itemname"] = p2["itemname"] if p2["brandname"] != nil { saveMap["brand"] = p2["brandname"] } if p2["model"] != nil { saveMap["model"] = p2["model"] } MongoTool.Save("bidding_p_list_0907", saveMap) //savePool <- saveMap } } } } } } } var company_field = []string{"company_id", "company_name", "company_code", "credit_no", "org_code", "tax_code", "establish_date", "legal_person", "legal_person_caption", "company_status", "company_type", "authority", "issue_date", "operation_startdate", "operation_enddate", "capital", "company_address", "business_scope", "cancel_date", "cancel_reason", "revoke_date", "revoke_reason", "legal_person_type", "real_capital", "en_name", "list_code", "area_code", "employee_no", "company_phone", "company_email", "website", "sourcetype"} // @Description 医疗企业 // @Author J 2022/8/24 14:37 func taskB(tid string) { saveM := make(map[string]interface{}) info, _ := MongoTool1.FindOneByField("company_base", map[string]interface{}{"company_id": tid}, "") if len(*info) == 0 { info, _ = MongoTool1.FindOneByField("special_enterprise", map[string]interface{}{"company_id": tid}, "") } if len(*info) > 0 { for _, v := range company_field { if (*info)[v] != nil { saveM[v] = (*info)[v] } if v == "area_code" { std, _ := MongoTool.FindOneByField("qyxy_std", map[string]interface{}{"_id": tid}, map[string]interface{}{"company_area": 1, "company_city": 1, "company_district": 1, "website_url": 1, "capital": 1}) if len(*std) > 0 { m := make(map[string]interface{}) for k, v := range map[string]string{"company_district": "district", "company_city": "city", "company_area": "area"} { if v1 := util.ObjToString((*std)[k]); v1 != "" { m = map[string]interface{}{v: v1} info := MysqlB.FindOne("code_area", m, "", "") if info != nil && len(*info) > 0 { saveM["area_code"] = (*info)["code"] break } } } if saveM["area_code"] == nil { saveM["area_code"] = "000000" } if (*std)["website_url"] != nil && len(util.ObjToString((*std)["website_url"])) <= 255 { saveM["website"] = (*std)["website_url"] } if (*std)["company_phone"] != nil { saveM["company_phone"] = (*std)["company_phone"] } if (*std)["company_email"] != nil { saveM["company_email"] = (*std)["company_email"] } if (*std)["capital"] != nil { saveM["capital"] = util.ObjToString((*std)["capital"]) } } } else if v == "employee_no" { rep, _ := MongoTool1.Find("annual_report_base", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1) if len(*rep) > 0 && util.ObjToString((*rep)[0]["employee_no"]) != "" { i, err := strconv.Atoi(util.ObjToString((*rep)[0]["employee_no"])) if err != nil { saveM[v] = i } } } //else if v == "website" { // info, _ := MongoTool.Find("annual_report_website", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1) // if len(*info) > 0 && util.ObjToString((*info)[0]["website_url"]) != "" { // saveM[v] = util.ObjToString((*info)[0]["website_url"]) // } //} } saveM["comeintime"] = time.Now() saveM["updatetime"] = time.Now() saveM["sourcetype"] = 3 MysqlB.Insert("company_baseinfo", saveM) //savePool <- saveM } else { util.Debug("company_id err", tid) } } func taskB_1(tid string) { saveM := make(map[string]interface{}) info, _ := MongoTool1.FindOneByField("company_base", map[string]interface{}{"company_id": tid}, "") if len(*info) == 0 { info, _ = MongoTool1.FindOneByField("special_enterprise", map[string]interface{}{"company_id": tid}, "") } if len(*info) > 0 { for _, v := range company_field { if (*info)[v] != nil { saveM[v] = (*info)[v] } if v == "area_code" { if a := util.ObjToString((*info)["province_short"]); a != "" { m := map[string]interface{}{"area": province_map[a]} info := MysqlB.FindOne("code_area", m, "", "") if info != nil && len(*info) > 0 { saveM["area_code"] = (*info)["code"] } } } else if (*info)["capital"] != nil { text := util.ObjToString((*info)["capital"]) capital := ObjToMoney(text) capital = capital / 10000 if capital != 0 { saveM["capital"] = fmt.Sprint(capital) } } } saveM["comeintime"] = time.Now() saveM["updatetime"] = time.Now() saveM["sourcetype"] = 1 MysqlB.Insert("company_baseinfo", saveM) //savePool <- saveM } else { util.Debug("company_id err", tid) } } func taskC(tmp map[string]interface{}) { tid := util.ObjToString(tmp["company_id"]) saveM := make(map[string]interface{}) for _, v := range company_field { if tmp[v] != nil { saveM[v] = tmp[v] } if v == "area_code" { std, _ := MongoTool.FindOneByField("qyxy_std", map[string]interface{}{"_id": tid}, map[string]interface{}{"company_area": 1, "company_city": 1, "company_district": 1, "website_url": 1, "company_phone": 1, "company_email": 1, "capital": 1}) if len(*std) > 0 { m := make(map[string]interface{}) for k, v := range map[string]string{"company_district": "district", "company_city": "city", "company_area": "area"} { if v1 := util.ObjToString((*std)[k]); v1 != "" { m = map[string]interface{}{v: v1} info := MysqlB.FindOne("code_area", m, "", "") if info != nil && len(*info) > 0 { saveM["area_code"] = (*info)["code"] break } } } if saveM["area_code"] == nil { saveM["area_code"] = "000000" } if (*std)["website_url"] != nil && len(util.ObjToString((*std)["website_url"])) <= 255 { saveM["website"] = (*std)["website_url"] } if (*std)["company_phone"] != nil { saveM["company_phone"] = (*std)["company_phone"] } if (*std)["company_email"] != nil { saveM["company_email"] = (*std)["company_email"] } if (*std)["capital"] != nil { saveM["capital"] = fmt.Sprint((*std)["capital"]) } } } else if v == "employee_no" { rep, _ := MongoTool1.Find("annual_report_base", map[string]interface{}{"company_id": tid}, map[string]interface{}{"_id": -1}, "", false, -1, -1) if len(*rep) > 0 && util.ObjToString((*rep)[0]["employee_no"]) != "" { i, err := strconv.Atoi(util.ObjToString((*rep)[0]["employee_no"])) if err != nil { saveM[v] = i } } } } saveM["comeintime"] = time.Now() saveM["updatetime"] = time.Now() saveM["sourcetype"] = 2 //MysqlB.Insert("company_business_model", map[string]interface{}{"company_id": tmp["company_id"], // "business_model": util.IntAll(tmp["business_type"]), "company_field_code": "0101", "comeintime": time.Now()}) MysqlB.Insert("company_baseinfo", saveM) } func taskBiddingData() { client := Es.GetEsConn() defer Es.DestoryEsConn(client) wg := &sync.WaitGroup{} //lock := &sync.Mutex{} esquery := `{"query":{"bool":{"must":[],"must_not":[{"constant_score":{"filter":{"missing":{"field":"bid_field"}}}}],"should":[{"match_all":{}}]}},"from":0,"size":10,"sort":[],"facets":{}}` q := es.NewRawStringQuery(esquery) util.Debug(Es.Count("bidding", q)) countDocs := 0 res, err := client.Scroll().Index("bidding").Query(q).Size(200).Do(context.TODO()) //查询一条获取游标 if err == nil { taskInfoA(res, wg, &countDocs) scrollId := res.ScrollId for { searchResult, err := client.Scroll("1m").ScrollId(scrollId).Size(200).Do(context.TODO()) //查询 if err != nil { util.Debug("Es Search Data Error:", err) break } taskInfoA(searchResult, wg, &countDocs) scrollId = searchResult.ScrollId } wg.Wait() util.Debug("over---", countDocs) _, _ = client.ClearScroll().ScrollId(scrollId).Do(context.TODO()) //清理游标 } else { util.Debug(err) } c := make(chan bool, 1) <-c } func taskInfoA(searchResult *es.SearchResult, wg *sync.WaitGroup, countDocs *int) { for _, hit := range searchResult.Hits.Hits { //开始处理数据 wg.Add(1) ChEs <- true go func(tmpHit *es.SearchHit) { defer func() { <-ChEs wg.Done() }() tmp := make(map[string]interface{}) if json.Unmarshal(tmpHit.Source, &tmp) == nil { id := util.ObjToString(tmp["_id"]) if tmp["entidlist"] != nil && tmp["purchasinglist"] != nil { for _, eid := range util.ObjArrToStringArr(tmp["entidlist"].([]interface{})) { if util.ObjToString(eid) != "" && util.ObjToString(eid) != "-" { saveMap := make(map[string]interface{}) saveMap["infoid"] = id saveMap["company_id"] = eid for _, p1 := range tmp["purchasinglist"].([]interface{}) { p2 := p1.(map[string]interface{}) if p2["itemname"] != nil { b := method3(id, util.ObjToString(p2["itemname"])) if b { saveMap["itemname"] = p2["itemname"] if p2["brandname"] != nil { saveMap["brand"] = p2["brandname"] } if p2["model"] != nil { saveMap["model"] = p2["model"] } MongoTool.Save("bidding_p_list_0907", saveMap) } } } } } } } }(hit) *countDocs += 1 if *countDocs%20000 == 0 { util.Debug("Current:", *countDocs) } } } func method3(id, itemname string) bool { q := map[string]interface{}{"infoid": id, "itemname": itemname} info := MysqlB.FindOne("bid_purchasinginfo", q, "id", "") if info != nil && len(*info) > 0 { q1 := map[string]interface{}{"bid_purchasing_id": (*info)["id"]} info1 := MysqlB.FindOne("bid_purchasing_field_record", q1, "id", "") if info1 != nil && len(*info1) > 0 { return true } } return false }