123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531 |
- // standarwinner
- package main
import (
	"encoding/json"
	"log"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"dbutil/mongo"
	"dbutil/redis"
	qu "qfw/util"

	"go.mongodb.org/mongo-driver/bson/primitive"
	"gopkg.in/mgo.v2/bson"
)
- //增量处理
- func winnerStandarData(db string, query map[string]interface{}) {
- defer qu.Catch()
- sess := MongoFrom.GetMgoConn()
- defer MongoFrom.Close()
- it := sess.DB(db).C(extractcoll).Find(query).Select(bson.M{"repeat": 1, "winner": 1, "winnertel": 1, "winnerperson": 1, "topscopeclass": 1, "package": 1}).Sort("_id").Iter()
- index := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
- if qu.IntAll(tmp["repeat"]) > 0 { //重复数据跳过
- continue
- }
- winner := qu.ObjToString(tmp["winner"])
- if utf8.RuneCountInString(winner) < 5 {
- continue
- }
- infoid := mongo.BsonTOStringId(tmp["_id"])
- topscopeclass, _ := tmp["topscopeclass"].(primitive.A)
- entid, _ := redis.GetRedisStr("winner", winnerbd, winner)
- winnerperson := qu.ObjToString(tmp["winnerperson"])
- winnertel := qu.ObjToString(tmp["winnertel"])
- if entid == "" { //新增标准库
- savetoerr := true
- if winnerperson != "" || winnertel != "" {
- v := map[string]interface{}{
- "contact_person": winnerperson,
- "phone": winnertel,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "infoid": infoid,
- }
- data := comHisMegerNewData(winner, "winner", []map[string]interface{}{v})
- if data != nil {
- province, city, district := "", "", ""
- province = qu.ObjToString(data["province"])
- if province == "" { //省份为空,buyer优先提取区域信息再company_address
- province, city, district = GetProvinceCityDistrict([]string{winner}) //先buyer
- if province == "" { //再address
- if address := qu.ObjToString(data["company_address"]); address != "" {
- province, city, district = GetProvinceCityDistrict([]string{address})
- }
- }
- data["province"] = province
- data["city"] = city
- data["district"] = district
- }
- //提取固话和手机号
- contactArr := []interface{}{}
- contactArr = append(contactArr, v)
- latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone := getPhone(contactArr)
- data["latestfixedphone"] = latestFixedPhone
- data["latestmobilephone"] = latestMobilePhone
- data["fixedphone"] = timesFixedPhone
- data["mobilephone"] = timesMobilePhone
- data["institute_type"] = "企业"
- _id := MongoTo.Save(winnerent, data)
- redis.PutRedis("winner", winnerbd, winner, _id.(primitive.ObjectID).Hex(), -1)
- savetoerr = false
- }
- }
- if savetoerr {
- t := MongoTo.FindOne(winnererr, map[string]interface{}{"name": winner})
- if len(t) < 1 {
- province, city, district := GetProvinceCityDistrict([]string{winner})
- MongoTo.Save(winnererr, map[string]interface{}{
- "name": winner,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "check": comMarkdata(winner, "winner"),
- "updatetime": time.Now().Unix(),
- "province": province,
- "city": city,
- "district": district,
- })
- }
- }
- } else { //更新标准库
- if winnerperson != "" && winnertel != "" {
- v := map[string]interface{}{
- "contact_person": winnerperson,
- "phone": winnertel,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "infoid": infoid,
- }
- data := winMegerIndustry(entid, v)
- MongoTo.UpdateById(winnerent, entid,
- map[string]interface{}{
- "$set": data,
- //"$push": map[string]interface{}{"contact": v},
- },
- )
- }
- }
- //分包处理
- if packages, ok := tmp["package"].(map[string]interface{}); ok {
- entpacks := getWinnerPacks(infoid, packages, comRepTopscopeclass(topscopeclass))
- for name, contact := range entpacks {
- entid, _ := redis.GetRedisStr("winner", winnerbd, name)
- if entid == "" {
- data := comHisMegerNewData(winner, "winner", []map[string]interface{}{contact})
- if data != nil {
- _id := MongoTo.Save(winnerent, data)
- redis.PutRedis("winner", winnerbd, winner, _id.(primitive.ObjectID).Hex(), -1)
- }
- } else {
- data := winMegerIndustry(entid, contact)
- MongoTo.UpdateById(winnerent, entid,
- map[string]interface{}{
- "$set": data,
- "$push": map[string]interface{}{"contact": contact},
- },
- )
- }
- }
- }
- tmp = map[string]interface{}{}
- if index%100 == 0 {
- log.Println("winner index", index)
- }
- }
- log.Println("winner ok index", index)
- }
- //历史数据处理
- func historywinner(db, fromcoll string) {
- defer qu.Catch()
- log.Println("history start")
- sess := MongoFrom.GetMgoConn()
- defer MongoFrom.Close()
- it := sess.DB(db).C(fromcoll).Find(map[string]interface{}{}).Select(bson.M{"repeat": 1, "winner": 1, "winnertel": 1, "winnerperson": 1, "topscopeclass": 1}).Sort("_id").Iter()
- index := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
- if qu.IntAll(tmp["repeat"]) > 0 { //重复数据跳过
- continue
- }
- _id := mongo.BsonTOStringId(tmp["_id"])
- winchanbool <- true
- go func(tmp map[string]interface{}) {
- defer func() {
- <-winchanbool
- }()
- winner := qu.ObjToString(tmp["winner"])
- topscopeclass, _ := tmp["topscopeclass"].(primitive.A)
- if winner != "" && utf8.RuneCountInString(winner) > 4 {
- winnerperson := qu.ObjToString(tmp["winnerperson"])
- winnertel := qu.ObjToString(tmp["winnertel"])
- b, _ := redis.ExistRedis("winner", winnerbd, winner)
- if b {
- if winnerperson != "" || winnertel != "" {
- strs, _ := redis.GetRedisStr("winner", winnerbd, winner)
- ps := []interface{}{}
- err := json.Unmarshal([]byte(strs), &ps)
- if err == nil {
- v := map[string]interface{}{
- "contact_person": winnerperson,
- "phone": winnertel,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "infoid": _id,
- }
- ps = append(ps, v)
- bs, _ := json.Marshal(ps)
- redis.PutRedis("winner", winnerbd, winner, bs, -1)
- //log.Println(_id, index, winner)
- } else {
- log.Println("jsonErr", err)
- }
- }
- } else {
- val := []map[string]interface{}{}
- if winnerperson != "" || winnertel != "" {
- tmp := map[string]interface{}{
- "contact_person": winnerperson,
- "phone": winnertel,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "infoid": _id,
- }
- val = append(val, tmp)
- }
- bs, _ := json.Marshal(val)
- redis.PutRedis("winner", winnerbd, winner, bs, -1)
- MongoTo.Save(winnererr, map[string]interface{}{
- "name": winner,
- "topscopeclass": comRepTopscopeclass(topscopeclass),
- "updatetime": time.Now().Unix(),
- })
- }
- }
- }(tmp)
- tmp = map[string]interface{}{}
- if index%10000 == 0 {
- log.Println("index", index, _id)
- }
- }
- log.Println("history ok index", index)
- winStandarHistory(qu.ObjToString(sysconfig["mgotodb"]))
- }
- //查询winnererr标准化历史数据
- func winStandarHistory(db string) {
- defer qu.Catch()
- log.Println("开始标准化数据--winner", db)
- sessto := MongoTo.GetMgoConn()
- defer MongoTo.Close()
- it := sessto.DB(db).C(winnererr).Find(map[string]interface{}{}).Iter()
- index := 0
- entnum := 0
- for tmp := make(map[string]interface{}); it.Next(&tmp); index++ {
- err_id := mongo.BsonTOStringId(tmp["_id"])
- name := qu.ObjToString(tmp["name"])
- winchanbool <- true
- go func(tmp map[string]interface{}) {
- defer func() {
- <-winchanbool
- }()
- strs, err := redis.GetRedisStr("winner", winnerbd, name)
- if err != nil {
- return
- }
- ps := []map[string]interface{}{}
- err = json.Unmarshal([]byte(strs), &ps)
- if err == nil {
- data := comHisMegerNewData(name, "winner", ps)
- if data != nil {
- MongoTo.Save(winnerent, data)
- MongoTo.DeleteById(winnererr, err_id)
- entnum++
- } else { //未查询到企业,打标记并存表
- num := comMarkdata(name, "winner")
- tmp["check"] = num
- MongoTo.UpdateById(winnererr, err_id, map[string]interface{}{"$set": map[string]interface{}{"check": num}})
- }
- } else {
- log.Println("jsonErr", name, err)
- }
- }(tmp)
- if index%1000 == 0 {
- log.Println("标准化历史数据--winner", index, err_id, entnum)
- }
- tmp = map[string]interface{}{}
- }
- log.Println("标准化数据完成--winner", index, entnum)
- }
- //企业数据整合(已有标注信息)
- func winMegerIndustry(id string, ps map[string]interface{}) map[string]interface{} {
- tmp := MongoEnt.FindById(winnerent, id, bson.M{"industry": 1, "contact": 1})
- if len(tmp) < 1 {
- return nil
- }
- data := map[string]interface{}{}
- industry,ok := tmp["industry"].(primitive.A)
- if ok{
- tmpindustry := map[string]bool{}
- for _, v := range industry {
- tt := qu.ObjToString(v)
- tmpindustry[tt] = true
- }
- if topscopeclass, ok := ps["topscopeclass"].([]interface{}); ok {
- for _, v := range topscopeclass {
- tt := qu.ObjToString(v)
- tmpindustry[tt] = true
- }
- }
- newindustry := []interface{}{}
- for k, _ := range tmpindustry {
- newindustry = append(newindustry, k)
- }
- data["industry"] = newindustry
- }
- data["updatetime"] = time.Now().Unix()
- //contact
- contact := tmp["contact"].(primitive.A)
- contact = append(contact, ps)
- //bid_contact
- bid_contacts,contacts := bid_contact(contact)
- if len(bid_contacts) > 0 {
- data["bid_contact"] = bid_contacts
- }
- //sort 200
- contact = contacts
- data["contact"] = contact
- //提取固话和手机号
- latestFixedPhone, latestMobilePhone, timesFixedPhone, timesMobilePhone := getPhone(contact)
- data["latestfixedphone"] = latestFixedPhone
- data["latestmobilephone"] = latestMobilePhone
- data["fixedphone"] = timesFixedPhone
- data["mobilephone"] = timesMobilePhone
- return data
- }
- //中标单位分包提取联系方式
- func getWinnerPacks(infoid string, packs map[string]interface{}, topscopeclass []interface{}) map[string]map[string]interface{} {
- entmappacks := map[string]map[string]interface{}{}
- for _, v := range packs {
- if tmp, ok := v.(map[string]interface{}); ok {
- winner := qu.ObjToString(tmp["winner"])
- if utf8.RuneCountInString(winner) < 5 {
- continue
- }
- winnerperson := qu.ObjToString(tmp["winnerperson"])
- winnertel := qu.ObjToString(tmp["winnertel"])
- if winnerperson != "" || winnertel != "" {
- p := map[string]interface{}{
- "contact_person": winnerperson,
- "phone": winnertel,
- "topscopeclass": topscopeclass,
- "infoid": infoid,
- "extfrom": "package",
- }
- entmappacks[winner] = p
- }
- }
- }
- return entmappacks
- }
- //数据整合
- func comHisMegerNewData(name, datatype string, ps []map[string]interface{}) map[string]interface{} {
- tmp := MongoEnt.FindOne("qyxy", map[string]interface{}{"company_name": name})
- if len(tmp) < 1 {
- return nil
- }
- data := map[string]interface{}{
- "history_name": "",
- "credit_no": "",
- "company_email": "",
- "area_code": qu.ObjToString(tmp["area_code"]),
- "province": qu.ObjToString(tmp["province"]),
- "city": "",
- "district": "",
- "company_type": qu.ObjToString(tmp["company_type"]),
- "legal_person": qu.ObjToString(tmp["legal_person"]),
- "company_phone": "",
- "company_address": qu.ObjToString(tmp["company_address"]),
- "business_scope": qu.ObjToString(tmp["business_scope"]),
- "wechat_accounts": []interface{}{},
- "website": "",
- "contact": ps,
- "comeintime": time.Now().Unix(),
- "updatetime": time.Now().Unix(),
- }
- //统一信用代码
- credit_no := strings.TrimSpace(qu.ObjToString(tmp["credit_no"]))
- if credit_no == "" {
- credit_no = strings.TrimSpace(qu.ObjToString(tmp["company_code"]))
- }
- if credit_no != "" {
- data["credit_no"] = credit_no
- if len(credit_no) > 8 {
- dataNo := credit_no[2:8]
- if Addrs[dataNo] != nil {
- if v, ok := Addrs[dataNo].(map[string]interface{}); ok {
- if data["province"] == "" {
- data["province"] = v["province"]
- }
- data["city"] = v["city"]
- data["district"] = v["district"]
- }
- }
- }
- } else {
- return nil
- }
- //从年报中取网址、邮箱、打电话
- annual_reports := tmp["annual_reports"]
- if annual_reports != nil {
- report_websitesArr := []string{}
- if anreports, ok := annual_reports.(primitive.A); ok {
- for _, anreportmp := range anreports {
- if anreport, ok := anreportmp.(map[string]interface{}); ok {
- if websites, ok := anreport["report_websites"].(primitive.A); ok {
- for _, website := range websites {
- if rv, ok := website.(map[string]interface{}); ok {
- web := qu.ObjToString(rv["website_url"])
- if web != "" {
- report_websitesArr = append(report_websitesArr, web)
- }
- }
- }
- }
- company_email := qu.ObjToString(anreport["company_email"])
- if company_email != "" {
- data["company_email"] = company_email
- }
- company_phone := qu.ObjToString(anreport["company_phone"])
- if company_phone != "" {
- data["company_phone"] = company_phone
- }
- }
- }
- }
- if len(report_websitesArr) > 0 {
- data["website"] = strings.Join(report_websitesArr, ";")
- }
- }
- if datatype == "winner" {
- data["company_name"] = name
- data["partners"] = tmp["partners"]
- establish_date := tmp["establish_date"]
- if establish_date != nil {
- data["establish_date"] = qu.Int64All(establish_date) / 1000
- }
- capital := tmp["capital"]
- if capital != nil {
- data["capital"] = ObjToMoney([]interface{}{capital, ""})[0]
- }
- industry := make([]string, 0)
- tmpindustry := map[string]bool{}
- for _, p := range ps {
- if ts, ok := (p["topscopeclass"]).([]interface{}); ok {
- for _, v := range ts {
- tt := qu.ObjToString(v)
- tmpindustry[tt] = true
- }
- }
- }
- for k, _ := range tmpindustry {
- industry = append(industry, k)
- }
- data["industry"] = industry
- } else if datatype == "buyer" {
- data["buyer_name"] = name
- tmpbuyerclass := map[string]bool{}
- for _, p := range ps {
- tmpbuyerclass[qu.ObjToString(p["buyerclass"])] = true
- }
- buyerclass := []interface{}{}
- for k, _ := range tmpbuyerclass {
- buyerclass = append(buyerclass, k)
- }
- data["buyerclass"] = buyerclass
- data["ranks"] = ""
- data["type"] = ""
- data["address"] = ""
- } else {
- data["agency_name"] = name
- data["ranks"] = ""
- data["type"] = ""
- data["address"] = ""
- }
- return data
- }
- //根据规则数据打标记
- func comMarkdata(name, datatype string) int {
- tag := 0 //默认错误
- switch datatype {
- case "winner":
- for _, v := range WinnerRegOk {
- isok := v.MatchString(name)
- if isok { //匹配ok完,匹配err
- errflag := true
- for _, vRegErr := range WinnerRegErr {
- err := vRegErr.MatchString(name)
- if err {
- errflag = true
- break
- }
- }
- if errflag {
- tag = 1
- }
- }
- }
- case "buyer":
- for _, v := range BuyerRegOk {
- isok := v.MatchString(name)
- if isok { //匹配ok完,匹配err
- errflag := true
- for _, vRegErr := range BuyerRegErr {
- err := vRegErr.MatchString(name)
- if err {
- errflag = true
- break
- }
- }
- if errflag {
- tag = 1
- }
- }
- }
- case "agency":
- for _, v := range AgencyRegOk {
- isok := v.MatchString(name)
- if isok { //匹配ok完,匹配err
- errflag := true
- for _, vRegErr := range AgencyRegErr {
- err := vRegErr.MatchString(name)
- if err {
- errflag = true
- break
- }
- }
- if errflag {
- tag = 1
- }
- }
- }
- default:
- }
- return tag
- }
- //过滤行业冗余字符
- func comRepTopscopeclass(tops []interface{}) []interface{} {
- data := []interface{}{}
- for _, v := range tops {
- tt := qu.ObjToString(v)
- if len(tt) > 1 {
- data = append(data, tt[:len(tt)-1])
- }
- }
- return data
- }
|