package main import ( "context" "fmt" util "jygit.jydev.jianyu360.cn/data_processing/common_utils" //"github.com/cron" "github.com/wangbin/jiebago" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo/options" "log" "math/rand" "regexp" "strconv" "strings" "time" ) var ( AreaFiled = []string{"credit_no", "company_code"} WordsArr = []string{"研发", "研制", "开发", "生产", "制造", "制作", "加工", "种植"} seg jiebago.Segmenter regPre, _ = regexp.Compile(`^(.+[省|市|区|县|州])?(.+)`) seed = 188 //startSeoId int64 ) // 企业基本信息 var company_base = []string{"company_name", "company_code", "credit_no", "org_code", "legal_person", "company_status", "authority", "establish_date", "issue_date", "operation_startdate", "operation_enddate", "capital", "company_type", "company_status", "company_address", "business_scope", "cancel_date", "cancel_reason", "revoke_date", "revoke_reason", "legal_person_type", "real_capital", "en_name", "list_code", "tax_code", "use_flag", } var province_map = map[string]string{ "BJ": "北京", "TJ": "天津", "SH": "上海", "CQ": "重庆", "HB": "河北", "SX": "山西", "NMG": "内蒙古", "LN": "辽宁", "JL": "吉林", "HLJ": "黑龙江", "JS": "江苏", "ZJ": "浙江", "AH": "安徽", "FJ": "福建", "JX": "江西", "SD": "山东", "HEN": "河南", "HUB": "湖北", "HUN": "湖南", "GD": "广东", "GX": "广西", "HAIN": "海南", "SC": "四川", "GZ": "贵州", "YN": "云南", "XZ": "西藏", "SAX": "陕西", "GS": "甘肃", "QH": "青海", "NX": "宁夏", "XJ": "新疆", } // company_shortname 企业简称 func getStName(name string) string { regnames := regPre.FindStringSubmatch(name) lenth := len(regnames) if lenth < 1 { return "" } newstr := regnames[lenth-1] ch := seg.Cut(newstr, false) val := []string{} for word := range ch { val = append(val, word) } name2 := "" for _, v := range val { name2 = name2 + v if len([]rune(name2)) >= 4 { break } } return name2 } func DealMemberNo(memberNo string) (no int) { memberNo = strings.Trim(memberNo, " ") if memberNo == "" { return no } if memberNo == "企业选择不公示" { return no } if strings.HasSuffix(memberNo, "人") { cou := strings.Split(memberNo, "人")[0] count, _ := strconv.Atoi(cou) return count } no, _ = strconv.Atoi(memberNo) return no } // updateStd 根据qyxy_std 表,更新 nseo_id func updateStd() { db := DB() defer db.Close() // 创建数据库操作对象 badgerDB := &bdb{db} var id int64 var startSeoId int64 //获取 seo_id seoIdOld := badgerDB.Get("seoid") if seoIdOld == "" { startSeoId = int64(GF.Env.Seoid) //起始 nseo_id badgerDB.Set("seoid", strconv.Itoa(GF.Env.Seoid)) } else { seoid, _ := strconv.Atoi(seoIdOld) startSeoId = int64(seoid) } log.Println("seoid--------", startSeoId) if startSeoId == 0 { log.Fatalln("seoid 为 0,请检查配置") } rand.Seed(time.Now().UnixNano()) count := int64(0) //var wg sync.WaitGroup sess := MongoTool.GetMgoConn() defer MongoTool.DestoryMongoConn(sess) ctx, _ := context.WithTimeout(context.Background(), 99999*time.Hour) coll := sess.M.C.Database("mixdata").Collection("qyxy_std") find := options.Find().SetBatchSize(200).SetSort(bson.D{bson.E{"updatetime", 1}}).SetProjection(bson.M{"_id": 1, "autoid": 1, "establish_date": 1, "create_time_msql": 1, "nseo_id": 1}) cur, err := coll.Find(ctx, bson.M{"updatetime": bson.M{"$gt": startTime}}, find) if err != nil { log.Println("mgo find err", err.Error()) return } for tmp := make(map[string]interface{}); cur.Next(ctx); { count++ if cur != nil { cur.Decode(&tmp) id = util.Int64All(tmp["autoid"]) //存在就不处理 if seoid, ok := tmp["nseo_id"]; ok && seoid != nil { continue } ed, _ := tmp["establish_date"].(string) pre := "" if len(ed) == 10 { pre = ed[2:4] + ed[5:7] + ed[8:10] } else { cd, _ := tmp["create_time_msql"].(string) if len(cd) > 9 { pre = cd[2:4] + cd[5:7] + cd[8:10] } } startSeoId += int64(rand.Intn(seed) + 2) nseo_id := fmt.Sprintf("%s%d", pre, startSeoId) update := make(map[string]interface{}) update["$set"] = bson.M{ "nseo_id": nseo_id, } where := map[string]interface{}{ "_id": tmp["_id"], } res := MongoTool.Update("qyxy_std", where, update, true, false) if !res { log.Println(tmp["_id"], "数据更新失败,nseo_id", nseo_id) } if count%10000 == 0 { log.Println("current,autoid,startSeoId,nseo_id", count, id, startSeoId, nseo_id) } tmp = make(map[string]interface{}) } else { cur.Close(ctx) break } } badgerDB.Set("autoid", strconv.Itoa(int(id))) badgerDB.Set("seoid", strconv.Itoa(int(startSeoId))) log.Println("over ----;", "autoid", id, "seoid:", startSeoId) }