123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- package main
import (
	"context"
	"fmt"
	"log"
	"math/rand"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	//"github.com/cron"
	"github.com/wangbin/jiebago"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo/options"
	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
)
- var (
- AreaFiled = []string{"credit_no", "company_code"}
- WordsArr = []string{"研发", "研制", "开发", "生产", "制造", "制作", "加工", "种植"}
- seg jiebago.Segmenter
- regPre, _ = regexp.Compile(`^(.+[省|市|区|县|州])?(.+)`)
- seed = 188
- )
// company_base lists the field names that describe a company's basic
// information.
//
// NOTE(review): "company_status" appears twice in this list. The duplicate is
// kept as-is because the consumers of this slice are not visible here —
// confirm whether it is intentional.
var company_base = []string{
	"company_name",
	"company_code",
	"credit_no",
	"org_code",
	"legal_person",
	"company_status",
	"authority",
	"establish_date",
	"issue_date",
	"operation_startdate",
	"operation_enddate",
	"capital",
	"company_type",
	"company_status",
	"company_address",
	"business_scope",
	"cancel_date",
	"cancel_reason",
	"revoke_date",
	"revoke_reason",
	"legal_person_type",
	"real_capital",
	"en_name",
	"list_code",
	"tax_code",
	"use_flag",
}
// province_map translates an upper-case province abbreviation code (for
// example "BJ") into the province's Chinese short name (for example "北京").
var province_map = map[string]string{
	"BJ":   "北京",
	"TJ":   "天津",
	"SH":   "上海",
	"CQ":   "重庆",
	"HB":   "河北",
	"SX":   "山西",
	"NMG":  "内蒙古",
	"LN":   "辽宁",
	"JL":   "吉林",
	"HLJ":  "黑龙江",
	"JS":   "江苏",
	"ZJ":   "浙江",
	"AH":   "安徽",
	"FJ":   "福建",
	"JX":   "江西",
	"SD":   "山东",
	"HEN":  "河南",
	"HUB":  "湖北",
	"HUN":  "湖南",
	"GD":   "广东",
	"GX":   "广西",
	"HAIN": "海南",
	"SC":   "四川",
	"GZ":   "贵州",
	"YN":   "云南",
	"XZ":   "西藏",
	"SAX":  "陕西",
	"GS":   "甘肃",
	"QH":   "青海",
	"NX":   "宁夏",
	"XJ":   "新疆",
}
- // company_shortname 企业简称
- func getStName(name string) string {
- regnames := regPre.FindStringSubmatch(name)
- lenth := len(regnames)
- if lenth < 1 {
- return ""
- }
- newstr := regnames[lenth-1]
- ch := seg.Cut(newstr, false)
- val := []string{}
- for word := range ch {
- val = append(val, word)
- }
- name2 := ""
- for _, v := range val {
- name2 = name2 + v
- if len([]rune(name2)) >= 4 {
- break
- }
- }
- return name2
- }
// DealMemberNo converts a textual member/head count into an int.
//
// Leading and trailing spaces are stripped first. An empty value and the
// literal "企业选择不公示" ("the company chose not to disclose") both yield 0.
// A value ending in "人" is parsed from the text that precedes the first "人";
// anything else is parsed as a whole. The result of strconv.Atoi is returned
// with its error discarded, so non-numeric text yields 0.
func DealMemberNo(memberNo string) (no int) {
	trimmed := strings.Trim(memberNo, " ")

	switch {
	case trimmed == "", trimmed == "企业选择不公示":
		return 0
	case strings.HasSuffix(trimmed, "人"):
		// HasSuffix guarantees the index is found; only the part before the
		// first "人" is considered.
		digits := trimmed[:strings.Index(trimmed, "人")]
		n, _ := strconv.Atoi(digits)
		return n
	}

	n, _ := strconv.Atoi(trimmed)
	return n
}
- //updateStd 根据qyxy_std 表,更新 nseo_id
- func updateStd() {
- id := int64(GF.Env.Autoid) //起始 autoid
- //生成seoid
- //31395235740
- startSeoId := int64(GF.Env.Seoid) //起始 nseo_id
- rand.Seed(time.Now().UnixNano())
- count := int64(0)
- //var wg sync.WaitGroup
- sess := MongoTool.GetMgoConn()
- defer MongoTool.DestoryMongoConn(sess)
- ctx, _ := context.WithTimeout(context.Background(), 99999*time.Hour)
- coll := sess.M.C.Database("mixdata").Collection("qyxy_std")
- find := options.Find().SetBatchSize(200).SetSort(bson.D{bson.E{"autoid", 1}}).SetProjection(bson.M{"_id": 1, "autoid": 1, "establish_date": 1, "create_time_msql": 1})
- cur, err := coll.Find(ctx, bson.M{"autoid": bson.M{"$gt": id}}, find)
- if err != nil {
- log.Println("mgo find err", err.Error())
- return
- }
- for tmp := make(map[string]interface{}); cur.Next(ctx); {
- count++
- if cur != nil {
- cur.Decode(&tmp)
- id = util.Int64All(tmp["autoid"])
- ed, _ := tmp["establish_date"].(string)
- pre := ""
- if len(ed) == 10 {
- pre = ed[2:4] + ed[5:7] + ed[8:10]
- } else {
- cd, _ := tmp["create_time_msql"].(string)
- if len(cd) > 9 {
- pre = cd[2:4] + cd[5:7] + cd[8:10]
- }
- }
- startSeoId += int64(rand.Intn(seed) + 2)
- nseo_id := fmt.Sprintf("%s%d", pre, startSeoId)
- update := make(map[string]interface{})
- update["$set"] = bson.M{
- "nseo_id": nseo_id,
- "autoid": id,
- }
- where := map[string]interface{}{
- "_id": tmp["_id"],
- }
- go MongoTool.Update("qyxy_std", where, update, true, false)
- if count%10000 == 0 {
- log.Println("current,id", count, id, startSeoId, nseo_id)
- }
- tmp = make(map[string]interface{})
- } else {
- cur.Close(ctx)
- break
- }
- }
- log.Println("over ----;autoid: ", id, "seo_id:", startSeoId)
- time.Sleep(20 * time.Minute)
- }
|