package main import ( "context" "encoding/json" "esindex/config" "fmt" "github.com/olivere/elastic/v7" "go.uber.org/zap" "jygit.jydev.jianyu360.cn/data_processing/common_utils/log" "regexp" "strconv" "strings" "testing" "time" ) func TestMatchService(t *testing.T) { data := `{ "_id" : "6422d91e779467cff1a84885", "area" : "全国", "city" : "", "extracttype" : 0, "s_sha" : "d7cc66ac91dc6551991df0a37331b628de4c70973c6844f1ee6ef1c2d4e29e95", "jsondata" : { "area_city_district" : "福建", "buyer" : "莆田市第一医院", "item" : " 货物/医药品/医用材料/其他医用材料", "agency" : "福建省荔卫药械招标服务有限公司" }, "channel" : "地方公告" }` var obj map[string]interface{} if err := json.Unmarshal([]byte(data), &obj); err != nil { panic(err) } objectType := MatchService(obj) fmt.Println("objectType=>", objectType) } func TestGetMappting(t *testing.T) { client, _ := elastic.NewClient( elastic.SetURL(config.Conf.DB.Es.Addr), elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password), elastic.SetSniff(false), ) index := config.Conf.DB.Es.IndexB // 获取 Elasticsearch 索引的 mapping 信息 mapping, err := client.GetMapping().Index(index).Do(context.Background()) if err != nil { fmt.Println("Error getting Elasticsearch mapping:", err) return } indexName, _ := GetIndexName(client, index) properties := mapping[indexName].(map[string]interface{})["mappings"].(map[string]interface{})["properties"].(map[string]interface{}) var errField = make([]string, 0) var okField = make([]string, 0) var analyzerMap = make(map[string]string) // 分词信息 var esMap = make(map[string]string) //存储es 字段类型 // for field, ftype := range BiddingField { eftypeMap, _ := properties[field].(map[string]interface{}) var etype string var analyzer string if fftype, ok := eftypeMap["type"]; ok { etype = fftype.(string) esMap[field] = etype } if ffanalyzer, ok := eftypeMap["analyzer"]; ok { analyzer = ffanalyzer.(string) analyzerMap[field] = analyzer } if ftype != "" { if chargeType(ftype, etype) { okField = append(okField, field) } else { errField = append(errField, field) } } else { if field == "_id" { continue } else if field == "purchasinglist" || field == "package" || field == "winnerorder" || field == "procurementlist" { if eproperties, ok := eftypeMap["properties"]; ok { if eproMap, ok := eproperties.(map[string]interface{}); ok { for k, v := range eproMap { if innerMap, ok := v.(map[string]interface{}); ok { if innerType, ok := innerMap["type"]; ok { innerLevel := BiddingLevelField[field] esMap[fmt.Sprintf("%s.%s", field, k)] = innerType.(string) if chargeType(innerLevel[k], innerType.(string)) { okField = append(okField, fmt.Sprintf("%s.%s", field, k)) } else { errField = append(errField, fmt.Sprintf("%s.%s", field, k)) } } } } } } } } } if len(errField) > 0 { log.Info("test", zap.Int("错误字段数量", len(errField))) for _, field := range errField { if strings.Contains(field, ".") { fe := strings.Split(field, ".") log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingLevelField[fe[0]][fe[1]]), esMap[field])) } else { log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingField[field]), esMap[field])) } } } else { log.Info("es 字段类型检测结束,", zap.Int("所有字段都符合,检测字段数量为:", len(okField))) } } func TestGetIndexName(t *testing.T) { client, _ := elastic.NewClient( elastic.SetURL(config.Conf.DB.Es.Addr), elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password), elastic.SetSniff(false), ) index := "bidding_v2" //index := config.Conf.DB.Es.IndexB name, _ := GetIndexName(client, index) fmt.Println("name ->", name) fmt.Println(name) } func TestBuyer(t *testing.T) { rowsPerPage := 1000 currentPage := 1 var total int for { fmt.Println("currentPage", currentPage) arrEs := make([]map[string]interface{}, 0) offset := (currentPage - 1) * rowsPerPage query := fmt.Sprintf(` SELECT * from goods LIMIT %d, %d; `, offset, rowsPerPage) result := Mysql.SelectBySql(query) if len(*result) > 0 { for _, re := range *result { arrEs = append(arrEs, re) } } total = total + len(*result) if len(*result) < rowsPerPage { break } // 继续查询下一页 currentPage++ } fmt.Println("over --------") fmt.Println("total --------", total) } func TestIsHanStart(t *testing.T) { name := "\\u001c 陈巴尔虎旗天顺矿业有限责任公司" fmt.Println("aa", IsCompanyName(name)) //fmt.Println("uni", IsUnicodeStart(name)) fmt.Println("name", getCompanyName(name)) name = "RT农业发展(乌鲁木齐)有限责任公司" //fmt.Println("uni", IsUnicodeStart(name)) fmt.Println("bb", IsCompanyName(name)) fmt.Println("name", getCompanyName(name)) name = "(宁波)综命能源服务有限公司" fmt.Println("name", getCompanyName(name)) //fmt.Println("uni", IsUnicodeStart(name)) fmt.Println("cc", IsCompanyName(name)) a := 15 b := 2 fmt.Println((a / b) + 1) } func TestParseTime(t *testing.T) { //dateString := "2023年12月" // //// 正则表达式匹配 //pattern := `(\d{4})[年.\-/]?(\d{1,2})[月.\-/]?(\d{1,2})日?$` //re := regexp.MustCompile(pattern) //match := re.FindStringSubmatch(dateString) // //if len(match) >= 4 { // year, _ := strconv.Atoi(match[1]) // month, _ := strconv.Atoi(match[2]) // day, _ := strconv.Atoi(match[3]) // // dateInt64 := int64(year*10000 + month*100 + day) // fmt.Println(dateInt64) //} else { // fmt.Println("Date string does not match the pattern.") //} // //str := "2023年09月24日" //arr := getMethod(str) // //fmt.Println(arr) dateStrings := []string{ "2022年3月", "2022-03", "2022-3", "2022-10-10", "2022.2", "2022.02.1", "2022.12.12", "2022年10月", "2022年10月12日", "2022-10", "2023/4/28 0:12:12", "[2023/8/28/]", "2023-8-28T12:12:12", "2023.8/28", "8/28", } for _, dateString := range dateStrings { timestamp, err := parseDateString(dateString) if err != nil { fmt.Printf("Error parsing date string '%s': %v\n", dateString, err) } else { fmt.Printf("Date string: '%s', Timestamp: %d\n", dateString, timestamp) } } } func parseDateString(dateString string) (int64, error) { // Regular expressions for different date formats regexPatterns := []string{ //`^(\d{4})年(\d{1,2})月(\d{1,2})日?$`, //`^(\d{4})年(\d{1,2})月$`, //`^(\d{4})-(\d{1,2})-(\d{1,2})$`, //`^(\d{4})年(\d{1,2})$`, //`^(\d{4})\-(\d{1,2})$`, //`^(\d{4})\.(\d{1,2})\.(\d{1,2})$`, //`^(\d{4})\.(\d{1,2})$`, //`^(\d{4})年$`, //`^(\d{4})$`, `(\d{4})[年.\-/]?(\d{1,2})[月.\-/]?(\d{1,2})日?`, `(\d{1,2})[月.\-/]?(\d{1,2})日?`, } for _, pattern := range regexPatterns { re := regexp.MustCompile(pattern) match := re.FindStringSubmatch(dateString) if len(match) > 0 { year, _ := strconv.Atoi(match[1]) month, _ := strconv.Atoi(match[2]) var day int if len(match) >= 4 && match[3] != "" { day, _ = strconv.Atoi(match[3]) } else { day = 1 } parsedTime := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC) return parsedTime.Unix(), nil } } return 0, fmt.Errorf("unrecognized date format") }