123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296 |
- package main
- import (
- "context"
- "encoding/json"
- "esindex/config"
- "fmt"
- "github.com/olivere/elastic/v7"
- "go.uber.org/zap"
- "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
- "regexp"
- "strconv"
- "strings"
- "testing"
- "time"
- )
- func TestMatchService(t *testing.T) {
- data := `{
- "_id" : "6422d91e779467cff1a84885",
- "area" : "全国",
- "city" : "",
- "extracttype" : 0,
- "s_sha" : "d7cc66ac91dc6551991df0a37331b628de4c70973c6844f1ee6ef1c2d4e29e95",
- "jsondata" : {
- "area_city_district" : "福建",
- "buyer" : "莆田市第一医院",
- "item" : " 货物/医药品/医用材料/其他医用材料",
- "agency" : "福建省荔卫药械招标服务有限公司"
- },
- "channel" : "地方公告"
- }`
- var obj map[string]interface{}
- if err := json.Unmarshal([]byte(data), &obj); err != nil {
- panic(err)
- }
- objectType := MatchService(obj)
- fmt.Println("objectType=>", objectType)
- }
- func TestGetMappting(t *testing.T) {
- client, _ := elastic.NewClient(
- elastic.SetURL(config.Conf.DB.Es.Addr),
- elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password),
- elastic.SetSniff(false),
- )
- index := config.Conf.DB.Es.IndexB
- // 获取 Elasticsearch 索引的 mapping 信息
- mapping, err := client.GetMapping().Index(index).Do(context.Background())
- if err != nil {
- fmt.Println("Error getting Elasticsearch mapping:", err)
- return
- }
- indexName, _ := GetIndexName(client, index)
- properties := mapping[indexName].(map[string]interface{})["mappings"].(map[string]interface{})["properties"].(map[string]interface{})
- var errField = make([]string, 0)
- var okField = make([]string, 0)
- var analyzerMap = make(map[string]string) // 分词信息
- var esMap = make(map[string]string) //存储es 字段类型
- //
- for field, ftype := range BiddingField {
- eftypeMap, _ := properties[field].(map[string]interface{})
- var etype string
- var analyzer string
- if fftype, ok := eftypeMap["type"]; ok {
- etype = fftype.(string)
- esMap[field] = etype
- }
- if ffanalyzer, ok := eftypeMap["analyzer"]; ok {
- analyzer = ffanalyzer.(string)
- analyzerMap[field] = analyzer
- }
- if ftype != "" {
- if chargeType(ftype, etype) {
- okField = append(okField, field)
- } else {
- errField = append(errField, field)
- }
- } else {
- if field == "_id" {
- continue
- } else if field == "purchasinglist" || field == "package" || field == "winnerorder" || field == "procurementlist" {
- if eproperties, ok := eftypeMap["properties"]; ok {
- if eproMap, ok := eproperties.(map[string]interface{}); ok {
- for k, v := range eproMap {
- if innerMap, ok := v.(map[string]interface{}); ok {
- if innerType, ok := innerMap["type"]; ok {
- innerLevel := BiddingLevelField[field]
- esMap[fmt.Sprintf("%s.%s", field, k)] = innerType.(string)
- if chargeType(innerLevel[k], innerType.(string)) {
- okField = append(okField, fmt.Sprintf("%s.%s", field, k))
- } else {
- errField = append(errField, fmt.Sprintf("%s.%s", field, k))
- }
- }
- }
- }
- }
- }
- }
- }
- }
- if len(errField) > 0 {
- log.Info("test", zap.Int("错误字段数量", len(errField)))
- for _, field := range errField {
- if strings.Contains(field, ".") {
- fe := strings.Split(field, ".")
- log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingLevelField[fe[0]][fe[1]]), esMap[field]))
- } else {
- log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingField[field]), esMap[field]))
- }
- }
- } else {
- log.Info("es 字段类型检测结束,", zap.Int("所有字段都符合,检测字段数量为:", len(okField)))
- }
- }
- func TestGetIndexName(t *testing.T) {
- client, _ := elastic.NewClient(
- elastic.SetURL(config.Conf.DB.Es.Addr),
- elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password),
- elastic.SetSniff(false),
- )
- index := "bidding_v2"
- //index := config.Conf.DB.Es.IndexB
- name, _ := GetIndexName(client, index)
- fmt.Println("name ->", name)
- fmt.Println(name)
- }
- func TestBuyer(t *testing.T) {
- rowsPerPage := 1000
- currentPage := 1
- var total int
- for {
- fmt.Println("currentPage", currentPage)
- arrEs := make([]map[string]interface{}, 0)
- offset := (currentPage - 1) * rowsPerPage
- query := fmt.Sprintf(`
- SELECT * from goods
-
- LIMIT %d, %d;
- `, offset, rowsPerPage)
- result := Mysql.SelectBySql(query)
- if len(*result) > 0 {
- for _, re := range *result {
- arrEs = append(arrEs, re)
- }
- }
- total = total + len(*result)
- if len(*result) < rowsPerPage {
- break
- }
- // 继续查询下一页
- currentPage++
- }
- fmt.Println("over --------")
- fmt.Println("total --------", total)
- }
- func TestIsHanStart(t *testing.T) {
- name := "\\u001c 陈巴尔虎旗天顺矿业有限责任公司"
- fmt.Println("aa", IsCompanyName(name))
- //fmt.Println("uni", IsUnicodeStart(name))
- fmt.Println("name", getCompanyName(name))
- name = "RT农业发展(乌鲁木齐)有限责任公司"
- //fmt.Println("uni", IsUnicodeStart(name))
- fmt.Println("bb", IsCompanyName(name))
- fmt.Println("name", getCompanyName(name))
- name = "(宁波)综命能源服务有限公司"
- fmt.Println("name", getCompanyName(name))
- //fmt.Println("uni", IsUnicodeStart(name))
- fmt.Println("cc", IsCompanyName(name))
- a := 15
- b := 2
- fmt.Println((a / b) + 1)
- }
- func TestParseTime(t *testing.T) {
- //dateString := "2023年12月"
- //
- //// 正则表达式匹配
- //pattern := `(\d{4})[年.\-/]?(\d{1,2})[月.\-/]?(\d{1,2})日?$`
- //re := regexp.MustCompile(pattern)
- //match := re.FindStringSubmatch(dateString)
- //
- //if len(match) >= 4 {
- // year, _ := strconv.Atoi(match[1])
- // month, _ := strconv.Atoi(match[2])
- // day, _ := strconv.Atoi(match[3])
- //
- // dateInt64 := int64(year*10000 + month*100 + day)
- // fmt.Println(dateInt64)
- //} else {
- // fmt.Println("Date string does not match the pattern.")
- //}
- //
- //str := "2023年09月24日"
- //arr := getMethod(str)
- //
- //fmt.Println(arr)
- dateStrings := []string{
- "2022年3月",
- "2022-03",
- "2022-3",
- "2022-10-10",
- "2022.2",
- "2022.02.1",
- "2022.12.12",
- "2022年10月",
- "2022年10月12日",
- "2022-10",
- "2023/4/28 0:12:12",
- "[2023/8/28/]",
- "2023-8-28T12:12:12",
- "2023.8/28",
- "8/28",
- }
- for _, dateString := range dateStrings {
- timestamp, err := parseDateString(dateString)
- if err != nil {
- fmt.Printf("Error parsing date string '%s': %v\n", dateString, err)
- } else {
- fmt.Printf("Date string: '%s', Timestamp: %d\n", dateString, timestamp)
- }
- }
- }
// dateStringPatterns lists the layouts parseDateString understands, in the
// order they are tried (most specific first). Each pattern names the parts it
// captures ("year", "month", "day"). The leading (?:^|\D) and trailing
// (?:\D|$) keep a match from starting or ending in the middle of a longer run
// of digits. Compiled once at package scope instead of on every call.
var dateStringPatterns = []*regexp.Regexp{
	// Year, month and day with separators: 2022年10月12日, 2022-10-10, 2023.8/28.
	regexp.MustCompile(`(?:^|\D)(?P<year>\d{4})[年.\-/](?P<month>\d{1,2})[月.\-/](?P<day>\d{1,2})(?:\D|$)`),
	// Compact year, month and day: 20221010.
	regexp.MustCompile(`(?:^|\D)(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})(?:\D|$)`),
	// Year and month only: 2022年3月, 2022-03, 2022.2.
	regexp.MustCompile(`(?:^|\D)(?P<year>\d{4})[年.\-/](?P<month>\d{1,2})(?:\D|$)`),
	// Month and day only: 8/28, 8月28日.
	regexp.MustCompile(`(?:^|\D)(?P<month>\d{1,2})[月.\-/](?P<day>\d{1,2})(?:\D|$)`),
}

// parseDateString extracts a calendar date from dateString and returns the
// Unix timestamp (seconds) of midnight UTC on that date.
//
// The date may be surrounded by other text such as a time of day or brackets.
// A missing day defaults to 1.
//
// NOTE(review): a month/day-only input carries no year, so the current UTC
// year is assumed. The previous code stored the month in the year slot, so the
// intended year is not recoverable from it — confirm this choice.
//
// An error is returned when no supported layout is found or when the captured
// values do not form a real calendar date (for example month 13 or
// February 30); such values are no longer rolled over into a different date.
func parseDateString(dateString string) (int64, error) {
	for _, re := range dateStringPatterns {
		match := re.FindStringSubmatch(dateString)
		if match == nil {
			continue
		}

		// Defaults for the parts a layout may omit.
		year, month, day := time.Now().UTC().Year(), 0, 1
		for i, name := range re.SubexpNames() {
			if name == "" {
				continue // whole match or a non-capturing position
			}
			n, err := strconv.Atoi(match[i])
			if err != nil {
				return 0, fmt.Errorf("parsing %s in %q: %w", name, dateString, err)
			}
			switch name {
			case "year":
				year = n
			case "month":
				month = n
			case "day":
				day = n
			}
		}

		// time.Date normalises out-of-range values (month 13 becomes January
		// of the next year), so a round-trip mismatch means the input was not
		// a real date.
		parsed := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
		if parsed.Year() != year || int(parsed.Month()) != month || parsed.Day() != day {
			return 0, fmt.Errorf("date out of range in %q", dateString)
		}
		return parsed.Unix(), nil
	}
	return 0, fmt.Errorf("unrecognized date format")
}
- func TestYuceEndtime(t *testing.T) {
- data, _ := MgoB.FindById("bidding", "657ac63c6977356f55773c3e", nil)
- YuceEndtime(*data)
- fmt.Println(data)
- }
|