es_test.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. package main
  2. import (
  3. "context"
  4. "encoding/json"
  5. "esindex/config"
  6. "fmt"
  7. "github.com/olivere/elastic/v7"
  8. "go.uber.org/zap"
  9. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. "testing"
  14. "time"
  15. )
  16. func TestMatchService(t *testing.T) {
  17. data := `{
  18. "_id" : "6422d91e779467cff1a84885",
  19. "area" : "全国",
  20. "city" : "",
  21. "extracttype" : 0,
  22. "s_sha" : "d7cc66ac91dc6551991df0a37331b628de4c70973c6844f1ee6ef1c2d4e29e95",
  23. "jsondata" : {
  24. "area_city_district" : "福建",
  25. "buyer" : "莆田市第一医院",
  26. "item" : " 货物/医药品/医用材料/其他医用材料",
  27. "agency" : "福建省荔卫药械招标服务有限公司"
  28. },
  29. "channel" : "地方公告"
  30. }`
  31. var obj map[string]interface{}
  32. if err := json.Unmarshal([]byte(data), &obj); err != nil {
  33. panic(err)
  34. }
  35. objectType := MatchService(obj)
  36. fmt.Println("objectType=>", objectType)
  37. }
  38. func TestGetMappting(t *testing.T) {
  39. client, _ := elastic.NewClient(
  40. elastic.SetURL(config.Conf.DB.Es.Addr),
  41. elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password),
  42. elastic.SetSniff(false),
  43. )
  44. index := config.Conf.DB.Es.IndexB
  45. // 获取 Elasticsearch 索引的 mapping 信息
  46. mapping, err := client.GetMapping().Index(index).Do(context.Background())
  47. if err != nil {
  48. fmt.Println("Error getting Elasticsearch mapping:", err)
  49. return
  50. }
  51. indexName, _ := GetIndexName(client, index)
  52. properties := mapping[indexName].(map[string]interface{})["mappings"].(map[string]interface{})["properties"].(map[string]interface{})
  53. var errField = make([]string, 0)
  54. var okField = make([]string, 0)
  55. var analyzerMap = make(map[string]string) // 分词信息
  56. var esMap = make(map[string]string) //存储es 字段类型
  57. //
  58. for field, ftype := range BiddingField {
  59. eftypeMap, _ := properties[field].(map[string]interface{})
  60. var etype string
  61. var analyzer string
  62. if fftype, ok := eftypeMap["type"]; ok {
  63. etype = fftype.(string)
  64. esMap[field] = etype
  65. }
  66. if ffanalyzer, ok := eftypeMap["analyzer"]; ok {
  67. analyzer = ffanalyzer.(string)
  68. analyzerMap[field] = analyzer
  69. }
  70. if ftype != "" {
  71. if chargeType(ftype, etype) {
  72. okField = append(okField, field)
  73. } else {
  74. errField = append(errField, field)
  75. }
  76. } else {
  77. if field == "_id" {
  78. continue
  79. } else if field == "purchasinglist" || field == "package" || field == "winnerorder" || field == "procurementlist" {
  80. if eproperties, ok := eftypeMap["properties"]; ok {
  81. if eproMap, ok := eproperties.(map[string]interface{}); ok {
  82. for k, v := range eproMap {
  83. if innerMap, ok := v.(map[string]interface{}); ok {
  84. if innerType, ok := innerMap["type"]; ok {
  85. innerLevel := BiddingLevelField[field]
  86. esMap[fmt.Sprintf("%s.%s", field, k)] = innerType.(string)
  87. if chargeType(innerLevel[k], innerType.(string)) {
  88. okField = append(okField, fmt.Sprintf("%s.%s", field, k))
  89. } else {
  90. errField = append(errField, fmt.Sprintf("%s.%s", field, k))
  91. }
  92. }
  93. }
  94. }
  95. }
  96. }
  97. }
  98. }
  99. }
  100. if len(errField) > 0 {
  101. log.Info("test", zap.Int("错误字段数量", len(errField)))
  102. for _, field := range errField {
  103. if strings.Contains(field, ".") {
  104. fe := strings.Split(field, ".")
  105. log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingLevelField[fe[0]][fe[1]]), esMap[field]))
  106. } else {
  107. log.Info(fmt.Sprintf("%s 字段类型错误", field), zap.String(fmt.Sprintf("数据库类型为:%s,但是es字段类型是:", BiddingField[field]), esMap[field]))
  108. }
  109. }
  110. } else {
  111. log.Info("es 字段类型检测结束,", zap.Int("所有字段都符合,检测字段数量为:", len(okField)))
  112. }
  113. }
  114. func TestGetIndexName(t *testing.T) {
  115. client, _ := elastic.NewClient(
  116. elastic.SetURL(config.Conf.DB.Es.Addr),
  117. elastic.SetBasicAuth(config.Conf.DB.Es.Username, config.Conf.DB.Es.Password),
  118. elastic.SetSniff(false),
  119. )
  120. index := "bidding_v2"
  121. //index := config.Conf.DB.Es.IndexB
  122. name, _ := GetIndexName(client, index)
  123. fmt.Println("name ->", name)
  124. fmt.Println(name)
  125. }
  126. func TestBuyer(t *testing.T) {
  127. rowsPerPage := 1000
  128. currentPage := 1
  129. var total int
  130. for {
  131. fmt.Println("currentPage", currentPage)
  132. arrEs := make([]map[string]interface{}, 0)
  133. offset := (currentPage - 1) * rowsPerPage
  134. query := fmt.Sprintf(`
  135. SELECT * from goods
  136. LIMIT %d, %d;
  137. `, offset, rowsPerPage)
  138. result := Mysql.SelectBySql(query)
  139. if len(*result) > 0 {
  140. for _, re := range *result {
  141. arrEs = append(arrEs, re)
  142. }
  143. }
  144. total = total + len(*result)
  145. if len(*result) < rowsPerPage {
  146. break
  147. }
  148. // 继续查询下一页
  149. currentPage++
  150. }
  151. fmt.Println("over --------")
  152. fmt.Println("total --------", total)
  153. }
  154. func TestIsHanStart(t *testing.T) {
  155. name := "\\u001c 陈巴尔虎旗天顺矿业有限责任公司"
  156. fmt.Println("aa", IsCompanyName(name))
  157. //fmt.Println("uni", IsUnicodeStart(name))
  158. fmt.Println("name", getCompanyName(name))
  159. name = "RT农业发展(乌鲁木齐)有限责任公司"
  160. //fmt.Println("uni", IsUnicodeStart(name))
  161. fmt.Println("bb", IsCompanyName(name))
  162. fmt.Println("name", getCompanyName(name))
  163. name = "(宁波)综命能源服务有限公司"
  164. fmt.Println("name", getCompanyName(name))
  165. //fmt.Println("uni", IsUnicodeStart(name))
  166. fmt.Println("cc", IsCompanyName(name))
  167. a := 15
  168. b := 2
  169. fmt.Println((a / b) + 1)
  170. }
  171. func TestParseTime(t *testing.T) {
  172. //dateString := "2023年12月"
  173. //
  174. //// 正则表达式匹配
  175. //pattern := `(\d{4})[年.\-/]?(\d{1,2})[月.\-/]?(\d{1,2})日?$`
  176. //re := regexp.MustCompile(pattern)
  177. //match := re.FindStringSubmatch(dateString)
  178. //
  179. //if len(match) >= 4 {
  180. // year, _ := strconv.Atoi(match[1])
  181. // month, _ := strconv.Atoi(match[2])
  182. // day, _ := strconv.Atoi(match[3])
  183. //
  184. // dateInt64 := int64(year*10000 + month*100 + day)
  185. // fmt.Println(dateInt64)
  186. //} else {
  187. // fmt.Println("Date string does not match the pattern.")
  188. //}
  189. //
  190. //str := "2023年09月24日"
  191. //arr := getMethod(str)
  192. //
  193. //fmt.Println(arr)
  194. dateStrings := []string{
  195. "2022年3月",
  196. "2022-03",
  197. "2022-3",
  198. "2022-10-10",
  199. "2022.2",
  200. "2022.02.1",
  201. "2022.12.12",
  202. "2022年10月",
  203. "2022年10月12日",
  204. "2022-10",
  205. "2023/4/28 0:12:12",
  206. "[2023/8/28/]",
  207. "2023-8-28T12:12:12",
  208. "2023.8/28",
  209. "8/28",
  210. }
  211. for _, dateString := range dateStrings {
  212. timestamp, err := parseDateString(dateString)
  213. if err != nil {
  214. fmt.Printf("Error parsing date string '%s': %v\n", dateString, err)
  215. } else {
  216. fmt.Printf("Date string: '%s', Timestamp: %d\n", dateString, timestamp)
  217. }
  218. }
  219. }
  // Patterns used by parseDateString, compiled once instead of on every call.
  // Both are unanchored, so a date embedded in surrounding text is still found.
  var (
  	// parseDateYearFirstRe matches a 4-digit year, a 1-2 digit month and an
  	// optional 1-2 digit day. Each part may be separated by one of 年 月 . - /
  	// or by nothing at all (e.g. "20221010"). The day group is optional so
  	// that a two-digit month such as "2022-10" is not split into month 1 and
  	// day 0.
  	parseDateYearFirstRe = regexp.MustCompile(`(\d{4})[年.\-/]?(\d{1,2})(?:[月.\-/]?(\d{1,2}))?`)

  	// parseDateMonthDayRe matches a year-less "month<sep>day" such as "8/28"
  	// or "8月28日". The separator is mandatory so that an arbitrary pair of
  	// digits is not mistaken for a date.
  	parseDateMonthDayRe = regexp.MustCompile(`(\d{1,2})[月.\-/](\d{1,2})`)
  )

  // parseDateString extracts the first date found in dateString and returns it
  // as a Unix timestamp (seconds) at midnight UTC of that day.
  //
  // Recognised forms, tried in this order:
  //   - year, month and optional day ("2022年10月12日", "2022-10-10", "2022.2",
  //     "2022-03", "20221010", "2023/4/28 0:12:12"); a missing day means the
  //     first of the month;
  //   - month and day without a year ("8/28"); the current UTC year is used.
  //     NOTE(review): the year to assume for year-less input is a guess -
  //     confirm it against how callers use the result.
  //
  // An error is returned when no pattern matches or when the matched numbers
  // do not form a real calendar date (month 13, day 0, February 30, ...).
  func parseDateString(dateString string) (int64, error) {
  	var year, month, day int

  	// strconv.Atoi cannot fail below: every captured group consists of one to
  	// four ASCII digits.
  	if m := parseDateYearFirstRe.FindStringSubmatch(dateString); m != nil {
  		year, _ = strconv.Atoi(m[1])
  		month, _ = strconv.Atoi(m[2])
  		day = 1
  		if m[3] != "" {
  			day, _ = strconv.Atoi(m[3])
  		}
  	} else if m := parseDateMonthDayRe.FindStringSubmatch(dateString); m != nil {
  		year = time.Now().UTC().Year()
  		month, _ = strconv.Atoi(m[1])
  		day, _ = strconv.Atoi(m[2])
  	} else {
  		return 0, fmt.Errorf("unrecognized date format")
  	}

  	// time.Date silently normalises out-of-range values (month 13 becomes
  	// January of the next year), so compare the result with the input to
  	// reject impossible dates instead of returning a shifted one.
  	parsed := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
  	if parsed.Year() != year || parsed.Month() != time.Month(month) || parsed.Day() != day {
  		return 0, fmt.Errorf("invalid calendar date %d-%d-%d in %q", year, month, day, dateString)
  	}
  	return parsed.Unix(), nil
  }