// pinyin.go (3.3 KB)

  1. package util
  2. import (
  3. "errors"
  4. "strconv"
  5. "strings"
  6. "unicode/utf8"
  7. )
  var (
  	// tones lists the tone-marked vowels, one row per tone (row 0 is the
  	// first tone, row 3 the fourth). Within a row the column order matches
  	// neutrals: lower-case a, e, i, o, u, ü followed by their capitals.
  	tones = [][]rune{
  		[]rune("āēīōūǖĀĒĪŌŪǕ"),
  		[]rune("áéíóúǘÁÉÍÓÚǗ"),
  		[]rune("ǎěǐǒǔǚǍĚǏǑǓǙ"),
  		[]rune("àèìòùǜÀÈÌÒÙǛ"),
  	}

  	// neutrals holds the unmarked ASCII letter for each column of tones;
  	// 'v' / 'V' stand in for ü / Ü.
  	neutrals = []rune("aeiouvAEIOUV")

  	// ErrInitialize is returned when a conversion is attempted before the
  	// lookup tables have been built.
  	ErrInitialize = errors.New("not yet initialized")
  )
  // tonesMap maps a tone-marked vowel to its unmarked ASCII letter.
  var tonesMap map[rune]rune

  // numericTonesMap maps a tone-marked vowel to its tone number (1-4).
  var numericTonesMap map[rune]int

  // pinyinMap maps a Chinese character to its pinyin, tone marks included.
  var pinyinMap map[rune]string

  // initialized reports whether init has finished building the tables above.
  var initialized bool
  // Mode selects the form of the pinyin that a conversion produces.
  type Mode int

  // Available modes. Numbering starts at 1, so the zero value of Mode is
  // none of the named modes.
  const (
  	_ Mode = iota

  	// WithoutTone is the default mode: no tone marks, e.g. "guo".
  	WithoutTone

  	// Tone keeps the tone marks, e.g. "guó".
  	Tone

  	// InitialsInCapitals drops the tone marks and capitalizes the first
  	// letter, e.g. "Guo".
  	InitialsInCapitals
  )
  33. type pinyin struct {
  34. origin string
  35. split string
  36. mode Mode
  37. }
  38. func init() {
  39. tonesMap = make(map[rune]rune)
  40. numericTonesMap = make(map[rune]int)
  41. pinyinMap = make(map[rune]string)
  42. for i, runes := range tones {
  43. for j, tone := range runes {
  44. tonesMap[tone] = neutrals[j]
  45. numericTonesMap[tone] = i + 1
  46. }
  47. }
  48. for k, v := range Resource {
  49. i, err := strconv.ParseInt(k, 16, 32)
  50. if err != nil {
  51. continue
  52. }
  53. pinyinMap[rune(i)] = v
  54. }
  55. initialized = true
  56. }
  57. func New(origin string) *pinyin {
  58. return &pinyin{
  59. origin: origin,
  60. split: " ",
  61. mode: WithoutTone,
  62. }
  63. }
  64. func (py *pinyin) Split(split string) *pinyin {
  65. py.split = split
  66. return py
  67. }
  68. func (py *pinyin) Mode(mode Mode) *pinyin {
  69. py.mode = mode
  70. return py
  71. }
  72. func (py *pinyin) Convert() (string, error) {
  73. if !initialized {
  74. return "", ErrInitialize
  75. }
  76. sr := []rune(py.origin)
  77. words := make([]string, 0)
  78. var temp string
  79. for i, s := range sr {
  80. _, ok := pinyinMap[s]
  81. if !ok {
  82. // 非中文处理
  83. temp += string(s)
  84. if i == len(sr)-1 {
  85. words = append(words, temp)
  86. }
  87. continue
  88. }
  89. word, err := getPinyin(s, py.mode)
  90. if err != nil {
  91. return "", err
  92. }
  93. if len(temp) > 0 {
  94. words = append(words, temp)
  95. temp = ""
  96. }
  97. if len(word) > 0 {
  98. words = append(words, word)
  99. }
  100. }
  101. result := strings.Join(words, py.split)
  102. result = strings.Replace(result, " ", " ", -1)
  103. result = strings.Replace(result, " ", " ", -1)
  104. return result, nil
  105. }
  106. func getPinyin(hanzi rune, mode Mode) (string, error) {
  107. if !initialized {
  108. return "", ErrInitialize
  109. }
  110. switch mode {
  111. case Tone:
  112. return getTone(hanzi), nil
  113. case InitialsInCapitals:
  114. return getInitialsInCapitals(hanzi), nil
  115. default:
  116. return getDefault(hanzi), nil
  117. }
  118. }
  119. func getTone(hanzi rune) string {
  120. return pinyinMap[hanzi]
  121. }
  122. func getDefault(hanzi rune) string {
  123. tone := getTone(hanzi)
  124. if tone == "" {
  125. return tone
  126. }
  127. output := make([]rune, utf8.RuneCountInString(tone))
  128. count := 0
  129. for _, t := range tone {
  130. neutral, found := tonesMap[t]
  131. if found {
  132. output[count] = neutral
  133. } else {
  134. output[count] = t
  135. }
  136. count++
  137. }
  138. return string(output)
  139. }
  140. func getInitialsInCapitals(hanzi rune) string {
  141. def := getDefault(hanzi)
  142. if def == "" {
  143. return def
  144. }
  145. sr := []rune(def)
  146. if sr[0] > 32 {
  147. sr[0] = sr[0] - 32
  148. }
  149. return string(sr)
  150. }