123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 |
- package util
- import (
- "errors"
- "strconv"
- "strings"
- "unicode/utf8"
- )
- var (
- tones = [][]rune{
- {'ā', 'ē', 'ī', 'ō', 'ū', 'ǖ', 'Ā', 'Ē', 'Ī', 'Ō', 'Ū', 'Ǖ'},
- {'á', 'é', 'í', 'ó', 'ú', 'ǘ', 'Á', 'É', 'Í', 'Ó', 'Ú', 'Ǘ'},
- {'ǎ', 'ě', 'ǐ', 'ǒ', 'ǔ', 'ǚ', 'Ǎ', 'Ě', 'Ǐ', 'Ǒ', 'Ǔ', 'Ǚ'},
- {'à', 'è', 'ì', 'ò', 'ù', 'ǜ', 'À', 'È', 'Ì', 'Ò', 'Ù', 'Ǜ'},
- }
- neutrals = []rune{'a', 'e', 'i', 'o', 'u', 'v', 'A', 'E', 'I', 'O', 'U', 'V'}
- ErrInitialize = errors.New("not yet initialized")
- )
- var (
- // 从带声调的声母到对应的英文字符的映射
- tonesMap map[rune]rune
- // 从汉字到声调的映射
- numericTonesMap map[rune]int
- // 从汉字到拼音的映射(带声调)
- pinyinMap map[rune]string
- initialized bool
- )
- type Mode int
- const (
- WithoutTone Mode = iota + 1 // 默认模式,例如:guo
- Tone // 带声调的拼音 例如:guó
- InitialsInCapitals // 首字母大写不带声调,例如:Guo
- )
- type pinyin struct {
- origin string
- split string
- mode Mode
- }
- func init() {
- tonesMap = make(map[rune]rune)
- numericTonesMap = make(map[rune]int)
- pinyinMap = make(map[rune]string)
- for i, runes := range tones {
- for j, tone := range runes {
- tonesMap[tone] = neutrals[j]
- numericTonesMap[tone] = i + 1
- }
- }
- for k, v := range Resource {
- i, err := strconv.ParseInt(k, 16, 32)
- if err != nil {
- continue
- }
- pinyinMap[rune(i)] = v
- }
- initialized = true
- }
- func New(origin string) *pinyin {
- return &pinyin{
- origin: origin,
- split: " ",
- mode: WithoutTone,
- }
- }
- func (py *pinyin) Split(split string) *pinyin {
- py.split = split
- return py
- }
- func (py *pinyin) Mode(mode Mode) *pinyin {
- py.mode = mode
- return py
- }
- func (py *pinyin) Convert() (string, error) {
- if !initialized {
- return "", ErrInitialize
- }
- sr := []rune(py.origin)
- words := make([]string, 0)
- var temp string
- for i, s := range sr {
- _, ok := pinyinMap[s]
- if !ok {
- // 非中文处理
- temp += string(s)
- if i == len(sr)-1 {
- words = append(words, temp)
- }
- continue
- }
- word, err := getPinyin(s, py.mode)
- if err != nil {
- return "", err
- }
- if len(temp) > 0 {
- words = append(words, temp)
- temp = ""
- }
- if len(word) > 0 {
- words = append(words, word)
- }
- }
- result := strings.Join(words, py.split)
- result = strings.Replace(result, " ", " ", -1)
- result = strings.Replace(result, " ", " ", -1)
- return result, nil
- }
- func getPinyin(hanzi rune, mode Mode) (string, error) {
- if !initialized {
- return "", ErrInitialize
- }
- switch mode {
- case Tone:
- return getTone(hanzi), nil
- case InitialsInCapitals:
- return getInitialsInCapitals(hanzi), nil
- default:
- return getDefault(hanzi), nil
- }
- }
- func getTone(hanzi rune) string {
- return pinyinMap[hanzi]
- }
- func getDefault(hanzi rune) string {
- tone := getTone(hanzi)
- if tone == "" {
- return tone
- }
- output := make([]rune, utf8.RuneCountInString(tone))
- count := 0
- for _, t := range tone {
- neutral, found := tonesMap[t]
- if found {
- output[count] = neutral
- } else {
- output[count] = t
- }
- count++
- }
- return string(output)
- }
- func getInitialsInCapitals(hanzi rune) string {
- def := getDefault(hanzi)
- if def == "" {
- return def
- }
- sr := []rune(def)
- if sr[0] > 32 {
- sr[0] = sr[0] - 32
- }
- return string(sr)
- }
|