package clear

import (
	"fmt"
	"regexp"
	"strings"
)

// Shared patterns and whitespace list used by the cleaning helpers.
var (
	// cutSpace matches a whitespace run anchored at the start or at the end of the text.
	cutSpace *regexp.Regexp
	// cutAllSpace matches any whitespace run, including the empty one.
	cutAllSpace *regexp.Regexp
	// catSymbol is compiled from `[]+`; see the note in init.
	catSymbol *regexp.Regexp
	// spaces holds ideographic space, em space, no-break space, tab, CR and LF.
	spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
)

// init compiles the shared patterns. Compilation is best effort: a pattern
// that fails to compile leaves its variable nil instead of aborting start-up.
func init() {
	compile := func(expr string) *regexp.Regexp {
		re, _ := regexp.Compile(expr) // error deliberately dropped, re is nil on failure
		return re
	}
	cutSpace = compile(`^\s*|\s*$`)
	cutAllSpace = compile(`\s*`)
	// NOTE(review): under Go's regexp syntax a `]` directly after `[` is a
	// literal, so `[]+` appears to lack a closing bracket and catSymbol
	// presumably stays nil — confirm the intended character set.
	catSymbol = compile(`[]+`)
}

// LableStr is a regular-expression source covering the escaped entities
// amp, nbsp and #8266 as well as &lt;…&gt; spans. CutLableStr does not use it.
var LableStr = "&?(amp|nbsp|#8266);?|(&lt;).*?(&gt;?)"

// at and ed are the runes that open and close an escaped entity.
var at = rune('&')
var ed = rune(';')

// lableMap maps every entity CutLableStr understands to the rune it stands for.
var lableMap = map[string]rune{
	"&amp;":  rune('&'),
	"&nbsp;": rune(' '),
	"&gt;":   rune('>'),
	"&lt;":   rune('<'),
}

// CutLableStr decodes the escaped entities listed in lableMap inside con.
//
// Decoding is repeated up to three times so that doubly escaped input such as
// "&amp;lt;" collapses to "<"; it stops early once a pass changes nothing.
// Text that merely looks like the start of an entity ("A&B", "Tom & Jerry",
// "&unknown;") is copied through unchanged.
func CutLableStr(con string) string {
	const maxPasses = 3
	for pass := 0; pass < maxPasses; pass++ {
		decoded := cutLableOnce(con)
		if decoded == con {
			break
		}
		con = decoded
	}
	return con
}

// cutLableOnce performs a single left-to-right entity decoding pass over con.
func cutLableOnce(con string) string {
	// The longest key in lableMap ("&nbsp;") is six runes; a candidate that
	// grows past that without reaching ';' cannot be a known entity.
	const maxLableRunes = 6

	var out strings.Builder
	out.Grow(len(con))
	pending := make([]rune, 0, maxLableRunes+1)
	flush := func() {
		for _, r := range pending {
			out.WriteRune(r)
		}
		pending = pending[:0]
	}

	for _, r := range con {
		if r == at {
			// A new '&' always starts a fresh candidate; whatever was
			// pending was not an entity and is emitted verbatim.
			flush()
			pending = append(pending, r)
			continue
		}
		if len(pending) == 0 {
			out.WriteRune(r)
			continue
		}
		pending = append(pending, r)
		switch {
		case r == ed:
			if decoded, ok := lableMap[string(pending)]; ok {
				out.WriteRune(decoded)
				pending = pending[:0]
			} else {
				flush()
			}
		case len(pending) > maxLableRunes:
			flush()
		}
	}
	// Emit a candidate that was still open when the input ended so that
	// trailing text after a bare '&' is not lost.
	flush()
	return out.String()
}

// CutSpace cleans leading and trailing whitespace from data[0].
//
// data[0] is formatted with fmt.Sprint, every ideographic space is turned
// into an ASCII space, the whitespace runs matched by cutSpace at both ends
// are removed, and the result is passed through replaceSymbol together with
// the spaces list (replaceSymbol is defined elsewhere in the package —
// presumably it strips those characters; confirm against its definition).
// The cleaned string is stored back into data[0] and the same slice is
// returned. data must contain at least one element.
func CutSpace(data []interface{}) []interface{} {
	raw := fmt.Sprint(data[0])
	normalized := strings.Replace(raw, "　", " ", -1)
	trimmed := cutSpace.ReplaceAllString(normalized, "")
	data[0] = replaceSymbol(trimmed, spaces)
	return data
}

// CutAllSpace removes every whitespace run matched by cutAllSpace from
// data[0] (formatted with fmt.Sprint), then passes the result through
// replaceSymbol with the spaces list (defined elsewhere in the package —
// presumably removing those characters as well; confirm against its
// definition). The cleaned string overwrites data[0] and the same slice is
// returned. data must contain at least one element.
func CutAllSpace(data []interface{}) []interface{} {
	text := fmt.Sprint(data[0])
	text = cutAllSpace.ReplaceAllString(text, "")
	data[0] = replaceSymbol(text, spaces)
	return data
}

// cutSymbolChars is the punctuation set (already escaped for use inside a
// regexp character class) that CutSymbol strips from both ends of a value.
const cutSymbolChars = ",，;；:：'\"“”。.\\?？、/+=\\_—*&……\\^%$￥@!！`~·"

// Compiled once at package scope; both patterns are constant.
var (
	// cutSymbolStartReg matches a leading run of closing brackets and punctuation.
	cutSymbolStartReg = regexp.MustCompile("^[" + "）\\)>》】\\]}｝〕" + cutSymbolChars + "]+")
	// cutSymbolEndReg matches a trailing run of opening brackets and punctuation.
	cutSymbolEndReg = regexp.MustCompile("[" + "（\\(<《【\\[{｛〔" + cutSymbolChars + "]+$")
)

// CutSymbol strips stray symbols from both ends of data[0].
//
// The value is first cleaned with CutSpace (which also rewrites data[0] in
// place). A leading run of closing brackets or punctuation and a trailing run
// of opening brackets or punctuation are then removed, and the remainder is
// cleaned with CutSpace once more. The result is returned in a new
// two-element slice together with the untouched data[1]; data must therefore
// contain at least two elements.
func CutSymbol(data []interface{}) []interface{} {
	value := fmt.Sprint(CutSpace(data)[0])
	value = cutSymbolStartReg.ReplaceAllString(value, "")
	value = cutSymbolEndReg.ReplaceAllString(value, "")
	// Removing symbols can expose new whitespace at the edges, so trim again.
	value = fmt.Sprint(CutSpace([]interface{}{value, data[1]})[0])
	return []interface{}{value, data[1]}
}

// cutNotPrsMaxDepth bounds how many clean-up rounds childCutNotPrs may run.
const cutNotPrsMaxDepth = 50

// cutNotPrsPairs lists, per bracket family and in processing order, the
// pattern for its opening forms and the pattern for its closing forms.
// Compiled once at package scope because the patterns are constant.
//
// NOTE(review): angle brackets are only handled by the crossing patterns
// below, not by this count-based table; confirm whether `<《` / `>》` should be
// added here as a pair.
var cutNotPrsPairs = []struct{ opener, closer *regexp.Regexp }{
	{regexp.MustCompile("[（(]"), regexp.MustCompile("[）)]")},
	{regexp.MustCompile("[\\[【]"), regexp.MustCompile("[\\]】]")},
	{regexp.MustCompile("[{｛]"), regexp.MustCompile("[}｝]")},
	{regexp.MustCompile("〔"), regexp.MustCompile("〕")},
}

var (
	// cutNotPrsOpenTail matches an opening bracket that is followed by no
	// closing bracket up to the end of the text.
	cutNotPrsOpenTail = regexp.MustCompile("[（(\\[【{｛〔<《][^）)\\]】}｝〕>》]*$")
	// cutNotPrsCloseHead matches a closing bracket that is preceded by no
	// opening bracket from the start of the text.
	cutNotPrsCloseHead = regexp.MustCompile("^[^（(\\[【{｛〔<《]*[）)\\]】}｝〕>》]")
)

// CutNotPrs removes the text cut off by unpaired brackets in data[0].
//
// For each bracket family, when openers outnumber closers everything up to
// and including the first surplus opener is dropped; when closers outnumber
// openers the text is truncated at the first surplus closer. Afterwards a
// dangling opener at the tail (with everything after it) and a dangling
// closer at the head (with everything before it) are removed, and the whole
// procedure repeats until nothing dangles. The cleaned string is written back
// to data[0] and the same slice is returned.
func CutNotPrs(data []interface{}) []interface{} {
	return childCutNotPrs(data, 1)
}

// childCutNotPrs does the work for CutNotPrs. count is the round number to
// start from; processing stops once it reaches cutNotPrsMaxDepth. When count
// is already at the limit or data[0] formats to an empty string, data is
// returned untouched.
func childCutNotPrs(data []interface{}, count int) []interface{} {
	value := fmt.Sprint(data[0])
	if count >= cutNotPrsMaxDepth || value == "" {
		return data
	}
	for ; count < cutNotPrsMaxDepth && value != ""; count++ {
		value = cutSurplusBrackets(value)
		// Brackets of different families may still cross each other; strip
		// the dangling ends and run another round if any were found.
		if !cutNotPrsOpenTail.MatchString(value) && !cutNotPrsCloseHead.MatchString(value) {
			break
		}
		value = cutNotPrsOpenTail.ReplaceAllString(value, "")
		value = cutNotPrsCloseHead.ReplaceAllString(value, "")
	}
	data[0] = value
	return data
}

// cutSurplusBrackets balances opener and closer counts family by family,
// dropping the text in front of a surplus opener or behind a surplus closer.
func cutSurplusBrackets(value string) string {
	for _, pair := range cutNotPrsPairs {
		opens := pair.opener.FindAllStringIndex(value, -1)
		closes := pair.closer.FindAllStringIndex(value, -1)
		switch {
		case len(opens) > len(closes):
			// Keep only what follows the first opener that has no partner.
			value = value[opens[len(closes)][1]:]
		case len(opens) < len(closes):
			// Keep only what precedes the first closer that has no partner.
			value = value[:closes[len(opens)][0]]
		}
	}
	return value
}

// clearAllWordReg matches a non-empty text made up solely of CJK ideographs
// (U+4E00–U+9FA5), the listed punctuation and brackets, and whitespace.
// Compiled once at package scope because the pattern is constant.
var clearAllWordReg = regexp.MustCompile("^[\u4e00-\u9fa5、,，.。?？'\"“”‘’·~！@#￥$%…&*（）()\\-—+=【】\\[\\]｛｝{}<>《》|\\/\\s]+$")

// ClearAllWord blanks data[0] when it consists entirely of Chinese characters,
// the listed symbols and whitespace; any other value is kept as is. In both
// cases data[0] is replaced by its fmt.Sprint string form and the same slice
// is returned. data must contain at least one element.
func ClearAllWord(data []interface{}) []interface{} {
	data[0] = clearAllWordReg.ReplaceAllString(fmt.Sprint(data[0]), "")
	return data
}