package main import ( "jy/admin/track" "jy/extract" . "jy/mongodbutil" "log" "regexp" "testing" "time" ) func Test_task(t *testing.T) { Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf") //extract.StartExtractTaskId("5b8f804025e29a290415aee1") extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5a999f3140d2d9bbe820dbb1", "5", "mxs_v3", "mxs_v3") //extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5beb99eaa5cb26b9b74c104b", "10", "mxs_v3", "mxs_v3") time.Sleep(5 * time.Second) } func Test_reghan(t *testing.T) { context := `你好` reg := regexp.MustCompile(`^[\p{Han}]+$`) //纯汉字 //reg := regexp.MustCompile(`[\\p{Han}]`) //含汉字 tmp := reg.MatchString(context) log.Println(tmp) } func Test_reg(t *testing.T) { context := `sss dfdf` reg := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`) tmp := reg.ReplaceAllString(context, "$1") log.Println(tmp) } func Test_paths(t *testing.T) { Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3") tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3") for code, v := range tracks { if tmp, ok := v.([]map[string]interface{}); ok { for k, v := range tmp { if k == 0 { log.Println(v) } else { log.Println(code, v["code"], v["value"]) } } } break } } func Test_reg3(t *testing.T) { text := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)") for i := 1; i <= 2; i++ { if len(text) > 0 { text = aa12(i, text) } } log.Println("finish--", string(text)) } func aa12(i int, text []rune) []rune { pairedIndex := make(map[int]int) surplusMax := -1 //记录多余的反符号最大值 positiveMax := -1 //记录多余的正符号最大值 removeLength := 0 nb := 0 //na := 0 length := len(text) allSymbol := "[((\\[【{{〔<《))\\]】}}〕>》]" allReg := regexp.MustCompile(allSymbol) symmetricMap := map[string]string{ "]": "[", ")": "(", "】": "【", "}": "{", } symbolIndex := map[string][]int{} //记录符号和当前索引位置 //log.Println(string(text)) for index, t := range text { now := allReg.FindString(string(t)) if len(now) > 0 { //匹配到符号 if index == 0 { if symmetricMap[now] != "" { //去除第一个反符号 text = text[1:len(text)] } else if len(now) > 0 { //第一个是正符号,记录索引位置 tmpArr := []int{index} symbolIndex[now] = tmpArr } } else { if symmetricMap[now] != "" { //反向符号,找出对称的正向符号 fdSymbol := symmetricMap[now] //正向符号 tmp := symbolIndex[fdSymbol] if len(tmp) == 0 { //多出来的反向符号,记录最大值 //log.Println("多余反向符号----", now) if index > surplusMax { surplusMax = index } } else { nowIndex := tmp[len(tmp)-1] //索引位置 symbolIndex[fdSymbol] = tmp[:len(tmp)-1] //匹配索引位置后,删除之前的记录 if len(symbolIndex[fdSymbol]) == 0 { delete(symbolIndex, fdSymbol) } //将成对的符号的index记录, if index == length-1 { pairedIndex[index] = nowIndex } pairedIndex[nowIndex] = index } } else { //正向符号,加入symbolIndex记录索引 tmpArr := []int{} if len(symbolIndex[now]) > 0 { //有该符号的索引位置 tmpArr = symbolIndex[now] tmpArr = append(tmpArr, index) } else { //没有该符号的索引位置 tmpArr = []int{index} } symbolIndex[now] = tmpArr } } } } if len(symbolIndex) != 0 { //多余的正符号索引位置 for _, arr := range symbolIndex { for j, l := range arr { if j == 0 && l == 0 { text = text[1:] //删除text开头的正向符号 removeLength = 1 nb = nb + 1 } if positiveMax < l { //记录最大正向索引 positiveMax = l } } } } firstOpposite := pairedIndex[0] if firstOpposite != 0 { //第一个正符号对应反符号的位置 text = text[firstOpposite+1:] removeLength = firstOpposite + 1 nb = nb + removeLength } lastOpposite := pairedIndex[length-1] //最后一个符号 if lastOpposite > 0 { //有对称的正向符号,删除其中间内容 //na = length - lastOpposite text = text[:lastOpposite-removeLength] } else if surplusMax == length-1 { //没有对称,只删除最后一个反符号 text = text[:length-1-removeLength] //na = na + 1 } //有多余反向符号,删除之前部分 surplusMax所有多余反向符号的最大索引 if surplusMax != -1 && surplusMax > firstOpposite && surplusMax < length-1 { if (lastOpposite > 0 && surplusMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发}发(发发发发发发) text = text[surplusMax-nb+1:] nb = surplusMax + 1 } } //多余正符号删除之后部分(优先删除反符号之前部分)//(发发{发发)发发发发发发发发发发发发发发发(发{发) if positiveMax != -1 && positiveMax != 0 && positiveMax > surplusMax && positiveMax > firstOpposite { ////发发发发发发]发发{ if (lastOpposite > 0 && positiveMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发发发{发发发发发(发发) text = text[:positiveMax-nb] } } log.Println(string(text)) return text }