package main
import (
"fmt"
"jy/admin/track"
"jy/clear"
"jy/extract"
. "jy/mongodbutil"
"log"
"os"
"regexp"
"strconv"
"strings"
"testing"
"time"
)
// Test_han checks that a character class written with \uXXXX escapes can be
// turned into a working regular expression for Han characters.
//
// Go's regexp syntax does not accept \u escapes, so compiling the raw text
// directly fails. The escapes are first decoded with strconv.Unquote and the
// decoded pattern is what gets compiled.
func Test_han(t *testing.T) {
	str := `[\u4e00-\u9fa5]` // same text as the interpreted literal "[\u4e00-\u9fa5]" before decoding

	pattern := str
	if strings.Contains(str, `\u`) {
		unquoted, err := strconv.Unquote(`"` + str + `"`)
		if err != nil {
			t.Fatalf("unquote %q: %v", str, err)
		}
		pattern = unquoted
		log.Println(pattern)
	}

	rg, err := regexp.Compile(pattern)
	if err != nil {
		t.Fatalf("compile %q: %v", pattern, err)
	}

	// The escaped source text itself contains no Han character, so this
	// prints false; it is kept as the diagnostic output of the test.
	fmt.Fprintln(os.Stdout, rg.MatchString(str))

	if !rg.MatchString("汉") {
		t.Errorf("pattern %q does not match a Han character", pattern)
	}
}
// Test_task connects to the development MongoDB instance, starts one
// extraction test task and then waits five seconds before returning.
func Test_task(t *testing.T) {
	const (
		mongoAddr = "192.168.3.207:27082"
		mongoDB   = "extract_kf"

		taskID     = "5b8f804025e29a290415aee1"
		secondID   = "5a53966e40d2d9bbe8f7d30a"
		thirdArg   = "1"
		fourthArg  = "mxs_v2"
		fifthArg   = "mxs_v2"
		settleTime = 5 * time.Second
	)

	Mgo = MgoFactory(1, 3, 120, mongoAddr, mongoDB)

	// Alternative invocations kept for reference:
	//   extract.StartExtractTaskId("5b8f804025e29a290415aee1") // or 5c528686698414055c47b115
	//   extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
	extract.StartExtractTestTask(taskID, secondID, thirdArg, fourthArg, fifthArg)

	time.Sleep(settleTime)
}
// Test_extractcity connects to the development MongoDB instance, initialises
// the extract package via InitDFA2 and then runs the buyer lookup.
func Test_extractcity(t *testing.T) {
	const (
		mongoAddr = "192.168.3.207:27082"
		mongoDB   = "extract_kf"
	)

	Mgo = MgoFactory(1, 3, 120, mongoAddr, mongoDB)
	extract.InitDFA2()

	// Query the purchasing-unit (buyer) information.
	extract.FindBuyer()
}
// Test_reg logs whether the sample text contains one of the listed keywords
// or punctuation marks.
func Test_reg(t *testing.T) {
	// An alternative pattern that was tried here: `[\\p{Han}]`.
	const pattern = `(勘察|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|[,,\.。、::“”‘’"])`

	sample := `sfsa.`
	matcher := regexp.MustCompile(pattern)
	log.Println(matcher.MatchString(sample))
}
// Test_reg1 replaces every <input ... value="..."> tag in the sample text with
// the first capture group of the pattern and logs the result.
func Test_reg1(t *testing.T) {
	const pattern = `<\s*input.*value=['"](.[^'"]+).+>`

	sample := `sss
dfdf`
	replaced := regexp.MustCompile(pattern).ReplaceAllString(sample, "$1")
	log.Println(replaced)
}
// Test_paths fetches the track paths for one document/field and logs the
// steps of a single entry: the first step is logged whole, later steps are
// logged as (entry key, step "code", step "value").
func Test_paths(t *testing.T) {
	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")

	tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
	for code, raw := range tracks {
		steps, ok := raw.([]map[string]interface{})
		if ok {
			for idx, step := range steps {
				if idx > 0 {
					log.Println(code, step["code"], step["value"])
					continue
				}
				log.Println(step)
			}
		}
		// Only the first entry yielded by the range is inspected.
		break
	}
}
func Test_clear(t *testing.T) {
text := clear.OtherClean("winner", "宁夏泷泽医疗器械有限公司(地址:银川市兴庆区绿地214商城D区7号楼317房)")
log.Println(text)
}
// Test_reg3 runs gl over a sample text for up to two passes, stopping early
// once the text becomes empty, and logs the final result.
func Test_reg3(t *testing.T) {
	remaining := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)")

	for pass := 1; pass <= 2 && len(remaining) > 0; pass++ {
		remaining = gl(pass, remaining)
	}

	log.Println("finish--", string(remaining))
}
// gl trims bracketed and unbalanced-bracket fragments from both ends of text
// and returns what is left as a sub-slice of text (no copy is made). The
// result is also written to the standard logger.
//
// All rules below are expressed in indexes of the original text:
//   - a closing bracket at index 0, or an opening bracket at index 0 that is
//     never closed, is dropped;
//   - if the bracket at index 0 is closed later, everything up to and
//     including its closing bracket is dropped;
//   - if the last rune closes a bracket opened after index 0, everything from
//     that opening bracket to the end is dropped; if the last rune is an
//     unmatched closing bracket, only that rune is dropped;
//   - if an unmatched closing bracket remains inside the kept range,
//     everything up to and including the last such bracket is dropped;
//   - if an unmatched opening bracket remains after that, everything from
//     the last such bracket to the end is dropped.
//
// Brackets are matched per kind, so different kinds may interleave. When the
// leading and trailing groups overlap (e.g. "(a[b)c]") the result is empty.
// An empty text is returned unchanged.
//
// The parameter i is not used by the function.
func gl(i int, text []rune) []rune {
	length := len(text)
	if length == 0 {
		// Nothing to trim. Without this guard the trailing-closer check below
		// (surplusMax == length-1, i.e. -1 == -1) would slice with a negative
		// bound and panic.
		return text
	}

	// isSymbol reports whether r is one of the bracket runes considered here.
	isSymbol := func(r rune) bool {
		switch r {
		case '(', '[', '【', '{', '〔', '<', '《',
			')', ']', '】', '}', '〕', '>', '》':
			return true
		}
		return false
	}
	// Closing brackets that get paired with an opening bracket.
	// NOTE(review): 〕, > and 》 count as symbols but are not listed here, so
	// they are tracked like opening brackets; confirm whether that is intended.
	closerToOpener := map[rune]rune{
		']': '[',
		')': '(',
		'】': '【',
		'}': '{',
	}

	var (
		openStacks    = make(map[rune][]int) // per opener kind: indexes still waiting for a closer
		surplusMax    = -1                   // largest index of an unmatched closing bracket (index 0 excluded)
		positiveMax   = -1                   // largest index of an unmatched opening bracket
		firstOpposite = 0                    // index of the closer matching the bracket at index 0, 0 if none
		lastOpposite  = 0                    // index of the opener matching the last rune, 0 if none
		start, end    = 0, length            // kept range, in original indexes
	)

	for index, r := range text {
		if !isSymbol(r) {
			continue
		}
		opener, isCloser := closerToOpener[r]
		switch {
		case index == 0 && isCloser:
			// A leading closing bracket is simply dropped. Tracking this as an
			// offset keeps every later index consistent with the original text.
			start = 1
		case !isCloser:
			openStacks[r] = append(openStacks[r], index)
		case len(openStacks[opener]) == 0:
			// Closing bracket without a pending opener.
			if index > surplusMax {
				surplusMax = index
			}
		default:
			stack := openStacks[opener]
			openIdx := stack[len(stack)-1]
			if len(stack) == 1 {
				delete(openStacks, opener)
			} else {
				openStacks[opener] = stack[:len(stack)-1]
			}
			if index == length-1 {
				lastOpposite = openIdx
			}
			if openIdx == 0 {
				firstOpposite = index
			}
		}
	}

	// Whatever is still on a stack is an unmatched opening bracket. Stacks
	// are never stored empty and hold ascending indexes.
	for _, stack := range openStacks {
		if stack[0] == 0 {
			start = 1 // unmatched opening bracket at the very beginning
		}
		if last := stack[len(stack)-1]; last > positiveMax {
			positiveMax = last
		}
	}

	// Leading group: the bracket at index 0 and everything up to its closer.
	if firstOpposite != 0 {
		start = firstOpposite + 1
	}

	// Trailing group, or a lone unmatched closer at the very end.
	switch {
	case lastOpposite > 0:
		end = lastOpposite
	case surplusMax == length-1:
		end = length - 1
	}

	// Unmatched closing bracket inside the kept range: drop everything up to
	// and including it, e.g. "发发}发(发发)".
	if surplusMax != -1 && surplusMax > firstOpposite && surplusMax < length-1 {
		if lastOpposite == 0 || surplusMax < lastOpposite {
			start = surplusMax + 1
		}
	}

	// Unmatched opening bracket inside the kept range: drop it and everything
	// after it, e.g. "发发]发发{" or "发发{发发(发发)". Trimming before an
	// unmatched closer takes precedence, hence positiveMax > surplusMax.
	if positiveMax > 0 && positiveMax > surplusMax && positiveMax > firstOpposite {
		if lastOpposite == 0 || positiveMax < lastOpposite {
			end = positiveMax
		}
	}

	// Interleaved kinds such as "(a[b)c]" make the leading and trailing groups
	// overlap; nothing is left in that case.
	if end < start {
		end = start
	}

	text = text[start:end]
	log.Println(string(text))
	return text
}