main_test.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. package main
  2. import (
  3. "jy/admin/track"
  4. "jy/extract"
  5. . "jy/mongodbutil"
  6. "log"
  7. "regexp"
  8. "testing"
  9. "time"
  10. )
  11. func Test_task(t *testing.T) {
  12. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  13. //extract.StartExtractTaskId("5b8f804025e29a290415aee1")
  14. //extract.StartExtractTestTask("5be107e600746bf92debf080", "5a999f3140d2d9bbe820dbb1", "5", "mxs_v3", "mxs_v3")
  15. extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5beb99eaa5cb26b9b74c104b", "10", "mxs_v3", "mxs_v3")
  16. time.Sleep(5 * time.Second)
  17. }
  18. func Test_reghan(t *testing.T) {
  19. context := `你好`
  20. reg := regexp.MustCompile(`^[\p{Han}]+$`) //纯汉字
  21. //reg := regexp.MustCompile(`[\\p{Han}]`) //含汉字
  22. tmp := reg.MatchString(context)
  23. log.Println(tmp)
  24. }
  25. func Test_reg(t *testing.T) {
  26. context := `sss<input name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">
  27. dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`
  28. reg := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)
  29. tmp := reg.ReplaceAllString(context, "$1")
  30. log.Println(tmp)
  31. }
  32. func Test_paths(t *testing.T) {
  33. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
  34. tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
  35. for code, v := range tracks {
  36. if tmp, ok := v.([]map[string]interface{}); ok {
  37. for k, v := range tmp {
  38. if k == 0 {
  39. log.Println(v)
  40. } else {
  41. log.Println(code, v["code"], v["value"])
  42. }
  43. }
  44. }
  45. break
  46. }
  47. }
  48. func Test_reg3(t *testing.T) {
  49. text := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)")
  50. for i := 1; i <= 2; i++ {
  51. if len(text) > 0 {
  52. text = aa12(i, text)
  53. }
  54. }
  55. log.Println("finish--", string(text))
  56. }
  57. func aa12(i int, text []rune) []rune {
  58. pairedIndex := make(map[int]int)
  59. surplusMax := -1 //记录多余的反符号最大值
  60. positiveMax := -1 //记录多余的正符号最大值
  61. removeLength := 0
  62. nb := 0
  63. //na := 0
  64. length := len(text)
  65. allSymbol := "[((\\[【{{〔<《))\\]】}}〕>》]"
  66. allReg := regexp.MustCompile(allSymbol)
  67. symmetricMap := map[string]string{
  68. "]": "[",
  69. ")": "(",
  70. "】": "【",
  71. "}": "{",
  72. }
  73. symbolIndex := map[string][]int{} //记录符号和当前索引位置
  74. //log.Println(string(text))
  75. for index, t := range text {
  76. now := allReg.FindString(string(t))
  77. if len(now) > 0 { //匹配到符号
  78. if index == 0 {
  79. if symmetricMap[now] != "" { //去除第一个反符号
  80. text = text[1:len(text)]
  81. } else if len(now) > 0 { //第一个是正符号,记录索引位置
  82. tmpArr := []int{index}
  83. symbolIndex[now] = tmpArr
  84. }
  85. } else {
  86. if symmetricMap[now] != "" { //反向符号,找出对称的正向符号
  87. fdSymbol := symmetricMap[now] //正向符号
  88. tmp := symbolIndex[fdSymbol]
  89. if len(tmp) == 0 { //多出来的反向符号,记录最大值
  90. //log.Println("多余反向符号----", now)
  91. if index > surplusMax {
  92. surplusMax = index
  93. }
  94. } else {
  95. nowIndex := tmp[len(tmp)-1] //索引位置
  96. symbolIndex[fdSymbol] = tmp[:len(tmp)-1] //匹配索引位置后,删除之前的记录
  97. if len(symbolIndex[fdSymbol]) == 0 {
  98. delete(symbolIndex, fdSymbol)
  99. }
  100. //将成对的符号的index记录,
  101. if index == length-1 {
  102. pairedIndex[index] = nowIndex
  103. }
  104. pairedIndex[nowIndex] = index
  105. }
  106. } else { //正向符号,加入symbolIndex记录索引
  107. tmpArr := []int{}
  108. if len(symbolIndex[now]) > 0 { //有该符号的索引位置
  109. tmpArr = symbolIndex[now]
  110. tmpArr = append(tmpArr, index)
  111. } else { //没有该符号的索引位置
  112. tmpArr = []int{index}
  113. }
  114. symbolIndex[now] = tmpArr
  115. }
  116. }
  117. }
  118. }
  119. if len(symbolIndex) != 0 { //多余的正符号索引位置
  120. for _, arr := range symbolIndex {
  121. for j, l := range arr {
  122. if j == 0 && l == 0 {
  123. text = text[1:] //删除text开头的正向符号
  124. removeLength = 1
  125. nb = nb + 1
  126. }
  127. if positiveMax < l { //记录最大正向索引
  128. positiveMax = l
  129. }
  130. }
  131. }
  132. }
  133. firstOpposite := pairedIndex[0]
  134. if firstOpposite != 0 { //第一个正符号对应反符号的位置
  135. text = text[firstOpposite+1:]
  136. removeLength = firstOpposite + 1
  137. nb = nb + removeLength
  138. }
  139. lastOpposite := pairedIndex[length-1] //最后一个符号
  140. if lastOpposite > 0 { //有对称的正向符号,删除其中间内容
  141. //na = length - lastOpposite
  142. text = text[:lastOpposite-removeLength]
  143. } else if surplusMax == length-1 { //没有对称,只删除最后一个反符号
  144. text = text[:length-1-removeLength]
  145. //na = na + 1
  146. }
  147. //有多余反向符号,删除之前部分 surplusMax所有多余反向符号的最大索引
  148. if surplusMax != -1 && surplusMax > firstOpposite && surplusMax < length-1 {
  149. if (lastOpposite > 0 && surplusMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发}发(发发发发发发)
  150. text = text[surplusMax-nb+1:]
  151. nb = surplusMax + 1
  152. }
  153. }
  154. //多余正符号删除之后部分(优先删除反符号之前部分)//(发发{发发)发发发发发发发发发发发发发发发(发{发)
  155. if positiveMax != -1 && positiveMax != 0 && positiveMax > surplusMax && positiveMax > firstOpposite { ////发发发发发发]发发{
  156. if (lastOpposite > 0 && positiveMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发发发{发发发发发(发发)
  157. text = text[:positiveMax-nb]
  158. }
  159. }
  160. log.Println(string(text))
  161. return text
  162. }