main_test.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. package main
  2. import (
  3. "fmt"
  4. "jy/admin/track"
  5. "jy/clear"
  6. "jy/extract"
  7. . "jy/mongodbutil"
  8. "log"
  9. "os"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. "testing"
  14. "time"
  15. )
  16. func Test_han(t *testing.T) {
  17. str := `[\u4e00-\u9fa5]` //"[\u4e00-\u9fa5]"
  18. //var rg = regexp.MustCompile(`[\u4e00-\u9fa5]`)会出错
  19. if strings.Contains(str, "\\u") {
  20. pattern, _ := strconv.Unquote(`"` + str + `"`)
  21. log.Println(pattern)
  22. }
  23. var rg = regexp.MustCompile(str)
  24. fmt.Println(rg.MatchString(str))
  25. os.Exit(0)
  26. }
  27. func Test_task(t *testing.T) {
  28. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  29. //extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
  30. extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5a53966e40d2d9bbe8f7d30a", "1", "mxs_v2", "mxs_v2")
  31. //extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
  32. time.Sleep(5 * time.Second)
  33. }
  34. func Test_extractcity(t *testing.T) {
  35. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  36. extract.InitDFA2()
  37. //查询采购单位信息
  38. extract.FindBuyer()
  39. }
  40. func Test_reg(t *testing.T) {
  41. context := `sfsa.`
  42. reg := regexp.MustCompile(`(勘察|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|[,,\.。、::“”‘’"])`)
  43. //reg := regexp.MustCompile(`[\\p{Han}]`)
  44. tmp := reg.MatchString(context)
  45. log.Println(tmp)
  46. }
  47. func Test_reg1(t *testing.T) {
  48. context := `sss<input name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">
  49. dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`
  50. reg := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)
  51. tmp := reg.ReplaceAllString(context, "$1")
  52. log.Println(tmp)
  53. }
  54. func Test_paths(t *testing.T) {
  55. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
  56. tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
  57. for code, v := range tracks {
  58. if tmp, ok := v.([]map[string]interface{}); ok {
  59. for k, v := range tmp {
  60. if k == 0 {
  61. log.Println(v)
  62. } else {
  63. log.Println(code, v["code"], v["value"])
  64. }
  65. }
  66. }
  67. break
  68. }
  69. }
  70. func Test_clear(t *testing.T) {
  71. text := clear.OtherClean("winner", "宁夏泷泽医疗器械有限公司(地址:银川市兴庆区绿地214商城D区7号楼317房)")
  72. log.Println(text)
  73. }
  74. func Test_reg3(t *testing.T) {
  75. text := []rune("(法撒旦法士大夫发的发)生(的]发的法旦法士大夫三发的)")
  76. for i := 1; i <= 2; i++ {
  77. if len(text) > 0 {
  78. text = gl(i, text)
  79. }
  80. }
  81. log.Println("finish--", string(text))
  82. }
  83. func gl(i int, text []rune) []rune {
  84. pairedIndex := make(map[int]int)
  85. surplusMax := -1 //记录多余的反符号最大值
  86. positiveMax := -1 //记录多余的正符号最大值
  87. removeLength := 0
  88. nb := 0
  89. //na := 0
  90. length := len(text)
  91. allSymbol := "[((\\[【{{〔<《))\\]】}}〕>》]"
  92. allReg := regexp.MustCompile(allSymbol)
  93. symmetricMap := map[string]string{
  94. "]": "[",
  95. ")": "(",
  96. "】": "【",
  97. "}": "{",
  98. }
  99. symbolIndex := map[string][]int{} //记录符号和当前索引位置
  100. //log.Println(string(text))
  101. for index, t := range text {
  102. now := allReg.FindString(string(t))
  103. if len(now) > 0 { //匹配到符号
  104. if index == 0 {
  105. if symmetricMap[now] != "" { //去除第一个反符号
  106. text = text[1:len(text)]
  107. } else if len(now) > 0 { //第一个是正符号,记录索引位置
  108. tmpArr := []int{index}
  109. symbolIndex[now] = tmpArr
  110. }
  111. } else {
  112. if symmetricMap[now] != "" { //反向符号,找出对称的正向符号
  113. fdSymbol := symmetricMap[now] //正向符号
  114. tmp := symbolIndex[fdSymbol]
  115. if len(tmp) == 0 { //多出来的反向符号,记录最大值
  116. //log.Println("多余反向符号----", now)
  117. if index > surplusMax {
  118. surplusMax = index
  119. }
  120. } else {
  121. nowIndex := tmp[len(tmp)-1] //索引位置
  122. symbolIndex[fdSymbol] = tmp[:len(tmp)-1] //匹配索引位置后,删除之前的记录
  123. if len(symbolIndex[fdSymbol]) == 0 {
  124. delete(symbolIndex, fdSymbol)
  125. }
  126. //将成对的符号的index记录,
  127. if index == length-1 {
  128. pairedIndex[index] = nowIndex
  129. }
  130. pairedIndex[nowIndex] = index
  131. }
  132. } else { //正向符号,加入symbolIndex记录索引
  133. tmpArr := []int{}
  134. if len(symbolIndex[now]) > 0 { //有该符号的索引位置
  135. tmpArr = symbolIndex[now]
  136. tmpArr = append(tmpArr, index)
  137. } else { //没有该符号的索引位置
  138. tmpArr = []int{index}
  139. }
  140. symbolIndex[now] = tmpArr
  141. }
  142. }
  143. }
  144. }
  145. if len(symbolIndex) != 0 { //多余的正符号索引位置
  146. for _, arr := range symbolIndex {
  147. for j, l := range arr {
  148. if j == 0 && l == 0 {
  149. text = text[1:] //删除text开头的正向符号
  150. removeLength = 1
  151. nb = nb + 1
  152. }
  153. if positiveMax < l { //记录最大正向索引
  154. positiveMax = l
  155. }
  156. }
  157. }
  158. }
  159. firstOpposite := pairedIndex[0]
  160. if firstOpposite != 0 { //第一个正符号对应反符号的位置
  161. text = text[firstOpposite+1:]
  162. removeLength = firstOpposite + 1
  163. nb = nb + removeLength
  164. }
  165. lastOpposite := pairedIndex[length-1] //最后一个符号
  166. if lastOpposite > 0 { //有对称的正向符号,删除其中间内容
  167. //na = length - lastOpposite
  168. text = text[:lastOpposite-removeLength]
  169. } else if surplusMax == length-1 { //没有对称,只删除最后一个反符号
  170. text = text[:length-1-removeLength]
  171. //na = na + 1
  172. }
  173. //有多余反向符号,删除之前部分 surplusMax所有多余反向符号的最大索引
  174. if surplusMax != -1 && surplusMax > firstOpposite && surplusMax < length-1 {
  175. if (lastOpposite > 0 && surplusMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发}发(发发发发发发)
  176. text = text[surplusMax-nb+1:]
  177. nb = surplusMax + 1
  178. }
  179. }
  180. //多余正符号删除之后部分(优先删除反符号之前部分)//(发发{发发)发发发发发发发发发发发发发发发(发{发)
  181. if positiveMax != -1 && positiveMax != 0 && positiveMax > surplusMax && positiveMax > firstOpposite { ////发发发发发发]发发{
  182. if (lastOpposite > 0 && positiveMax < lastOpposite) || (lastOpposite == 0) { //发发发发发发发发{发发发发发(发发)
  183. text = text[:positiveMax-nb]
  184. }
  185. }
  186. log.Println(string(text))
  187. return text
  188. }