main_test.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. package main
  2. import (
  3. "fmt"
  4. "jy/admin/track"
  5. "jy/extract"
  6. . "jy/mongodbutil"
  7. "log"
  8. "os"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "testing"
  13. "time"
  14. )
  // Test_han checks that a character class written with \uXXXX escapes can be
  // used as a regular expression once the escapes have been decoded.
  //
  // Go's regexp syntax has no \uXXXX escape, so compiling the raw text
  // `[\u4e00-\u9fa5]` fails. The escapes are therefore decoded with
  // strconv.Unquote first and the decoded pattern is what gets compiled.
  func Test_han(t *testing.T) {
  	str := `[\u4e00-\u9fa5]`

  	// Decode \uXXXX escapes only when the text actually contains one;
  	// otherwise the text is used as the pattern unchanged.
  	pattern := str
  	if strings.Contains(str, "\\u") {
  		decoded, err := strconv.Unquote(`"` + str + `"`)
  		if err != nil {
  			t.Fatalf("unquote %q: %v", str, err)
  		}
  		pattern = decoded
  		log.Println(pattern)
  	}

  	rg, err := regexp.Compile(pattern)
  	if err != nil {
  		t.Fatalf("compile %q: %v", pattern, err)
  	}

  	// Same output as fmt.Println. The test no longer calls os.Exit, which
  	// would terminate the whole test binary and skip the remaining tests.
  	fmt.Fprintln(os.Stdout, rg.MatchString(str))

  	// The decoded class must accept a character inside the range and
  	// reject plain ASCII text.
  	if !rg.MatchString("中") {
  		t.Errorf("pattern %q should match %q", pattern, "中")
  	}
  	if rg.MatchString("abc") {
  		t.Errorf("pattern %q should not match %q", pattern, "abc")
  	}
  }
  26. func Test_task(t *testing.T) {
  27. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
  28. //extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
  29. //extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "59e47b5a40d2d9bbe82296bf", "1", "result_mxs", "result_mxs")
  30. extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5e17e00e85a9271abf0860a6", "1", "result_mxs", "result_mxs")
  31. //extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
  32. time.Sleep(5 * time.Second)
  33. }
  34. func Test_extractcity(t *testing.T) {
  35. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  36. extract.InitDFA2()
  37. //查询采购单位信息
  38. extract.FindBuyer()
  39. }
  // Test_reg compiles three scratch patterns and logs what each one finds in
  // a sample string: a phone-number pattern, a digits-and-dashes pattern, and
  // a filter for bracketed or punctuation noise.
  //
  // Compile errors are now reported through t.Fatalf. Previously they were
  // discarded, so a bad pattern left a nil *regexp.Regexp and the first method
  // call on it panicked with a nil dereference.
  //
  // NOTE(review): the dash class used to read `[×―—-\-]`. The unescaped '-'
  // formed the descending range `—-\-`, which Go rejects as an invalid
  // character class range. It has been removed; the escaped `\-` still covers
  // the hyphen. If that position was meant to hold a different dash (for
  // example full-width U+FF0D), add it back explicitly.
  func Test_reg(t *testing.T) {
  	phonePattern := "((([((]\\d{3,4}[))])?(\\d{6,12}([×―—\\-]+\\d{3,4})?|\\d{3,5}[×―—\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—\\-]+\\d{3,})?|(\\d{2}[×―—\\-])+\\d{8}[×―—\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)"
  	reg1, err := regexp.Compile(phonePattern)
  	if err != nil {
  		t.Fatalf("compile phone pattern: %v", err)
  	}
  	log.Println("---", reg1.FindAllString("05939-5365001(兰陵县芦柞镇人民政府)", -1))

  	digitsPattern := "^\\d*[×―—\\-]*[\u3000\u2003\u00a0\\s]*\\d*$"
  	reg2, err := regexp.Compile(digitsPattern)
  	if err != nil {
  		t.Fatalf("compile digits pattern: %v", err)
  	}
  	log.Println("---", reg2.MatchString("张女士/"))

  	filterPattern := "[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+"
  	filterK, err := regexp.Compile(filterPattern)
  	if err != nil {
  		t.Fatalf("compile filter pattern: %v", err)
  	}
  	log.Println(filterK.FindString("二)采购项目联系人(代理机构)"))
  }
  // Test_reg1 replaces each <input ...> element in a two-line HTML sample with
  // the contents of its value attribute and logs the resulting text.
  func Test_reg1(t *testing.T) {
  	// The sample is two lines joined by a single newline.
  	firstLine := `sss<input name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">`
  	secondLine := `dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`
  	html := firstLine + "\n" + secondLine

  	// Capture group 1 is the text following the opening quote of value=.
  	inputValue := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)

  	log.Println(inputValue.ReplaceAllString(html, "$1"))
  }
  55. func Test_paths(t *testing.T) {
  56. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
  57. tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
  58. for code, v := range tracks {
  59. if tmp, ok := v.([]map[string]interface{}); ok {
  60. for k, v := range tmp {
  61. if k == 0 {
  62. log.Println(v)
  63. } else {
  64. log.Println(code, v["code"], v["value"])
  65. }
  66. }
  67. }
  68. break
  69. }
  70. }
  // Test_clear trims a sample string whose brackets are unbalanced and logs
  // every intermediate result.
  //
  // For each bracket family the opening and closing brackets are counted. With
  // surplus openers, the text up to and including the opener at index
  // "number of closers" is dropped. With surplus closers, the text from the
  // closer at index "number of openers" onward is dropped. A final pass strips
  // a trailing unclosed opener and a leading unopened closer.
  func Test_clear(t *testing.T) {
  	text := "法拉(盛(客{)户)端副科级沙发俩括号的"
  	log.Println("pre---", text)

  	// One regular expression per side of each bracket family.
  	families := []struct{ opener, closer string }{
  		{"[((]", "[))]"},
  		{"[\\[【]", "[\\]】]"},
  		{"[{{]", "[}}]"},
  		{"[<《]", "[>》]"},
  		{"〔", "〕"},
  	}
  	for _, family := range families {
  		openRe := regexp.MustCompile(family.opener)
  		closeRe := regexp.MustCompile(family.closer)
  		opens := openRe.FindAllStringIndex(text, -1)
  		closes := closeRe.FindAllStringIndex(text, -1)
  		nOpen, nClose := len(opens), len(closes)
  		if nOpen == nClose {
  			continue
  		}
  		log.Println("value1---", text, nOpen, nClose)

  		// Surplus openers: keep what follows the first unmatched one.
  		if nOpen > nClose {
  			cut := opens[nClose][1]
  			text = text[cut:]
  		}
  		log.Println("value2---", text)

  		// Surplus closers: keep what precedes the first unmatched one.
  		if nOpen < nClose {
  			cut := closes[nOpen][0]
  			text = text[:cut]
  		}
  		log.Println("value3---", text)
  	}
  	log.Println("value4---", text)

  	// Handle interleaved brackets that the per-family counts cannot detect.
  	trailingOpen := regexp.MustCompile("[((\\[【{{〔<《][^))\\]】}}〕>》]*$")
  	leadingClose := regexp.MustCompile("^[^((\\[【{{〔<《]*[))\\]】}}〕>》]")
  	if trailingOpen.MatchString(text) || leadingClose.MatchString(text) {
  		text = trailingOpen.ReplaceAllString(text, "")
  		text = leadingClose.ReplaceAllString(text, "")
  		//value = fmt.Sprint(childCutNotPrs([]interface{}{value, data[1]}, count+1)[0])
  	}
  	log.Println("result---", text)
  }