// main_test.go (4.8 KB)
  1. package main
  2. import (
  3. "fmt"
  4. "jy/admin/track"
  5. "jy/extract"
  6. . "jy/mongodbutil"
  7. "log"
  8. "os"
  9. "qfw/util"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. "testing"
  14. "time"
  15. )
  // Test_han checks that a Han-character class written with literal `\u`
  // escapes can be turned into a working regular expression.
  //
  // Go's regexp syntax has no `\u` escape, so the raw text is first unquoted as
  // a Go string literal; the unquoted pattern, which holds the real code points,
  // is the one that gets compiled.
  func Test_han(t *testing.T) {
  	// Raw literal: the backslashes are kept verbatim. The interpreted form
  	// "[\u4e00-\u9fa5]" would already contain the decoded characters.
  	str := `[\u4e00-\u9fa5]`
  	// regexp.MustCompile(`[\u4e00-\u9fa5]`) fails because `\u` is not a valid
  	// regexp escape, hence the unquoting step below.
  	pattern := str
  	if strings.Contains(str, "\\u") {
  		unquoted, err := strconv.Unquote(`"` + str + `"`)
  		if err != nil {
  			t.Fatalf("unquote %q: %v", str, err)
  		}
  		pattern = unquoted
  		log.Println(pattern)
  	}
  	rg, err := regexp.Compile(pattern)
  	if err != nil {
  		t.Fatalf("compile %q: %v", pattern, err)
  	}
  	// The escaped source text contains no Han characters, so this prints false.
  	// Writing through os.Stdout is what fmt.Println does anyway and keeps the
  	// file's "os" import referenced now that os.Exit is gone.
  	fmt.Fprintln(os.Stdout, rg.MatchString(str))
  	if !rg.MatchString("汉字") {
  		t.Errorf("pattern %q did not match Han text", pattern)
  	}
  	// Deliberately no os.Exit(0): exiting here would terminate the whole test
  	// binary and prevent the remaining tests from running.
  }
  27. func Test_task(t *testing.T) {
  28. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
  29. //extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
  30. //extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5df59ee3e9d1f601e46fc3f9", "1", "mxs_v1", "mxs_v1")
  31. extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5df50776e9d1f601e4964179", "1", "mxs_v1", "mxs_v2")
  32. //extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
  33. time.Sleep(5 * time.Second)
  34. }
  35. func Test_extractcity(t *testing.T) {
  36. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  37. extract.InitDFA2()
  38. //查询采购单位信息
  39. extract.FindBuyer()
  40. }
  // Test_reg compiles three extraction patterns and logs what each one yields
  // for a sample input.
  //
  // The dash character class used to be written `[×―—-\-]`. Inside a class,
  // `—-\-` is parsed as a range from U+2014 down to U+002D, which is an invalid
  // (descending) range, so compilation failed; because the error was discarded
  // the nil *Regexp then panicked on first use. The redundant unescaped hyphen
  // is dropped so the class lists ×, ―, — and - as plain members, and
  // MustCompile is used so any future pattern error is reported at once.
  //
  // NOTE(review): if the source class was meant to contain an additional
  // dash-like character that has been lost, add it back explicitly.
  func Test_reg(t *testing.T) {
  	// Digit groups joined by dash-like separators, with an optional
  	// parenthesised prefix and an optional "转" extension part.
  	reg1 := regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—\\-]+\\d{3,4})?|\\d{3,5}[×―—\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—\\-]+\\d{3,})?|(\\d{2}[×―—\\-])+\\d{8}[×―—\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
  	log.Println("---", reg1.FindAllString("05939-5365001(兰陵县芦柞镇人民政府)", -1))

  	// Whole-string check: only digits, dash-like characters and whitespace.
  	reg2 := regexp.MustCompile("^\\d*[×―—\\-]*[\u3000\u2003\u00a0\\s]*\\d*$")
  	log.Println("---", reg2.MatchString("张女士/"))

  	// Bracketed spans, tag-like spans, punctuation runs, or a leading run of
  	// Chinese/ASCII numerals.
  	filterK := regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
  	log.Println(filterK.FindString("二)采购项目联系人(代理机构)"))
  }
  // Test_reg1 rewrites every match of the `<input ... value=...>` pattern in a
  // two-line HTML snippet to the text of its first capture group (the value
  // attribute's content) and logs the resulting string.
  func Test_reg1(t *testing.T) {
  	// The two snippet lines are joined by a single newline.
  	const snippet = `sss<input name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">` + "\n" +
  		`dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`

  	inputTag := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)
  	log.Println(inputTag.ReplaceAllString(snippet, "$1"))
  }
  56. func Test_paths(t *testing.T) {
  57. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
  58. tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
  59. for code, v := range tracks {
  60. if tmp, ok := v.([]map[string]interface{}); ok {
  61. for k, v := range tmp {
  62. if k == 0 {
  63. log.Println(v)
  64. } else {
  65. log.Println(code, v["code"], v["value"])
  66. }
  67. }
  68. }
  69. break
  70. }
  71. }
  // Test_clear walks a sample string through a bracket clean-up routine and
  // logs every intermediate value.
  //
  // For each bracket family the opening and closing characters are counted;
  // when the counts differ the string is cut at the first surplus bracket.
  // Afterwards a dangling opener at the tail and a dangling closer at the head
  // are stripped.
  func Test_clear(t *testing.T) {
  	value := "法拉(盛(客{)户)端副科级沙发俩括号的"
  	log.Println("pre---", value)

  	// Each entry pairs the pattern for one family of opening brackets with the
  	// pattern for the matching closing brackets.
  	bracketPairs := [][2]string{
  		{"[((]", "[))]"},
  		{"[\\[【]", "[\\]】]"},
  		{"[{{]", "[}}]"},
  		{"[<《]", "[>》]"},
  		{"〔", "〕"},
  	}
  	for _, pair := range bracketPairs {
  		opens := regexp.MustCompile(pair[0]).FindAllStringIndex(value, -1)
  		closes := regexp.MustCompile(pair[1]).FindAllStringIndex(value, -1)
  		nOpen, nClose := len(opens), len(closes)
  		if nOpen == nClose {
  			continue
  		}
  		log.Println("value1---", value, nOpen, nClose)

  		// Trim the front: with surplus openers, keep only what follows the
  		// opener at index nClose.
  		if nOpen > nClose {
  			cut := opens[nClose][1]
  			value = value[cut:]
  		}
  		log.Println("value2---", value)

  		// Trim the back: with surplus closers, keep only what precedes the
  		// closer at index nOpen.
  		if nClose > nOpen {
  			cut := closes[nOpen][0]
  			value = value[:cut]
  		}
  		log.Println("value3---", value)
  	}
  	log.Println("value4---", value)

  	// Crossed brackets: remove an opener with no closer after it up to the end
  	// of the string, then a closer with no opener before it from the start.
  	// Replacing is a no-op when a pattern does not match, so no guard is needed.
  	trailingOpen := regexp.MustCompile("[((\\[【{{〔<《][^))\\]】}}〕>》]*$")
  	leadingClose := regexp.MustCompile("^[^((\\[【{{〔<《]*[))\\]】}}〕>》]")
  	value = trailingOpen.ReplaceAllString(value, "")
  	value = leadingClose.ReplaceAllString(value, "")
  	// Disabled follow-up, kept for reference:
  	//value = fmt.Sprint(childCutNotPrs([]interface{}{value, data[1]}, count+1)[0])
  	log.Println("result---", value)
  }
  110. func Test_buyer(t *testing.T) {
  111. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
  112. demo, _ := Mgo.Find("demo_data", nil, `{"_id:1"}`, `{"buyer":1,"title":1}`, false, -1, -1)
  113. result, _ := Mgo.Find("mxs_buyer", nil, `{"_id:1"}`, `{"buyer":1}`, false, -1, -1)
  114. for _, d := range *demo {
  115. id1 := util.BsonIdToSId(d["_id"])
  116. buyer1 := util.ObjToString(d["buyer"])
  117. //title := util.ObjToString(d["title"])
  118. for _, r := range *result {
  119. id2 := util.BsonIdToSId(r["_id"])
  120. buyer2 := util.ObjToString(r["buyer"])
  121. if id1 == id2 {
  122. if buyer1 != buyer2 {
  123. util.Debug(id1, buyer1, buyer2)
  124. }
  125. break
  126. }
  127. }
  128. }
  129. }