main_test.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. package main
  2. import (
  3. "fmt"
  4. "jy/admin/track"
  5. "jy/clear"
  6. "jy/extract"
  7. . "jy/mongodbutil"
  8. "log"
  9. "os"
  10. "qfw/util"
  11. "regexp"
  12. "strconv"
  13. "strings"
  14. "testing"
  15. "time"
  16. )
  17. func Test_han(t *testing.T) {
  18. str := `[\u4e00-\u9fa5]` //"[\u4e00-\u9fa5]"
  19. //var rg = regexp.MustCompile(`[\u4e00-\u9fa5]`)会出错
  20. if strings.Contains(str, "\\u") {
  21. pattern, _ := strconv.Unquote(`"` + str + `"`)
  22. log.Println(pattern)
  23. }
  24. var rg = regexp.MustCompile(str)
  25. fmt.Println(rg.MatchString(str))
  26. os.Exit(0)
  27. }
  28. func Test_task(t *testing.T) {
  29. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
  30. //extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
  31. //extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5df59ee3e9d1f601e46fc3f9", "1", "mxs_v1", "mxs_v1")
  32. extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5df50776e9d1f601e4964179", "1", "mxs_v1", "mxs_v2")
  33. //extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
  34. time.Sleep(5 * time.Second)
  35. }
  36. func Test_extractcity(t *testing.T) {
  37. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
  38. extract.InitDFA2()
  39. //查询采购单位信息
  40. extract.FindBuyer()
  41. }
  42. func Test_reg(t *testing.T) {
  43. reg1, _ := regexp.Compile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
  44. log.Println("---", reg1.FindAllString("05939-5365001(兰陵县芦柞镇人民政府)", -1))
  45. reg2, _ := regexp.Compile("^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$")
  46. log.Println("---", reg2.MatchString("张女士/"))
  47. filterK := regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
  48. log.Println(filterK.FindString("二)采购项目联系人(代理机构)"))
  49. }
  50. func Test_reg1(t *testing.T) {
  51. context := `sss<input name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">
  52. dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`
  53. reg := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)
  54. tmp := reg.ReplaceAllString(context, "$1")
  55. log.Println(tmp)
  56. }
  57. func Test_paths(t *testing.T) {
  58. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
  59. tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
  60. for code, v := range tracks {
  61. if tmp, ok := v.([]map[string]interface{}); ok {
  62. for k, v := range tmp {
  63. if k == 0 {
  64. log.Println(v)
  65. } else {
  66. log.Println(code, v["code"], v["value"])
  67. }
  68. }
  69. }
  70. break
  71. }
  72. }
  73. func Test_clear(t *testing.T) {
  74. value := "法拉(盛(客{)户)端副科级沙发俩括号的"
  75. log.Println("pre---", value)
  76. startChars := []string{"[((]", "[\\[【]", "[{{]", "[<《]", "〔"}
  77. endChars := []string{"[))]", "[\\]】]", "[}}]", "[>》]", "〕"}
  78. for k, v := range startChars {
  79. sReg := regexp.MustCompile(v)
  80. eReg := regexp.MustCompile(endChars[k])
  81. sIndex := sReg.FindAllStringIndex(value, -1)
  82. eIndex := eReg.FindAllStringIndex(value, -1)
  83. sCount := len(sIndex)
  84. eCount := len(eIndex)
  85. if sCount == eCount {
  86. continue
  87. }
  88. log.Println("value1---", value, sCount, eCount)
  89. //清理前面
  90. if sCount > eCount {
  91. value = value[sIndex[eCount][1]:]
  92. }
  93. log.Println("value2---", value)
  94. //清理后面
  95. if sCount < eCount {
  96. value = value[:eIndex[sCount][0]]
  97. }
  98. log.Println("value3---", value)
  99. }
  100. log.Println("value4---", value)
  101. //交叉出现情况处理
  102. sReplReg := regexp.MustCompile("[((\\[【{{〔<《][^))\\]】}}〕>》]*$")
  103. eReplReg := regexp.MustCompile("^[^((\\[【{{〔<《]*[))\\]】}}〕>》]")
  104. if sReplReg.MatchString(value) || eReplReg.MatchString(value) {
  105. value = sReplReg.ReplaceAllString(value, "")
  106. value = eReplReg.ReplaceAllString(value, "")
  107. //value = fmt.Sprint(childCutNotPrs([]interface{}{value, data[1]}, count+1)[0])
  108. }
  109. log.Println("result---", value)
  110. }
  111. func Test_buyer(t *testing.T) {
  112. Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
  113. demo, _ := Mgo.Find("demo_data", nil, `{"_id:1"}`, `{"buyer":1,"title":1}`, false, -1, -1)
  114. result, _ := Mgo.Find("mxs_buyer", nil, `{"_id:1"}`, `{"buyer":1}`, false, -1, -1)
  115. for _, d := range *demo {
  116. id1 := util.BsonIdToSId(d["_id"])
  117. buyer1 := util.ObjToString(d["buyer"])
  118. //title := util.ObjToString(d["title"])
  119. for _, r := range *result {
  120. id2 := util.BsonIdToSId(r["_id"])
  121. buyer2 := util.ObjToString(r["buyer"])
  122. if id1 == id2 {
  123. if buyer1 != buyer2 {
  124. util.Debug(id1, buyer1, buyer2)
  125. }
  126. break
  127. }
  128. }
  129. }
  130. }
  131. func Test_util1(t *testing.T) {
  132. data := clear.CutSymbol([]interface{}{"----------123123", "-----123123"})
  133. fmt.Println(data)
  134. }