jykeyword.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. package main
  2. import (
  3. "encoding/json"
  4. "field_sync/config"
  5. "log"
  6. "regexp"
  7. "strings"
  8. "time"
  9. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mfw"
  11. )
  12. var (
  13. client *mfw.Client
  14. reg = regexp.MustCompile("^[0-9a-zA-Z-.]+$")
  15. regNo = regexp.MustCompile("^[0-9]*$")
  16. regSpace = regexp.MustCompile("(?ism)(<style.*?>.*?</style>)|([.#]?\\w{1,20}\\{.*?\\})|(<.*?>)|(\\\\t)+|\\t|( +)|( +)|(" + string(rune(160)) + "+)")
  17. regRow = regexp.MustCompile("(?i)<(tr|div|p)[^>]*?>|(\\n)+")
  18. regDh = regexp.MustCompile("[,]+")
  19. regNewDb = regexp.MustCompile("([:,、:,。.;])[,]")
  20. DesLen = 120
  21. )
  22. func inits() {
  23. cf := &mfw.ClientConfig{
  24. ClientName: "剑鱼抽关键词",
  25. EventHandler: func(p *mfw.Packet) {},
  26. MsgServerAddr: config.Conf.Serve.MsgAddr,
  27. CanHandleEvents: []int{},
  28. OnConnectSuccess: func() {
  29. log.Println("剑鱼关键词 client")
  30. },
  31. ReadBufferSize: 10,
  32. WriteBufferSize: 10,
  33. }
  34. client, _ = mfw.NewClient(cf)
  35. }
  36. var keypool = make(chan bool, 1)
  37. func DealInfo(obj, update *map[string]interface{}) {
  38. defer util.Catch()
  39. if (*obj)["keywords"] != nil && (*obj)["description"] != nil {
  40. return
  41. } else {
  42. (*update)["keywords"] = ""
  43. (*update)["description"] = ""
  44. }
  45. title := util.ObjToString((*obj)["title"])
  46. var m [][]string
  47. select {
  48. case <-func() <-chan bool {
  49. ch := make(chan bool, 1)
  50. go func(chan bool) {
  51. select {
  52. case keypool <- true:
  53. defer func() {
  54. <-keypool
  55. }()
  56. ret, _ := client.Call("", mfw.UUID(8), 4010, mfw.SENDTO_TYPE_RAND_RECIVER, title, 1)
  57. json.Unmarshal(ret, &m)
  58. case <-time.After(10 * time.Millisecond):
  59. }
  60. ch <- true
  61. }(ch)
  62. return ch
  63. }():
  64. case <-time.After(40 * time.Millisecond):
  65. }
  66. arr := []string{}
  67. keyword := []string{}
  68. keywordnew := []string{}
  69. for _, tmp := range m {
  70. if reg.MatchString(tmp[0]) {
  71. arr = append(arr, tmp[0])
  72. } else {
  73. if len(arr) > 0 {
  74. str := strings.Join(arr, "")
  75. keyword = append(keyword, str)
  76. arr = []string{}
  77. }
  78. if len(tmp[0]) > 3 && (strings.HasPrefix(tmp[1], "n") || tmp[1] == "v" || tmp[1] == "vn" || strings.HasPrefix(tmp[1], "g")) {
  79. keyword = append(keyword, tmp[0])
  80. }
  81. }
  82. }
  83. for _, v := range keyword {
  84. v = regNo.ReplaceAllString(v, "")
  85. if len(v) > 0 {
  86. keywordnew = append(keywordnew, v)
  87. }
  88. }
  89. keywords := strings.Join(keywordnew, ",")
  90. (*update)["keywords"] = keywords
  91. content := ""
  92. if (*obj)["detail_bak"] != nil {
  93. content = util.ObjToString((*obj)["detail_bak"])
  94. } else {
  95. content = util.ObjToString((*obj)["detail"])
  96. }
  97. //内容替换
  98. content = strings.Replace(content, " ", "", -1)
  99. content = regSpace.ReplaceAllString(content, "")
  100. content = regRow.ReplaceAllString(content, ",")
  101. content = regDh.ReplaceAllString(content, ",")
  102. content = regNewDb.ReplaceAllString(content, "$1")
  103. if strings.HasPrefix(content, ",") {
  104. content = content[1:]
  105. }
  106. tc := []rune(content)
  107. ltc := len(tc)
  108. description := content
  109. if ltc > DesLen {
  110. description = string(tc[:DesLen])
  111. }
  112. (*update)["description"] = description
  113. //保存到数据库
  114. return
  115. }