keyword.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. package main
  2. import (
  3. "encoding/json"
  4. u "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mfw"
  6. "log"
  7. "regexp"
  8. "strings"
  9. "time"
  10. )
  11. var (
  12. reg = regexp.MustCompile("^[0-9a-zA-Z-.]+$")
  13. regNo = regexp.MustCompile("^[0-9]*$")
  14. regSpace = regexp.MustCompile("(?ism)(<style.*?>.*?</style>)|([.#]?\\w{1,20}\\{.*?\\})|(<.*?>)|(\\\\t)+|\\t|( +)|( +)|(" + string(rune(160)) + "+)")
  15. regRow = regexp.MustCompile("(?i)<(tr|div|p)[^>]*?>|(\\n)+")
  16. regDh = regexp.MustCompile("[,]+")
  17. regNewDb = regexp.MustCompile("([:,、:,。.;])[,]")
  18. DesLen = 120
  19. keypool = make(chan bool, 1)
  20. client *mfw.Client
  21. )
  22. func InitKeywordClient() {
  23. cf := &mfw.ClientConfig{
  24. ClientName: "剑鱼抽关键词",
  25. EventHandler: func(p *mfw.Packet) {},
  26. MsgServerAddr: conf.Config.KeywordAddr,
  27. CanHandleEvents: []int{},
  28. OnConnectSuccess: func() {
  29. log.Println("剑鱼关键词 client")
  30. },
  31. ReadBufferSize: 10,
  32. WriteBufferSize: 10,
  33. }
  34. client, _ = mfw.NewClient(cf)
  35. }
  36. func DealInfo(bid, data map[string]interface{}) {
  37. defer u.Catch()
  38. if bid["keywords"] != nil && bid["description"] != nil {
  39. return
  40. } else {
  41. data["keywords"] = ""
  42. data["description"] = ""
  43. }
  44. title := u.ObjToString(bid["title"])
  45. var m [][]string
  46. select {
  47. case <-func() <-chan bool {
  48. ch := make(chan bool, 1)
  49. go func(chan bool) {
  50. select {
  51. case keypool <- true:
  52. defer func() {
  53. <-keypool
  54. }()
  55. ret, _ := client.Call("", mfw.UUID(8), 4010, mfw.SENDTO_TYPE_RAND_RECIVER, title, 1)
  56. json.Unmarshal(ret, &m)
  57. case <-time.After(10 * time.Millisecond):
  58. }
  59. ch <- true
  60. }(ch)
  61. return ch
  62. }():
  63. case <-time.After(40 * time.Millisecond):
  64. }
  65. arr := []string{}
  66. keyword := []string{}
  67. keywordnew := []string{}
  68. for _, tmp := range m {
  69. if reg.MatchString(tmp[0]) {
  70. arr = append(arr, tmp[0])
  71. } else {
  72. if len(arr) > 0 {
  73. str := strings.Join(arr, "")
  74. keyword = append(keyword, str)
  75. arr = []string{}
  76. }
  77. if len(tmp[0]) > 3 && (strings.HasPrefix(tmp[1], "n") || tmp[1] == "v" || tmp[1] == "vn" || strings.HasPrefix(tmp[1], "g")) {
  78. keyword = append(keyword, tmp[0])
  79. }
  80. }
  81. }
  82. for _, v := range keyword {
  83. v = regNo.ReplaceAllString(v, "")
  84. if len(v) > 0 {
  85. keywordnew = append(keywordnew, v)
  86. }
  87. }
  88. keywords := strings.Join(keywordnew, ",")
  89. data["keywords"] = keywords
  90. content := ""
  91. if bid["detail_bak"] != nil {
  92. content = u.ObjToString(bid["detail_bak"])
  93. } else {
  94. content = u.ObjToString(bid["detail"])
  95. }
  96. //内容替换
  97. content = strings.Replace(content, " ", "", -1)
  98. content = regSpace.ReplaceAllString(content, "")
  99. content = regRow.ReplaceAllString(content, ",")
  100. content = regDh.ReplaceAllString(content, ",")
  101. content = regNewDb.ReplaceAllString(content, "$1")
  102. if strings.HasPrefix(content, ",") {
  103. content = content[1:]
  104. }
  105. tc := []rune(content)
  106. ltc := len(tc)
  107. description := content
  108. if ltc > DesLen {
  109. description = string(tc[:DesLen])
  110. }
  111. data["description"] = description
  112. //保存到数据库
  113. return
  114. }