main.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. package main
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/nats-io/nats.go"
  6. cu "jygit.jydev.jianyu360.cn/data_capture/myself_util/commonutil"
  7. iu "jygit.jydev.jianyu360.cn/data_capture/myself_util/initutil"
  8. su "jygit.jydev.jianyu360.cn/data_capture/myself_util/spiderutil"
  9. "log"
  10. "net/http"
  11. "time"
  12. )
  // Config holds the raw service configuration; init fills it through
  // iu.ReadConfig.
  var Config map[string]interface{}

  // String settings copied out of Config during init.
  var (
  	// Webport is the port main passes to http.ListenAndServe.
  	Webport string
  	// Subscribe is the subject handed to Jnats.SubZip; it is also recorded
  	// on each processed message as the current step.
  	Subscribe string
  	// Api is read from the "api" config key. The only visible reference is
  	// the commented-out SendMail helper.
  	Api string
  	// To is read from the "to" config key. The only visible reference is
  	// the commented-out SendMail helper.
  	To string
  )
  20. func init() {
  21. iu.ReadConfig(&Config)
  22. InitFileInfo() //初始化附件解析信息
  23. InitOss() //oss
  24. InitNats() //nats
  25. Webport = cu.ObjToString(Config["webport"])
  26. Subscribe = cu.ObjToString(Config["subscribe"])
  27. Api = cu.ObjToString(Config["api"])
  28. To = cu.ObjToString(Config["to"])
  29. }
  30. func main() {
  31. go http.ListenAndServe(":"+Webport, nil)
  32. SubscribeNats()
  33. ch := make(chan bool, 1)
  34. <-ch
  35. }
  36. func SubscribeNats() {
  37. //先消费,带压缩
  38. Jnats.SubZip(Subscribe, func(msg *nats.Msg) {
  39. data := &MsgInfo{}
  40. err := json.Unmarshal(msg.Data, &data)
  41. if err != nil {
  42. log.Println("解析数据失败:", err)
  43. data.Err = err
  44. //SaveData()//保存异常数据
  45. } else {
  46. //处理数据
  47. data.Stime = time.Now().Unix()
  48. data.CurrSetp = Subscribe
  49. DealFile(data.Data)
  50. data.Etime = time.Now().Unix()
  51. }
  52. //消息回写
  53. bs, _ := json.Marshal(data)
  54. err = msg.Respond(bs)
  55. if err != nil {
  56. fmt.Println("回执失败:", data.Id)
  57. //SaveData()//保存异常数据
  58. }
  59. })
  60. }
  61. func DealFile(tmp map[string]interface{}) {
  62. site := cu.ObjToString(tmp["site"]) //解析附件站点
  63. if limitRatio := OssSite[site]; limitRatio > 0 { //配置站点解析附件,根据准确率情况替换正文
  64. replace, filetext := AnalysisFile(true, limitRatio, tmp)
  65. if replace { //替换正文
  66. tmp["detail"] = filetext
  67. }
  68. } else { //其它网站附件信息,detail无效,只有一个附件且不是ocr识别的,替换正文
  69. //判断detail是否有效
  70. detail := cu.ObjToString(tmp["detail"])
  71. detail = su.FilterDetail(detail) //只保留文本内容
  72. if len([]rune(detail)) <= 5 || (len([]rune(detail)) <= 50 && SpecialTextReg.MatchString(detail)) {
  73. replace, filetext := AnalysisFile(false, 0, tmp)
  74. if replace { //替换正文
  75. tmp["detail"] = filetext
  76. }
  77. }
  78. }
  79. }
  80. /*func SendMail(bodyTextAll string) {
  81. res, err := http.Get(fmt.Sprintf("%s?to=%s&title=%s&body=%s", Api, To, "ocr_file_over", bodyTextAll))
  82. if err == nil {
  83. defer res.Body.Close()
  84. read, err := ioutil.ReadAll(res.Body)
  85. fmt.Println("邮件发送成功:", string(read), err)
  86. } else {
  87. fmt.Println("邮件发送失败:", err)
  88. }
  89. }
  90. */