extract.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. package main
  2. import (
  3. "app.yhyue.com/moapp/jybase/encrypt"
  4. . "app.yhyue.com/moapp/jybase/mongodb"
  5. . "dataIdentify/db"
  6. . "dataIdentify/service"
  7. "github.com/gogf/gf/v2/frame/g"
  8. "github.com/gogf/gf/v2/os/gctx"
  9. "github.com/gogf/gf/v2/util/gconv"
  10. "github.com/gogf/gf/v2/util/grand"
  11. "log"
  12. "strings"
  13. "time"
  14. )
  15. func main() {
  16. var maxSize = 200
  17. zblhtSize := 0
  18. log.Println("start...")
  19. sess := Mgo_Main.GetMgoConn()
  20. defer Mgo_Main.DestoryMongoConn(sess)
  21. SelectField["publishtime"] = 1
  22. SelectField["href"] = 1
  23. SelectField["s_winner"] = 1
  24. it := sess.DB(Mgo_Main.DbName).C(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()).Find(map[string]interface{}{
  25. "_id": map[string]interface{}{
  26. "$lt": StringTOBsonId("686ce21dd5d8e4081f8f2d98"),
  27. },
  28. //"_id": StringTOBsonId("6763aa5555a3d7e571cda133"),
  29. "extracttype": 1,
  30. }).Select(SelectField).Sort("-_id").Iter()
  31. types := "中标、成交、合同"
  32. all := map[string]int{}
  33. for _, v := range strings.Split(types, "、") {
  34. all[v] = 0
  35. }
  36. var isOver = func() bool {
  37. for _, v := range all {
  38. if v < maxSize {
  39. return false
  40. }
  41. }
  42. return true
  43. }
  44. index := 0
  45. for m := make(map[string]interface{}); it.Next(m); {
  46. index++
  47. if index%500 == 0 {
  48. log.Println("index", index)
  49. }
  50. subtype, _ := m["subtype"].(string)
  51. if _, ok := all[subtype]; !ok {
  52. continue
  53. }
  54. if !isOver() {
  55. publishtime := gconv.Int(m["publishtime"])
  56. if publishtime%grand.N(1, 1000) != 0 {
  57. continue
  58. }
  59. if all[subtype] >= maxSize {
  60. continue
  61. }
  62. }
  63. _id := BsonIdToSId(m["_id"])
  64. href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", _id) + ".html"
  65. m["jybxhref"] = href
  66. var flag bool
  67. var quoteMode string
  68. var bidCommonwealth int
  69. if isOver() {
  70. flag, _, _, bidCommonwealth, _ = Pretreatment(_id, m, 2)
  71. if bidCommonwealth == 1 {
  72. flag, quoteMode, _, _, _ = Pretreatment(_id, m, 1)
  73. if quoteMode == "" || quoteMode == QuoteMode_Other {
  74. continue
  75. }
  76. }
  77. } else {
  78. flag, quoteMode, _, bidCommonwealth, _ = Pretreatment(_id, m, 0)
  79. if quoteMode == "" || quoteMode == QuoteMode_Other {
  80. continue
  81. }
  82. }
  83. if !flag {
  84. return
  85. }
  86. m["quote_mode"] = quoteMode
  87. if bidCommonwealth == 1 {
  88. zblhtSize++
  89. }
  90. m["bid_commonwealth"] = bidCommonwealth
  91. delete(m, "detail")
  92. if !isOver() || bidCommonwealth == 1 {
  93. newId := Mgo_Main.SaveByOriID("wcj_bidding_"+time.Now().Format("20060102"), m)
  94. log.Println("save", newId)
  95. }
  96. all[subtype]++
  97. if isOver() && zblhtSize == maxSize {
  98. break
  99. }
  100. }
  101. log.Println("over...", index)
  102. }