extract.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. package main
  2. import (
  3. "app.yhyue.com/moapp/jybase/encrypt"
  4. . "app.yhyue.com/moapp/jybase/mongodb"
  5. . "dataIdentify/db"
  6. . "dataIdentify/service"
  7. "flag"
  8. "github.com/gogf/gf/v2/frame/g"
  9. "github.com/gogf/gf/v2/os/gctx"
  10. "github.com/gogf/gf/v2/util/gconv"
  11. "github.com/gogf/gf/v2/util/grand"
  12. "log"
  13. "sync"
  14. "time"
  15. )
  16. func main11() {
  17. maxSize := flag.Int("c", 0, "")
  18. poolSize := flag.Int("p", 5, "")
  19. lastId := flag.String("id", "", "")
  20. flag.Parse()
  21. log.Println("start...")
  22. sess := Mgo_Main.GetMgoConn()
  23. defer Mgo_Main.DestoryMongoConn(sess)
  24. SelectField["publishtime"] = 1
  25. SelectField["href"] = 1
  26. SelectField["s_winner"] = 1
  27. query := map[string]interface{}{
  28. //"_id": StringTOBsonId("6763aa5555a3d7e571cda133"),
  29. "extracttype": 1,
  30. }
  31. if *lastId != "" {
  32. query["_id"] = map[string]interface{}{
  33. "$lt": StringTOBsonId(*lastId),
  34. }
  35. }
  36. it := sess.DB(Mgo_Main.DbName).C(g.Config().MustGet(gctx.New(), "mongodb.main.collection").String()).Find(query).Select(SelectField).Sort("-_id").Iter()
  37. all := map[string]int{
  38. "是中标联合体": 0,
  39. }
  40. for k, _ := range AllQuoteMode {
  41. all[k] = 0
  42. }
  43. var isOver = func() bool {
  44. for _, v := range all {
  45. if v < *maxSize {
  46. return false
  47. }
  48. }
  49. return true
  50. }
  51. index := 0
  52. pool := make(chan bool, *poolSize)
  53. wait := &sync.WaitGroup{}
  54. lock := &sync.Mutex{}
  55. isAllOver := false
  56. for mm := make(map[string]interface{}); it.Next(mm); {
  57. index++
  58. if index%50000 == 0 {
  59. log.Println("index", index, all)
  60. }
  61. pool <- true
  62. wait.Add(1)
  63. go func(m map[string]interface{}) {
  64. defer func() {
  65. <-pool
  66. wait.Done()
  67. }()
  68. subtype, _ := m["subtype"].(string)
  69. if subtype != "中标" && subtype != "成交" && subtype != "合同" {
  70. return
  71. }
  72. publishtime := gconv.Int(m["publishtime"])
  73. if publishtime%grand.N(1, 1000) != 0 {
  74. return
  75. }
  76. _id := BsonIdToSId(m["_id"])
  77. href := "https://www.jianyu360.com/nologin/content/" + encrypt.CommonEncodeArticle("content", _id) + ".html"
  78. m["jybxhref"] = href
  79. quoteMode, bidCommonwealth := Pretreatment(_id, m, 0)
  80. if quoteMode == "" && bidCommonwealth != 1 {
  81. return
  82. }
  83. m["quote_mode"] = quoteMode
  84. m["bid_commonwealth"] = bidCommonwealth
  85. delete(m, "detail")
  86. lock.Lock()
  87. if all[quoteMode] < *maxSize || (all["是中标联合体"] < *maxSize && bidCommonwealth == 1) {
  88. if Mgo_Main.SaveByOriID("wcj_bidding_"+time.Now().Format("20060102"), m) {
  89. log.Println("save", _id)
  90. if all[quoteMode] < *maxSize {
  91. all[quoteMode]++
  92. }
  93. if all["是中标联合体"] < *maxSize && bidCommonwealth == 1 {
  94. all["是中标联合体"]++
  95. }
  96. if isOver() {
  97. isAllOver = true
  98. }
  99. }
  100. }
  101. lock.Unlock()
  102. }(mm)
  103. mm = make(map[string]interface{})
  104. if isAllOver {
  105. break
  106. }
  107. }
  108. wait.Wait()
  109. log.Println("over...", index)
  110. }