test.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. package extract
  2. import (
  3. "data_ai/prompt"
  4. "data_ai/ul"
  5. "fmt"
  6. log "github.com/donnie4w/go-logger/logger"
  7. new_xlsx "github.com/tealeg/xlsx/v3"
  8. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  9. "sync"
  10. "time"
  11. "unicode/utf8"
  12. )
  13. // 验证单条数据···
  14. func TestSingleFieldInfo(name string, tmpid string) {
  15. log.Debug("测试单条大模型数据···")
  16. now := time.Now().Unix()
  17. tmp := ul.BidMgo.FindById(name, tmpid)
  18. if len(tmp) == 0 || tmp == nil {
  19. log.Debug("未查询到数据...", tmpid)
  20. return
  21. }
  22. data := ResolveInfo(tmp)
  23. //最终结果...
  24. if data != nil {
  25. log.Debug(data["s_area"], "~", data["s_city"])
  26. }
  27. log.Debug("耗时···", time.Now().Unix()-now)
  28. }
  29. // 验证单条分包数据···
  30. func TestSinglePurchasingInfo(name string, tmpid string) {
  31. now := time.Now().Unix()
  32. tmp := ul.BidMgo.FindById(name, tmpid)
  33. if len(tmp) == 0 || tmp == nil {
  34. log.Debug("未查询到数据...", tmpid)
  35. return
  36. }
  37. detail := getDetailText(tmp, tmpid) //获取正文文本
  38. p_list := getPurList(tmp, detail, map[string]interface{}{})
  39. log.Debug(p_list)
  40. //最终结果...
  41. for k, v := range p_list {
  42. log.Debug(k, "~", v)
  43. }
  44. log.Debug("耗时···", time.Now().Unix()-now)
  45. }
  46. // 验证单条分包数据···
  47. func TestSinglePackageInfo(name string, tmpid string) {
  48. now := time.Now().Unix()
  49. tmp := ul.BidMgo.FindById(name, tmpid)
  50. if len(tmp) == 0 || tmp == nil {
  51. log.Debug("未查询到数据...", tmpid)
  52. return
  53. }
  54. detail := qu.ObjToString(tmp["detail"])
  55. filetext := qu.ObjToString(tmp["filetext"]) //此处为附件信息···
  56. if utf8.RuneCountInString(detail) < 100 {
  57. detail = filetext
  58. }
  59. detail = ul.HttpConvertToMarkdown(detail)
  60. pkg := prompt.AcquireNewMultiplePackageInfo(detail, false)
  61. //最终结果...
  62. for k, v := range pkg {
  63. log.Debug(k, "~", v)
  64. }
  65. log.Debug("耗时···", time.Now().Unix()-now)
  66. }
  67. // 新分包数据···
  68. func TestNewPackageInfo1010() {
  69. dataArr, _ := ul.SourceMgo.Find("zktest_info_0930", map[string]interface{}{}, nil, nil)
  70. log.Debug("数量···", len(dataArr))
  71. pool_mgo := make(chan bool, 50)
  72. wg_mgo := &sync.WaitGroup{}
  73. for k, v := range dataArr {
  74. if k%50 == 0 {
  75. log.Debug("cur index ", k)
  76. }
  77. pool_mgo <- true
  78. wg_mgo.Add(1)
  79. go func(v map[string]interface{}) {
  80. defer func() {
  81. <-pool_mgo
  82. wg_mgo.Done()
  83. }()
  84. new_v := v
  85. data := ResolveInfo(v)
  86. if len(data) > 0 {
  87. new_v["ai_zhipu"] = data
  88. }
  89. ul.SourceMgo.Save("zktest_info_0930_new", new_v)
  90. }(v)
  91. }
  92. wg_mgo.Wait()
  93. log.Debug("is over ...")
  94. }
  95. // 更新链接
  96. func TestUpdateJyhref(name string) {
  97. dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  98. for _, v := range dataArr {
  99. tmpid := ul.BsonTOStringId(v["_id"])
  100. jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  101. ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
  102. "$set": map[string]interface{}{
  103. "jyhref": jyhref,
  104. },
  105. })
  106. }
  107. log.Debug("is over ...")
  108. }
  109. func writeRow(row *new_xlsx.Row, arr []string) {
  110. for _, v := range arr {
  111. row.AddCell().Value = v
  112. }
  113. }