test.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. package extract
  2. import (
  3. "data_ai/prompt"
  4. "data_ai/ul"
  5. "fmt"
  6. log "github.com/donnie4w/go-logger/logger"
  7. new_xlsx "github.com/tealeg/xlsx/v3"
  8. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  9. "os"
  10. "sync"
  11. "unicode/utf8"
  12. )
  13. func TestSingleFieldInfo(name string, tmpid string) {
  14. tmp := ul.SourceMgo.FindById(name, tmpid)
  15. if len(tmp) == 0 || tmp == nil {
  16. log.Debug("未查询到数据...", tmpid)
  17. return
  18. }
  19. data := ResolveInfo(tmp)
  20. //最终结果...
  21. for k, v := range data {
  22. log.Debug(k, "~", v)
  23. }
  24. }
  25. func TestIsPackage() {
  26. tmpArr := []string{
  27. "669e83fe66cf0db42a6520b3",
  28. "669e892066cf0db42a652c9b",
  29. "669e904966cf0db42a653b5d",
  30. "669f16f466cf0db42a669069",
  31. "669f186c66cf0db42a669bf0",
  32. "669efb6766cf0db42a65e0b4",
  33. "669f004266cf0db42a65f201",
  34. "669f02a666cf0db42a65fff3",
  35. "669f172766cf0db42a669193",
  36. "669ec89566cf0db42a659020",
  37. "669e86b266cf0db42a6526ac",
  38. "669e86e466cf0db42a6527b7",
  39. "669e87b766cf0db42a652a3e",
  40. "669f082d66cf0db42a662323",
  41. "669e95e966cf0db42a654dd1",
  42. "669ea39466cf0db42a656311",
  43. "669f140366cf0db42a66772f",
  44. "669ee59466cf0db42a65b8aa",
  45. "669f05a166cf0db42a66117b",
  46. "669e90d666cf0db42a653e0a",
  47. "669f08c466cf0db42a66273c",
  48. "669f155166cf0db42a6682c7",
  49. "669ef0ff66cf0db42a65c83a",
  50. "669efdc166cf0db42a65e8f3",
  51. "669f090066cf0db42a6629d0",
  52. "669f111366cf0db42a665ce7",
  53. "669f15fb66cf0db42a668901",
  54. "669f0baa66cf0db42a663a72",
  55. "669f039766cf0db42a66044e",
  56. "669eff3e66cf0db42a65ee73",
  57. "669f12c366cf0db42a666b9d",
  58. "669e913b66cf0db42a653ffc",
  59. "669e833466cf0db42a651e3a",
  60. "669f071e66cf0db42a661b03",
  61. "669f1a1266cf0db42a66a892",
  62. "669f0aec66cf0db42a6635e8",
  63. "669f169c66cf0db42a668e1d",
  64. "669ed6c966cf0db42a65a75d",
  65. "669f072866cf0db42a661b26",
  66. "669f185866cf0db42a669af0",
  67. "669f15d366cf0db42a6687aa",
  68. "669f182466cf0db42a669960",
  69. "669f0ed066cf0db42a664e5c",
  70. "669f076466cf0db42a661cd4",
  71. "669f172966cf0db42a6691c0",
  72. "669f198466cf0db42a66a385",
  73. "669f1ad366cf0db42a66afb9",
  74. "669f156666cf0db42a668403",
  75. "669f093c66cf0db42a662c08",
  76. "669f0d8266cf0db42a6646cb",
  77. "669f06e866cf0db42a661a1d",
  78. "669f1bd766cf0db42a66b86e",
  79. "669efcd066cf0db42a65e4f4",
  80. }
  81. pkgArr := []int{
  82. 1,
  83. 1,
  84. 1,
  85. 1,
  86. 0,
  87. 1,
  88. 1,
  89. 1,
  90. 1,
  91. 1,
  92. 1,
  93. 1,
  94. 1,
  95. 0,
  96. 0,
  97. 1,
  98. 1,
  99. 1,
  100. 1,
  101. 1,
  102. 0,
  103. 0,
  104. 0,
  105. 1,
  106. 0,
  107. 0,
  108. 0,
  109. 1,
  110. 0,
  111. 1,
  112. 0,
  113. 0,
  114. 1,
  115. 0,
  116. 1,
  117. 0,
  118. 1,
  119. 1,
  120. 0,
  121. 1,
  122. 0,
  123. 0,
  124. 1,
  125. 1,
  126. 1,
  127. 1,
  128. 1,
  129. 1,
  130. 0,
  131. 1,
  132. 1,
  133. 0,
  134. 1,
  135. }
  136. ok := 0
  137. for k, v := range tmpArr {
  138. data := ul.SourceMgo.FindById("ai_41411", v)
  139. if len(data) == 0 {
  140. data = ul.SourceMgo.FindById("ai_294", v)
  141. }
  142. detail := qu.ObjToString(data["detail"])
  143. ispkg := prompt.AcquireIsPackageInfo(detail)
  144. if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
  145. ok++
  146. } else {
  147. log.Debug("错误~", v)
  148. }
  149. }
  150. log.Debug("is over ~ ", len(tmpArr)-ok)
  151. }
  152. func TestPackageInfo() {
  153. query := map[string]interface{}{
  154. "new_pkg": map[string]interface{}{
  155. "$exists": 1,
  156. },
  157. }
  158. dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{})
  159. log.Debug("查询数量...", len(dataArr))
  160. os.Remove("test.xlsx")
  161. f := new_xlsx.NewFile()
  162. sheet, _ := f.AddSheet("数据信息")
  163. row := sheet.AddRow()
  164. writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
  165. for _, v := range dataArr {
  166. tmpid := ul.BsonTOStringId(v["_id"])
  167. ttt := ul.SourceMgo.FindById("ai_41411", tmpid)
  168. site := qu.ObjToString(ttt["site"])
  169. jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  170. p_info := *qu.ObjToMap(v["new_pkg"])
  171. p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"])
  172. for _, v1 := range p_arr {
  173. row = sheet.AddRow()
  174. arr := []string{}
  175. arr = append(arr, tmpid)
  176. arr = append(arr, site)
  177. arr = append(arr, jyhref)
  178. arr = append(arr, qu.ObjToString(v1["包项目名称"]))
  179. arr = append(arr, qu.ObjToString(v1["中标单位"]))
  180. arr = append(arr, qu.ObjToString(v1["中标金额"]))
  181. writeRow(row, arr)
  182. }
  183. }
  184. if err := f.Save("test.xlsx"); err != nil {
  185. fmt.Println("保存xlsx失败:", err)
  186. } else {
  187. fmt.Println("保存xlsx成功:", err)
  188. }
  189. log.Debug("is over ...")
  190. return
  191. //分包判断,获取信息
  192. pool_mgo := make(chan bool, 80)
  193. wg_mgo := &sync.WaitGroup{}
  194. for k, v := range dataArr {
  195. if k%10 == 0 {
  196. log.Debug(k, "~", v["_id"])
  197. }
  198. pool_mgo <- true
  199. wg_mgo.Add(1)
  200. go func(v map[string]interface{}) {
  201. defer func() {
  202. <-pool_mgo
  203. wg_mgo.Done()
  204. }()
  205. tmpid := ul.BsonTOStringId(v["_id"])
  206. data := ul.SourceMgo.FindById("ai_41411", tmpid)
  207. if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 {
  208. pkg := prompt.AcquireMultiplePackageInfo(detail)
  209. //最终结果...
  210. ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{
  211. "$set": map[string]interface{}{
  212. "new_pkg": pkg,
  213. },
  214. })
  215. }
  216. }(v)
  217. }
  218. wg_mgo.Wait()
  219. }
  220. // 更新链接
  221. func TestUpdateJyhref(name string) {
  222. dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  223. for _, v := range dataArr {
  224. tmpid := ul.BsonTOStringId(v["_id"])
  225. jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  226. ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
  227. "$set": map[string]interface{}{
  228. "jyhref": jyhref,
  229. },
  230. })
  231. }
  232. log.Debug("is over ...")
  233. }
  234. func writeRow(row *new_xlsx.Row, arr []string) {
  235. for _, v := range arr {
  236. row.AddCell().Value = v
  237. }
  238. }