main.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. package main
  2. import (
  3. "data_ai/extract"
  4. "data_ai/tool"
  5. "data_ai/udp"
  6. "data_ai/ul"
  7. "fmt"
  8. log "github.com/donnie4w/go-logger/logger"
  9. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "strings"
  11. "sync"
  12. )
  13. func init() {
  14. ul.IsLocal = true //是否本地
  15. ul.IsTool = false //是否工具
  16. ul.IsFull = false //是否全量
  17. if ul.IsTool {
  18. ul.InitToolVar()
  19. } else {
  20. ul.InitGlobalVar()
  21. if !ul.IsFull {
  22. if ul.ModelType == "deepseek" {
  23. go extract.RunDeepSeek()
  24. } else {
  25. udp.InitProcessVar()
  26. }
  27. }
  28. }
  29. if !ul.IsTool {
  30. ul.InitOss(ul.IsLocal)
  31. }
  32. log.Debug("测试一下转换MD方法···")
  33. detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
  34. new_detail := ul.HttpConvertToMarkdown(detail)
  35. log.Debug(new_detail)
  36. }
  37. func main() {
  38. if ul.IsTool {
  39. tool.StartToolInfo()
  40. return
  41. }
  42. update1()
  43. lock := make(chan bool)
  44. <-lock
  45. }
  46. func update1() {
  47. dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  48. for _, v := range dataArr {
  49. tmpid := ul.BsonTOStringId(v["_id"])
  50. data := ul.BidMgo.FindById("zktest_sample_data_source_0", tmpid)
  51. title := qu.ObjToString(data["title"])
  52. detail := qu.ObjToString(data["detail"])
  53. ul.BidMgo.UpdateById("zktest_sample_data", tmpid, map[string]interface{}{
  54. "$set": map[string]interface{}{
  55. "detail": title + "\n" + detail,
  56. },
  57. })
  58. }
  59. log.Debug("is over ...")
  60. }
  61. func export1() {
  62. dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  63. pool_mgo := make(chan bool, 1)
  64. wg_mgo := &sync.WaitGroup{}
  65. for _, v := range dataArr {
  66. pool_mgo <- true
  67. wg_mgo.Add(1)
  68. go func(tmp map[string]interface{}) {
  69. defer func() {
  70. <-pool_mgo
  71. wg_mgo.Done()
  72. }()
  73. tmpid := ul.BsonTOStringId(v["_id"])
  74. data := ul.BidMgo.FindById("bidding", tmpid)
  75. if len(data) == 0 || data == nil {
  76. log.Debug("异常")
  77. }
  78. ul.BidMgo.Save("zktest_sample_data_source_4", data)
  79. }(v)
  80. }
  81. wg_mgo.Wait()
  82. log.Debug("is over ...")
  83. }
  84. // 对比程序
  85. func compare1() {
  86. fields := map[string]string{
  87. "toptype": "string",
  88. "subtype": "string",
  89. "area": "string",
  90. "city": "string",
  91. "projectname": "string",
  92. "projectcode": "string",
  93. "buyer": "string",
  94. "s_winner": "string",
  95. "budget": "float",
  96. "bidamount": "float",
  97. }
  98. dataArr1, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
  99. dataArr2, _ := ul.BidMgo.Find("zktest_deepseek_0122", map[string]interface{}{}, nil, map[string]interface{}{})
  100. biaozhu := creat(dataArr1, false) //标注数据···
  101. deepseek := creat(dataArr2, true)
  102. dataArr1 = nil
  103. dataArr2 = nil
  104. //计数
  105. tj := duibi(fields, biaozhu, deepseek)
  106. log.Debug("...................")
  107. arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
  108. for _, v := range arr {
  109. t1, s1 := tj[v]["total"], tj[v]["same"]
  110. f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
  111. log.Debug(f1)
  112. }
  113. }
  114. // 构建数据
  115. func creat(dataArr []map[string]interface{}, deepseek bool) map[string]map[string]interface{} {
  116. dict := map[string]map[string]interface{}{}
  117. for _, biaozhu := range dataArr {
  118. if deepseek {
  119. tmpid := ul.BsonTOStringId(biaozhu["_id"])
  120. toptype := qu.ObjToString(biaozhu["s_toptype"])
  121. subtype := qu.ObjToString(biaozhu["s_subtype"])
  122. area := qu.ObjToString(biaozhu["s_area"])
  123. city := qu.ObjToString(biaozhu["s_city"])
  124. projectname := qu.ObjToString(biaozhu["s_projectname"])
  125. projectcode := qu.ObjToString(biaozhu["s_projectcode"])
  126. budget := qu.Float64All(biaozhu["s_budget"])
  127. bidamount := qu.Float64All(biaozhu["s_bidamount"])
  128. buyer := qu.ObjToString(biaozhu["s_buyer"])
  129. s_winner := qu.ObjToString(biaozhu["s_winner"])
  130. info := map[string]interface{}{}
  131. info["toptype"] = toptype
  132. info["subtype"] = subtype
  133. info["area"] = area
  134. info["city"] = city
  135. info["projectname"] = projectname
  136. info["projectcode"] = projectcode
  137. info["budget"] = budget
  138. info["bidamount"] = bidamount
  139. info["buyer"] = buyer
  140. info["s_winner"] = s_winner
  141. dict[tmpid] = info
  142. } else {
  143. tmpid := ul.BsonTOStringId(biaozhu["_id"])
  144. toptype := qu.ObjToString(biaozhu["toptype"])
  145. subtype := qu.ObjToString(biaozhu["subtype"])
  146. area := qu.ObjToString(biaozhu["area"])
  147. city := qu.ObjToString(biaozhu["city"])
  148. projectname := qu.ObjToString(biaozhu["projectname"])
  149. projectcode := qu.ObjToString(biaozhu["projectcode"])
  150. budget := qu.Float64All(biaozhu["budget"])
  151. bidamount := qu.Float64All(biaozhu["bidamount"])
  152. buyer := qu.ObjToString(biaozhu["buyer"])
  153. s_winner := qu.ObjToString(biaozhu["s_winner"])
  154. info := map[string]interface{}{}
  155. info["toptype"] = toptype
  156. info["subtype"] = subtype
  157. info["area"] = area
  158. info["city"] = city
  159. info["projectname"] = projectname
  160. info["projectcode"] = projectcode
  161. info["budget"] = budget
  162. info["bidamount"] = bidamount
  163. info["buyer"] = buyer
  164. info["s_winner"] = s_winner
  165. dict[tmpid] = info
  166. }
  167. }
  168. return dict
  169. }
  170. func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}) map[string]map[string]int {
  171. //计数
  172. tj := map[string]map[string]int{}
  173. for tmpid, tmp := range source {
  174. bz := biaozhu[tmpid]
  175. for filed, typeof := range fields {
  176. nums := tj[filed]
  177. if nums == nil {
  178. nums = map[string]int{}
  179. }
  180. if typeof == "string" {
  181. b_value := qu.ObjToString(bz[filed])
  182. s_value := qu.ObjToString(tmp[filed])
  183. if b_value == "" && s_value == "" {
  184. } else {
  185. nums["total"] = qu.IntAll(nums["total"]) + 1
  186. if b_value == s_value {
  187. nums["same"] = qu.IntAll(nums["same"]) + 1
  188. }
  189. }
  190. } else if typeof == "float" {
  191. b_value := qu.Float64All(bz[filed])
  192. s_value := qu.Float64All(tmp[filed])
  193. if b_value == 0.0 && s_value == 0.0 {
  194. } else {
  195. nums["total"] = qu.IntAll(nums["total"]) + 1
  196. if b_value == s_value {
  197. nums["same"] = qu.IntAll(nums["same"]) + 1
  198. } else {
  199. //if filed == "budget" {
  200. // if b_value == 0.0 {
  201. // log.Debug(tmpid)
  202. // }
  203. //}
  204. }
  205. }
  206. } else {
  207. }
  208. tj[filed] = nums
  209. }
  210. }
  211. return tj
  212. }
  // fullWidthPunct maps the full-width parentheses and comma to their ASCII
  // counterparts.
  var fullWidthPunct = strings.NewReplacer(
  	"（", "(",
  	"）", ")",
  	"，", ",",
  )

  // c is a debugging helper that normalises punctuation in s: full-width
  // "（", "）" and "，" become ASCII "(", ")" and ",". All other characters,
  // including punctuation that is already ASCII, are returned unchanged.
  //
  // NOTE(review): the previous body replaced each of "(", ")" and "," with
  // itself and therefore did nothing; full-width normalisation is the assumed
  // intent - confirm against the callers.
  func c(s string) string {
  	return fullWidthPunct.Replace(s)
  }