main.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. package main
  2. import (
  3. "data_ai/tool"
  4. "data_ai/udp"
  5. "data_ai/ul"
  6. "fmt"
  7. log "github.com/donnie4w/go-logger/logger"
  8. "github.com/gogf/gf/v2/util/gconv"
  9. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "strings"
  11. "sync"
  12. "unicode/utf8"
  13. )
  14. func init() {
  15. ul.IsLocal = false //是否本地
  16. ul.IsTool = false //是否工具
  17. ul.IsFull = false //是否全量
  18. if ul.IsTool {
  19. ul.InitToolVar()
  20. } else {
  21. ul.InitGlobalVar()
  22. if !ul.IsFull {
  23. udp.InitProcessVar()
  24. }
  25. }
  26. if !ul.IsTool {
  27. ul.InitOss(ul.IsLocal)
  28. }
  29. log.Debug("测试一下转换MD方法···")
  30. detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
  31. new_detail := ul.HttpConvertToMarkdown(detail)
  32. log.Debug(new_detail)
  33. }
  34. func main() {
  35. if ul.IsTool {
  36. tool.StartToolInfo()
  37. return
  38. }
  39. //extract.TestSingleFieldInfo("bidding", "677cf41c3309c0998bb6ddda")
  40. lock := make(chan bool)
  41. <-lock
  42. }
  43. // 对比程序
  44. func compare1() {
  45. fields := map[string]string{
  46. "toptype": "string",
  47. "subtype": "string",
  48. "area": "string",
  49. "city": "string",
  50. "projectname": "string",
  51. "projectcode": "string",
  52. "buyer": "string",
  53. "s_winner": "string",
  54. "budget": "float",
  55. "bidamount": "float",
  56. }
  57. dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
  58. dataArr1, _ := ul.BidMgo.Find("zktest_sample_data_source_1", map[string]interface{}{}, nil, map[string]interface{}{})
  59. dataArr2, _ := ul.BidMgo.Find("zktest_sample_data_source_2", map[string]interface{}{}, nil, map[string]interface{}{})
  60. dataArr3, _ := ul.BidMgo.Find("zktest_sample_data_source_3", map[string]interface{}{}, nil, map[string]interface{}{})
  61. biaozhu := creat(dataArr, false) //标注数据···
  62. source1 := creat(dataArr1, true)
  63. source2 := creat(dataArr2, true)
  64. source3 := creat(dataArr3, true)
  65. log.Debug("数据源:", len(biaozhu))
  66. log.Debug("对比源:", len(source1))
  67. log.Debug("对比源:", len(source2))
  68. log.Debug("对比源:", len(source3))
  69. dataArr = nil
  70. dataArr1 = nil
  71. dataArr2 = nil
  72. dataArr3 = nil
  73. //计数
  74. tj1 := duibi(fields, biaozhu, source1)
  75. tj2 := duibi(fields, biaozhu, source2)
  76. tj3 := duibi(fields, biaozhu, source3)
  77. log.Debug("...................")
  78. arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
  79. for _, v := range arr {
  80. t1, s1 := tj1[v]["total"], tj1[v]["same"]
  81. t2, s2 := tj2[v]["total"], tj2[v]["same"]
  82. t3, s3 := tj3[v]["total"], tj3[v]["same"]
  83. f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
  84. f2 := fmt.Sprintf("模型air~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
  85. f3 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t3, s3, (float64(s3)/float64(t3))*100.0, "%")
  86. log.Debug(f1)
  87. log.Debug(f2)
  88. log.Debug(f3)
  89. }
  90. }
  91. // 构建数据
  92. func creat(dataArr []map[string]interface{}, is_zhipu bool) map[string]map[string]interface{} {
  93. dict := map[string]map[string]interface{}{}
  94. for _, biaozhu := range dataArr {
  95. if is_zhipu {
  96. ai_zhipu := *qu.ObjToMap(biaozhu["ai_zhipu"])
  97. if len(ai_zhipu) > 0 {
  98. } else {
  99. continue
  100. }
  101. }
  102. tmpid := ul.BsonTOStringId(biaozhu["_id"])
  103. toptype := qu.ObjToString(biaozhu["toptype"])
  104. subtype := qu.ObjToString(biaozhu["subtype"])
  105. area := qu.ObjToString(biaozhu["area"])
  106. city := qu.ObjToString(biaozhu["city"])
  107. projectname := qu.ObjToString(biaozhu["projectname"])
  108. projectcode := qu.ObjToString(biaozhu["projectcode"])
  109. budget := qu.Float64All(biaozhu["budget"])
  110. bidamount := qu.Float64All(biaozhu["bidamount"])
  111. buyer := qu.ObjToString(biaozhu["buyer"])
  112. s_winner := qu.ObjToString(biaozhu["s_winner"])
  113. info := map[string]interface{}{}
  114. info["toptype"] = toptype
  115. info["subtype"] = subtype
  116. info["area"] = area
  117. info["city"] = city
  118. info["projectname"] = projectname
  119. info["projectcode"] = projectcode
  120. info["budget"] = budget
  121. info["bidamount"] = bidamount
  122. info["buyer"] = buyer
  123. info["s_winner"] = s_winner
  124. dict[tmpid] = info
  125. }
  126. return dict
  127. }
  128. func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}) map[string]map[string]int {
  129. //计数
  130. tj := map[string]map[string]int{}
  131. for tmpid, tmp := range source {
  132. bz := biaozhu[tmpid]
  133. for filed, typeof := range fields {
  134. nums := tj[filed]
  135. if nums == nil {
  136. nums = map[string]int{}
  137. }
  138. if typeof == "string" {
  139. b_value := qu.ObjToString(bz[filed])
  140. s_value := qu.ObjToString(tmp[filed])
  141. if b_value == "" && s_value == "" {
  142. } else {
  143. nums["total"] = qu.IntAll(nums["total"]) + 1
  144. if b_value == s_value {
  145. nums["same"] = qu.IntAll(nums["same"]) + 1
  146. }
  147. }
  148. } else if typeof == "float" {
  149. b_value := qu.Float64All(bz[filed])
  150. s_value := qu.Float64All(tmp[filed])
  151. if b_value == 0.0 && s_value == 0.0 {
  152. } else {
  153. nums["total"] = qu.IntAll(nums["total"]) + 1
  154. if b_value == s_value {
  155. nums["same"] = qu.IntAll(nums["same"]) + 1
  156. } else {
  157. //if filed == "budget" {
  158. // if b_value == 0.0 {
  159. // log.Debug(tmpid)
  160. // }
  161. //}
  162. }
  163. }
  164. } else {
  165. }
  166. tj[filed] = nums
  167. }
  168. }
  169. return tj
  170. }
  171. func export1() {
  172. dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  173. pool_mgo := make(chan bool, 1)
  174. wg_mgo := &sync.WaitGroup{}
  175. for _, v := range dataArr {
  176. pool_mgo <- true
  177. wg_mgo.Add(1)
  178. go func(tmp map[string]interface{}) {
  179. defer func() {
  180. <-pool_mgo
  181. wg_mgo.Done()
  182. }()
  183. tmpid := ul.BsonTOStringId(v["_id"])
  184. data := ul.BidMgo.FindById("bidding", tmpid)
  185. if len(data) == 0 || data == nil {
  186. log.Debug("异常")
  187. }
  188. ul.BidMgo.Save("zktest_sample_data_source_3", data)
  189. //ul.BidMgo.Save("zktest_sample_data_source_2", data)
  190. }(v)
  191. }
  192. wg_mgo.Wait()
  193. log.Debug("is over ...")
  194. }
  195. // 测试调试数据
  196. func test1() {
  197. q, total := map[string]interface{}{
  198. "_id": map[string]interface{}{
  199. "$gt": ul.StringTOBsonId("677638b13309c0998ba2488f"),
  200. "$lte": ul.StringTOBsonId("6777a3d23309c0998ba89797"),
  201. },
  202. }, 0
  203. count, _ := ul.BidMgo.Count("bidding", q)
  204. log.Debug("查询数量:", count)
  205. pool_mgo := make(chan bool, 10)
  206. wg_mgo := &sync.WaitGroup{}
  207. sess := ul.BidMgo.GetMgoConn()
  208. defer ul.BidMgo.DestoryMongoConn(sess)
  209. it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Iter()
  210. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  211. if total%1000 == 0 {
  212. log.Debug("cur ai index ", total)
  213. }
  214. pool_mgo <- true
  215. wg_mgo.Add(1)
  216. go func(tmp map[string]interface{}) {
  217. defer func() {
  218. <-pool_mgo
  219. wg_mgo.Done()
  220. }()
  221. //tmpid := ul.BsonTOStringId(tmp["_id"])
  222. s1, s2 := gconv.String(tmp["budget"]), gconv.String(tmp["bidamount"])
  223. r1, r2 := strings.Split(s1, "."), strings.Split(s2, ".")
  224. if len(r1) == 2 {
  225. l1 := utf8.RuneCountInString(r1[1])
  226. if l1 > 3 {
  227. //log.Debug("问题金额:", tmpid, "~", s1)
  228. ul.BidMgo.Save("zktest_err_0106", tmp)
  229. return
  230. }
  231. }
  232. if len(r2) == 2 {
  233. l2 := utf8.RuneCountInString(r2[1])
  234. if l2 > 3 {
  235. //log.Debug("问题金额:", tmpid, "~", s2)
  236. ul.BidMgo.Save("zktest_err_0106", tmp)
  237. return
  238. }
  239. }
  240. }(tmp)
  241. tmp = make(map[string]interface{})
  242. }
  243. wg_mgo.Wait()
  244. log.Debug("is over ...", total)
  245. }
  246. func c(s string) string {
  247. s = strings.ReplaceAll(s, "(", "(")
  248. s = strings.ReplaceAll(s, ")", ")")
  249. s = strings.ReplaceAll(s, ",", ",")
  250. return s
  251. }