test1.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. package extract
  2. import (
  3. "bytes"
  4. "data_ai/ul"
  5. "encoding/json"
  6. "fmt"
  7. log "github.com/donnie4w/go-logger/logger"
  8. "io"
  9. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "net/http"
  11. "strings"
  12. "sync"
  13. "time"
  14. )
  15. func duibifenbao() {
  16. dataArr, _ := ul.BidMgo.Find("zktest_sample_data_source_deepseek", map[string]interface{}{}, nil, map[string]interface{}{})
  17. dict1, dict2, dict3 := map[string]interface{}{}, map[string]interface{}{}, map[string]interface{}{}
  18. for _, v := range dataArr {
  19. tmpid := ul.BsonTOStringId(v["_id"])
  20. ai_zhipu := qu.ObjToMap(v["ai_zhipu"])
  21. ai_deepseek := qu.ObjToMap(v["ai_deepseek"])
  22. ext_package := qu.ObjToMap(v["package"])
  23. data := map[string]interface{}{}
  24. if ai_zhipu != nil {
  25. s_pkg := *qu.ObjToMap((*ai_zhipu)["s_pkg"])
  26. com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
  27. if len(com_package) > 1 {
  28. dict1[tmpid] = tmpid
  29. data["zhipu"] = 1
  30. } else {
  31. data["zhipu"] = 0
  32. }
  33. }
  34. if ai_deepseek != nil {
  35. s_pkg := *qu.ObjToMap((*ai_deepseek)["s_pkg"])
  36. com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
  37. if len(com_package) > 1 {
  38. dict2[tmpid] = tmpid
  39. data["deepseek"] = 1
  40. } else {
  41. data["deepseek"] = 0
  42. }
  43. }
  44. if ext_package != nil {
  45. if len(*ext_package) > 1 {
  46. dict3[tmpid] = tmpid
  47. data["extract"] = 1
  48. } else {
  49. data["extract"] = 0
  50. }
  51. }
  52. if len(data) > 0 {
  53. data["_id"] = v["_id"]
  54. data["href"] = v["href"]
  55. data["jyhref"] = fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  56. ul.BidMgo.Save("zktest_0225", data)
  57. }
  58. }
  59. }
  60. func test1() {
  61. dict1 := map[string]interface{}{}
  62. dict2 := map[string]interface{}{}
  63. for k, _ := range dict2 {
  64. if dict1[k] == nil {
  65. log.Debug(k)
  66. }
  67. }
  68. log.Debug("is over ...")
  69. }
  70. // 合并
  71. func merge1(ai_coll string, ai_key string) {
  72. dataArr, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
  73. log.Debug("查询量···", len(dataArr))
  74. pool_mgo := make(chan bool, 10)
  75. wg_mgo := &sync.WaitGroup{}
  76. for k, v := range dataArr {
  77. if k%100 == 0 {
  78. log.Debug("cur index ", k)
  79. }
  80. if v[ai_key] == nil {
  81. continue
  82. }
  83. pool_mgo <- true
  84. wg_mgo.Add(1)
  85. go func(tmp map[string]interface{}) {
  86. defer func() {
  87. <-pool_mgo
  88. wg_mgo.Done()
  89. }()
  90. u_id := ul.BsonTOStringId(tmp["_id"])
  91. if u_id != "664af2af66cf0db42a3d217e" {
  92. return
  93. }
  94. data := *qu.ObjToMap(tmp[ai_key])
  95. tmp["ai_zhipu"] = data
  96. update_info := make(map[string]interface{}, 0)
  97. ul.ChooseCheckDataAI(tmp, &update_info)
  98. if u_id != "" {
  99. //ul.SourceMgo.UpdateById(ai_coll, u_id, map[string]interface{}{
  100. // "$set": update_info,
  101. //})
  102. }
  103. }(v)
  104. }
  105. wg_mgo.Wait()
  106. log.Debug("is over ... ")
  107. }
  108. // 对比程序
  109. func compare1(ai_coll string) {
  110. fields := map[string]string{
  111. "toptype": "string",
  112. "subtype": "string",
  113. "area": "string",
  114. "city": "string",
  115. "projectname": "string",
  116. "projectcode": "string",
  117. "buyer": "string",
  118. "s_winner": "string",
  119. "budget": "float",
  120. "bidamount": "float",
  121. }
  122. dataArr1, _ := ul.PyMgo.Find("standard_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
  123. dataArr2, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
  124. log.Debug("查询数量:", len(dataArr1), len(dataArr2))
  125. biaozhu, check_exclude, exclude_all := creat(dataArr1, false) //标注数据···
  126. deepseek, _, _ := creat(dataArr2, false)
  127. dataArr1 = nil
  128. dataArr2 = nil
  129. //计数
  130. tj_deepseek := duibi(fields, biaozhu, deepseek, check_exclude, exclude_all)
  131. log.Debug("...................")
  132. arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
  133. for _, v := range arr {
  134. t2, s2 := tj_deepseek[v]["total"], tj_deepseek[v]["same"]
  135. f2 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
  136. log.Debug(f2)
  137. }
  138. }
  139. // 构建数据
  140. func creat(dataArr []map[string]interface{}, zhipu bool) (map[string]map[string]interface{}, map[string]map[string]interface{}, map[string]interface{}) {
  141. dict := map[string]map[string]interface{}{}
  142. check_exclude := map[string]map[string]interface{}{}
  143. exclude_all := map[string]interface{}{}
  144. for _, biaozhu := range dataArr {
  145. tmpid := ul.BsonTOStringId(biaozhu["_id"])
  146. if biaozhu["check_exclude"] != nil {
  147. check_exclude[tmpid] = *qu.ObjToMap(biaozhu["check_exclude"])
  148. }
  149. if biaozhu["exclude_all"] != nil {
  150. exclude_all[tmpid] = qu.IntAll(biaozhu["exclude_all"])
  151. }
  152. if zhipu {
  153. if biaozhu["ai_deepseek"] != nil {
  154. biaozhu = *qu.ObjToMap(biaozhu["ai_deepseek"])
  155. }
  156. toptype := qu.ObjToString(biaozhu["s_toptype"])
  157. subtype := qu.ObjToString(biaozhu["s_subtype"])
  158. area := qu.ObjToString(biaozhu["s_area"])
  159. city := qu.ObjToString(biaozhu["s_city"])
  160. projectname := qu.ObjToString(biaozhu["s_projectname"])
  161. projectcode := qu.ObjToString(biaozhu["s_projectcode"])
  162. budget := qu.Float64All(biaozhu["s_budget"])
  163. bidamount := qu.Float64All(biaozhu["s_bidamount"])
  164. buyer := qu.ObjToString(biaozhu["s_buyer"])
  165. s_winner := qu.ObjToString(biaozhu["s_winner"])
  166. info := map[string]interface{}{}
  167. info["toptype"] = toptype
  168. info["subtype"] = subtype
  169. info["area"] = area
  170. info["city"] = city
  171. info["projectname"] = projectname
  172. info["projectcode"] = projectcode
  173. info["budget"] = budget
  174. info["bidamount"] = bidamount
  175. info["buyer"] = buyer
  176. info["s_winner"] = s_winner
  177. dict[tmpid] = info
  178. } else {
  179. toptype := qu.ObjToString(biaozhu["toptype"])
  180. subtype := qu.ObjToString(biaozhu["subtype"])
  181. area := qu.ObjToString(biaozhu["area"])
  182. city := qu.ObjToString(biaozhu["city"])
  183. projectname := qu.ObjToString(biaozhu["projectname"])
  184. projectcode := qu.ObjToString(biaozhu["projectcode"])
  185. budget := qu.Float64All(biaozhu["budget"])
  186. bidamount := qu.Float64All(biaozhu["bidamount"])
  187. buyer := qu.ObjToString(biaozhu["buyer"])
  188. s_winner := qu.ObjToString(biaozhu["s_winner"])
  189. info := map[string]interface{}{}
  190. info["toptype"] = toptype
  191. info["subtype"] = subtype
  192. info["area"] = area
  193. info["city"] = city
  194. info["projectname"] = projectname
  195. info["projectcode"] = projectcode
  196. info["budget"] = budget
  197. info["bidamount"] = bidamount
  198. info["buyer"] = buyer
  199. info["s_winner"] = s_winner
  200. dict[tmpid] = info
  201. }
  202. }
  203. return dict, check_exclude, exclude_all
  204. }
  205. func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}, check_exclude map[string]map[string]interface{}, exclude_all map[string]interface{}) map[string]map[string]int {
  206. //计数
  207. tj := map[string]map[string]int{}
  208. for tmpid, tmp := range source {
  209. bz := biaozhu[tmpid]
  210. exclude := check_exclude[tmpid]
  211. if qu.IntAll(exclude_all[tmpid]) == 1 {
  212. continue //整条过滤
  213. }
  214. for filed, typeof := range fields {
  215. if exclude[filed] != nil {
  216. continue
  217. }
  218. nums := tj[filed]
  219. if nums == nil {
  220. nums = map[string]int{}
  221. }
  222. if typeof == "string" {
  223. b_value := qu.ObjToString(bz[filed])
  224. s_value := qu.ObjToString(tmp[filed])
  225. //字符串通用转换
  226. b_value, s_value = c(b_value), c(s_value)
  227. if b_value == "" && s_value == "" {
  228. } else {
  229. nums["total"] = qu.IntAll(nums["total"]) + 1
  230. if b_value == s_value {
  231. nums["same"] = qu.IntAll(nums["same"]) + 1
  232. } else {
  233. if filed == "buyer" {
  234. //log.Debug("标注:", b_value, "~", "模板:", s_value)
  235. }
  236. }
  237. }
  238. } else if typeof == "float" {
  239. b_value := qu.Float64All(bz[filed])
  240. s_value := qu.Float64All(tmp[filed])
  241. if b_value == 0.0 && s_value == 0.0 {
  242. } else {
  243. nums["total"] = qu.IntAll(nums["total"]) + 1
  244. if b_value == s_value {
  245. nums["same"] = qu.IntAll(nums["same"]) + 1
  246. } else {
  247. if filed == "budget" {
  248. //log.Debug(fmt.Sprintf("%f", b_value), "~", fmt.Sprintf("%f", s_value), "~", tmpid)
  249. }
  250. }
  251. }
  252. } else {
  253. }
  254. tj[filed] = nums
  255. }
  256. }
  257. return tj
  258. }
  259. func update1() {
  260. dataArr, _ := ul.BidMgo.Find("zktest_deepseek_0124", map[string]interface{}{}, nil, map[string]interface{}{})
  261. for _, v := range dataArr {
  262. //tmpid := ul.BsonTOStringId(v["_id"])
  263. if v["ai_zhipu"] != nil {
  264. ai_zhipu := *qu.ObjToMap(v["ai_zhipu"])
  265. if ai_zhipu["s_pkg"] != nil {
  266. s_pkg := *qu.ObjToMap(ai_zhipu["s_pkg"])
  267. s_budget := qu.Float64All(s_pkg["s_budget"])
  268. s_bidamount := qu.Float64All(s_pkg["s_bidamount"])
  269. s_winner := qu.ObjToString(s_pkg["s_winner"])
  270. if s_budget > 0.0 && s_budget > qu.Float64All(ai_zhipu["s_budget"]) {
  271. ai_zhipu["s_budget"] = s_budget
  272. }
  273. if s_bidamount > 0.0 && s_bidamount > qu.Float64All(ai_zhipu["s_bidamount"]) {
  274. ai_zhipu["s_bidamount"] = s_bidamount
  275. }
  276. if s_winner != "" {
  277. ai_zhipu["s_winner"] = s_winner
  278. }
  279. }
  280. ul.BidMgo.Save("zktest_deepseek_0124_1", map[string]interface{}{
  281. "ai_zhipu": ai_zhipu,
  282. "_id": v["_id"],
  283. })
  284. }
  285. }
  286. log.Debug("is over ...")
  287. }
  288. func export1() {
  289. dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  290. pool_mgo := make(chan bool, 1)
  291. wg_mgo := &sync.WaitGroup{}
  292. for _, v := range dataArr {
  293. pool_mgo <- true
  294. wg_mgo.Add(1)
  295. go func(tmp map[string]interface{}) {
  296. defer func() {
  297. <-pool_mgo
  298. wg_mgo.Done()
  299. }()
  300. tmpid := ul.BsonTOStringId(v["_id"])
  301. data := ul.BidMgo.FindById("bidding", tmpid)
  302. if len(data) == 0 || data == nil {
  303. log.Debug("异常")
  304. }
  305. ul.BidMgo.Save("zktest_sample_data_source_4", data)
  306. }(v)
  307. }
  308. wg_mgo.Wait()
  309. log.Debug("is over ...")
  310. }
  // punctNormalizer rewrites full-width punctuation to its ASCII form and
  // removes separators that should not influence a comparison. The non-ASCII
  // characters are written as escapes so they survive tools that normalise
  // source text. A Replacer is safe for concurrent use.
  var punctNormalizer = strings.NewReplacer(
  	"\uff08", "(", // full-width left parenthesis
  	"\uff09", ")", // full-width right parenthesis
  	"\uff0c", ",", // full-width comma
  	" ", "", // ASCII space
  	"\u3001", "", // ideographic comma
  )

  // c normalises a string before comparison: full-width parentheses and commas
  // become their ASCII equivalents, and ASCII spaces and ideographic commas are
  // removed.
  //
  // The previous version replaced "(", ")" and "," with themselves, so the
  // full-width forms were never converted.
  func c(s string) string {
  	return punctNormalizer.Replace(s)
  }
  // post1Client is shared by all post1 calls so that connections can be
  // reused; it keeps the 2-second overall timeout of the original code.
  var post1Client = &http.Client{Timeout: 2 * time.Second}

  // post1 sends data as a JSON body to the local cleaning service at
  // http://127.0.0.1:12321/clean/deepseek and returns the decoded JSON
  // response.
  //
  // Parameters:
  //   - data: payload to encode as JSON.
  //
  // Returns the decoded response. On any failure (payload cannot be encoded,
  // request fails, body cannot be read) an empty, non-nil map is returned; a
  // body that is not valid JSON yields whatever was decoded before the error.
  // The HTTP status code is not inspected.
  //
  // Fixes over the previous version: the response body is now closed so the
  // connection is released, and an encoding error returns immediately instead
  // of posting an empty body.
  func post1(data map[string]interface{}) map[string]interface{} {
  	info := map[string]interface{}{}
  	payload, err := json.Marshal(data)
  	if err != nil {
  		return info
  	}
  	resp, err := post1Client.Post("http://127.0.0.1:12321/clean/deepseek", "application/json", bytes.NewReader(payload))
  	if err != nil {
  		return info
  	}
  	defer resp.Body.Close()

  	body, err := io.ReadAll(resp.Body)
  	if err != nil {
  		return info
  	}
  	// A decode error is deliberately not treated specially: the caller gets
  	// the map in whatever state decoding left it, as before.
  	_ = json.Unmarshal(body, &info)
  	return info
  }