# inspect_area.py (3.0 KB)

  from bson import ObjectId
  from pymongo import MongoClient, UpdateOne

  from BasicMethods.area_quality import au
  # Handle to the "data_quality" database. With
  # unicode_decode_error_handler="ignore", bytes that cannot be decoded as
  # UTF-8 are dropped while reading documents instead of raising
  # UnicodeDecodeError.
  db = MongoClient(
      "192.168.3.206", 27080, unicode_decode_error_handler="ignore"
  ).data_quality
  # Collection whose documents are inspected and annotated in place.
  coll_user = db["bidding_20230707"]
  # coll_user_inserd = db["user_insder"]
  def pankong(key, item):
      """Return ``item[key]`` when it is present and truthy, otherwise ``""``.

      A missing key and a falsy stored value (``None``, ``""``, ``0``, an
      empty container) are treated the same way: both yield the empty string.

      Args:
          key: Field name to look up.
          item: Container supporting ``in`` and ``[]`` (the call sites pass
              MongoDB documents).

      Returns:
          The stored value, or ``""`` when it is absent or falsy.
      """
      if key in item and item[key]:
          return item[key]
      return ""
  # Flag stored on each document -> source field whose extracted province is
  # compared with the province derived from the document's own "area" value.
  # The key order is the order in which the flags are written in "$set".
  FLAG_SOURCE_FIELDS = {
      "flag_buyer": "buyer",
      "flag_title": "title",
      "flag_s_winner": "s_winner",
      "flag_winneraddr": "winneraddr",
      "flag_buyeraddr": "buyeraddr",
      "flag_agency": "agency",
      "flag_agencyaddr": "agencyaddr",
  }
  # Number of pending flag updates sent to MongoDB per bulk_write call.
  WRITE_BATCH_SIZE = 1000


  def _area_flags(item):
      """Return the 0/1 agreement flag of every source field for one document.

      A flag is 1 only when both the province extracted from the source field
      and the province derived from "area" are non-empty and equal.

      NOTE(review): the fields are passed to ``au.export_area`` in a different
      order than the original straight-line code did; this assumes the helper
      is stateless -- confirm.

      Args:
          item: One document read from the collection.

      Returns:
          Dict mapping each flag name in ``FLAG_SOURCE_FIELDS`` to 0 or 1.
      """
      # "area" is the value under verification; it goes through the same
      # extractor so both sides of the comparison are in the same form.
      actual_area = au.export_area(pankong("area", item))
      flags = {}
      for flag_name, field in FLAG_SOURCE_FIELDS.items():
          expected_area = au.export_area(pankong(field, item))
          matched = (
              expected_area and actual_area and expected_area == actual_area
          )
          flags[flag_name] = 1 if matched else 0
      return flags


  # Number of documents for which at least one source field agrees with "area".
  correct_count = 0
  # Flag updates waiting to be flushed to MongoDB in one round trip.
  pending_updates = []
  # To inspect a single document, pass a filter to find(), e.g.
  # {"_id": ObjectId("64a8bb45990ffa1883accd78")}
  for count, item in enumerate(coll_user.find().batch_size(1000), start=1):
      if count % 1000 == 0:
          print(count)  # progress indicator
      flags = _area_flags(item)
      if any(flags.values()):
          correct_count += 1
      pending_updates.append(UpdateOne({"_id": item["_id"]}, {"$set": flags}))
      if len(pending_updates) >= WRITE_BATCH_SIZE:
          # The updates touch distinct documents, so their order is irrelevant.
          coll_user.bulk_write(pending_updates, ordered=False)
          pending_updates = []
  # Flush the final partial batch; bulk_write rejects an empty request list.
  if pending_updates:
      coll_user.bulk_write(pending_updates, ordered=False)
  print(correct_count)