12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- from pymongo import MongoClient
- from bson.objectid import ObjectId
- from util.mogodb_helper import MongoDBInterface # 调整导入以正确引用文件名
# Connection settings for the reference database; this dict is handed to
# MongoDBInterface below.
# NOTE(review): credentials are hard-coded in source - consider loading them
# from the environment or a secrets store instead.
ref_config = {
    "ip_port": "127.0.0.1:27088",
    "user": "viewdata",
    "password": "viewdata",
    "db": "qfw",
    "collection": "zktest_data_info_0307",
}

# Connection settings for the comparison database; same server and
# credentials as the reference config, but a different collection.
comp_config = {
    "ip_port": "127.0.0.1:27088",
    "user": "viewdata",
    "password": "viewdata",
    "db": "qfw",
    "collection": "bidding",
}

# Settings for the database that stores the detected differences; these
# values are passed to pymongo's MongoClient directly.
final_config = {
    "host": "192.168.3.206",
    "port": 27080,
    "dbname": "data_quality",
    "collection_name": "fields_optimize",
}
def _collect_differences(ref_doc, comp_doc, fields):
    """Return the fields whose values differ between two documents.

    Args:
        ref_doc: Document read from the reference collection.
        comp_doc: Document with the same ``_id`` from the comparison
            collection.
        fields: Iterable of field names to compare.

    Returns:
        A dict mapping each differing field name to
        ``{'ref': <reference value>, 'comp': <comparison value>}``. The dict
        is empty when every compared field matches. A field that is absent
        from a document is read as ``None`` (via ``dict.get``).
    """
    differences = {}
    for field in fields:
        ref_value = ref_doc.get(field)
        comp_value = comp_doc.get(field)
        if ref_value != comp_value:
            differences[field] = {'ref': ref_value, 'comp': comp_value}
    return differences


# Fields compared between the reference and comparison documents.
fields_to_compare = ['area', 'city', 'district']

ref_collection_name = ref_config['collection']
comp_collection_name = comp_config['collection']

# Open the connections: the reference and comparison databases go through the
# project's MongoDBInterface helper, the result store uses pymongo directly.
ref_mdb = MongoDBInterface(ref_config)
comp_mdb = MongoDBInterface(comp_config)
final_client = MongoClient(
    final_config["host"], final_config["port"], socketTimeoutMS=30000
)

try:
    final_db = final_client[final_config["dbname"]]
    final_collection = final_db[final_config["collection_name"]]

    # Walk every document of the reference collection (pymongo find()).
    for ref_doc in ref_mdb.db[ref_collection_name].find():
        # Look up the document with the same _id in the comparison collection.
        comp_doc = comp_mdb.find_by_id(comp_collection_name, ref_doc['_id'])
        if not comp_doc:
            # A falsy result is treated as "no counterpart"; such documents
            # are skipped without being recorded.
            continue

        differences = _collect_differences(ref_doc, comp_doc, fields_to_compare)
        if differences:
            # Store both full documents together with the per-field diff.
            final_collection.insert_one({
                'id': ref_doc['_id'],  # keep the original _id as the id field
                'ref_doc': ref_doc,
                'comp_doc': comp_doc,
                'differences': differences,
            })
finally:
    # Release the connections even when the loop above raises, so a failed
    # run does not leave sockets open.
    ref_mdb.disconnect()
    comp_mdb.disconnect()
    final_client.close()
|