"""Compare region fields between a reference and a comparison collection.

For every document in the reference collection, the document with the same
``_id`` is looked up in the comparison collection.  When any of the fields in
``fields_to_compare`` differ, a record holding both documents and the
per-field differences is inserted into the final storage collection.
Reference documents without a counterpart in the comparison collection are
skipped.
"""
from contextlib import ExitStack

from bson.objectid import ObjectId
from pymongo import MongoClient

# The helper module's file name really is spelled "mogodb_helper".
from util.mogodb_helper import MongoDBInterface

# NOTE(review): credentials and hosts are hard-coded below; consider loading
# them from the environment or a config file that is not committed.

# Reference database configuration.
ref_config = {
    "ip_port": "127.0.0.1:27088",
    "user": "viewdata",
    "password": "viewdata",
    "db": "qfw",
    "collection": "zktest_data_info_0307",
}

# Comparison database configuration.
comp_config = {
    "ip_port": "127.0.0.1:27088",
    "user": "viewdata",
    "password": "viewdata",
    "db": "qfw",
    "collection": "bidding",
}

# Final storage database configuration.
final_config = {
    "host": "192.168.3.206",
    "port": 27080,
    "dbname": "data_quality",
    "collection_name": "fields_optimize",
}

# Fields whose values are compared between the two documents.
fields_to_compare = ['area', 'city', 'district']

ref_collection_name = ref_config['collection']
comp_collection_name = comp_config['collection']


def find_differences(ref_doc, comp_doc, fields):
    """Return the fields whose values differ between two documents.

    Args:
        ref_doc: Mapping for the reference document.
        comp_doc: Mapping for the comparison document.
        fields: Iterable of field names to compare.

    Returns:
        A dict mapping each differing field name to
        ``{'ref': <reference value>, 'comp': <comparison value>}``.  A field
        missing from a document is treated as ``None``.  The dict is empty
        when all compared fields match.
    """
    differences = {}
    for field in fields:
        ref_value = ref_doc.get(field)
        comp_value = comp_doc.get(field)
        if ref_value != comp_value:
            differences[field] = {'ref': ref_value, 'comp': comp_value}
    return differences


# Each cleanup is registered right after its connection is created, so every
# connection that was opened is released even if a later step raises.
with ExitStack() as cleanup:
    # The reference and comparison databases go through MongoDBInterface.
    ref_mdb = MongoDBInterface(ref_config)
    cleanup.callback(ref_mdb.disconnect)

    comp_mdb = MongoDBInterface(comp_config)
    cleanup.callback(comp_mdb.disconnect)

    # The final storage database uses a plain pymongo client.
    final_client = MongoClient(
        final_config["host"],
        final_config["port"],
        socketTimeoutMS=30000,
    )
    cleanup.callback(final_client.close)

    final_db = final_client[final_config["dbname"]]
    final_collection = final_db[final_config["collection_name"]]

    # Walk every document of the reference collection.
    for ref_doc in ref_mdb.db[ref_collection_name].find():
        # Presumably returns a falsy value when no document has this _id;
        # verify against MongoDBInterface.find_by_id.
        comp_doc = comp_mdb.find_by_id(comp_collection_name, ref_doc['_id'])
        if not comp_doc:
            # No counterpart in the comparison collection: nothing to compare.
            continue

        differences = find_differences(ref_doc, comp_doc, fields_to_compare)
        if differences:
            # Store both full documents next to the per-field differences.
            final_collection.insert_one({
                'id': ref_doc['_id'],  # keep the original _id as the "id" field
                'ref_doc': ref_doc,
                'comp_doc': comp_doc,
                'differences': differences,
            })