# 导入必要的库 from pymongo import MongoClient import pandas as pd import pytz # 用于处理时区 def export_to_excel(db_name, collection_name, fields, output_file): """ 从MongoDB导出特定字段到Excel文件。 参数: - db_name: 数据库名称 - collection_name: 集合名称 - fields: 要导出的字段列表(例如 ['name', 'age']) - output_file: 输出的Excel文件名 """ # 连接到MongoDB # client = MongoClient('172.20.45.129', 27002, unicode_decode_error_handler="ignore") client = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True) # 修改为你的连接地址 # client = MongoClient('mongodb://127.0.0.1:27087/', unicode_decode_error_handler="ignore",directConnection=True) # 清洗库 db = client[db_name] collection = db[collection_name] # 构建查询和投影 projection = {field: 1 for field in fields} # 查询数据 data = collection.find({ "tag_new" : 1 },projection).sort("_id", 1) # 将数据转换为DataFrame df = pd.DataFrame(list(data)) # 转换时间戳字段 time_fields = ['bidopentime', 'bidendtime'] for field in time_fields: if field in df.columns: # 1. 转换为 datetime 对象(秒级时间戳) df[field] = pd.to_datetime(df[field], unit='s', errors='coerce') # 2. 转换为北京时间(UTC+8) beijing_tz = pytz.timezone('Asia/Shanghai') df[field] = df[field].dt.tz_localize('UTC').dt.tz_convert(beijing_tz) # 3. 去掉时区信息,保留纯时间(否则Excel可能显示异常) df[field] = df[field].dt.tz_localize(None) # 导出到Excel文件 df.to_excel(output_file, index=False) if __name__ == "__main__": # 连接到 MongoDB db_name = 'jyqyfw' # 替换为你的数据库名称 # collection_name = 'standard_sample_data_all_ai' # 替换为你的集合名称 # collection_name = 'xzh_20250714' # 替换为你的集合名称 collection_name = 'usermail_Unicom_0714_1' # 替换为你的集合名称 # 定义参数 fields = ['_id','id','site','toptype','subtype','area','city','buyer','projectname','projectcode','budget','s_winner','bidamount','label','href','jybxhref','package','bidopentime', 'bidendtime','pkg_tag'] # 替换为你需要导出的字段 # fields = ['id','site','toptype','subtype','area','city','buyer','projectname','projectcode','budget','s_winner','bidamount','label','href','jybxhref'] # 替换为你需要导出的字段 # fields = ['id','title','projectname','href','projectcode','subtype','s_winner','bidamount','buyer'] # 替换为你需要导出的字段 # fields = ['id','toptype','subtype','title','owner','total_investment','project_startdate','project_completedate','construction_area','floor_area','projectaddr','project_scale','approval_date','note','href','jytest_href'] # 替换为你需要导出的字段 output_file = 'output.xlsx' # 调用函数导出数据 export_to_excel(db_name, collection_name, fields, output_file) print(f"数据已成功导出到 {output_file}")