12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import pandas as pd
- import pymysql
- import logging
- # 设置日志记录
- logging.basicConfig(filename='insert_errors.log', level=logging.ERROR, format='%(asctime)s - %(message)s')
- # 读取Excel文件
- data = pd.read_excel('标准样本数据汇总.xlsx', engine='openpyxl', keep_default_na=False, na_values=[''])
- # 替换空值为None
- data = data.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
- # 连接数据库
- try:
- connection = pymysql.connect(
- host='192.168.3.217',
- user='root',
- password='=PDT49#80Z!RVv52_z',
- database='quality',
- port=4000,
- connect_timeout=60
- )
- cursor = connection.cursor()
- sql = """
- INSERT INTO bid_llizhikun
- (_id, site, toptype, subtype, area, city, buyer, projectname, projectcode, budget,
- s_winner, bidamount, multipackage, label, href, jyhref)
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
- """
- # 批量插入
- batch_size = 100
- total_inserted = 0
- for i in range(0, len(data), batch_size):
- batch_data = data.iloc[i:i + batch_size]
- try:
- cursor.executemany(sql, [tuple(row) for _, row in batch_data.iterrows()])
- connection.commit()
- total_inserted += len(batch_data)
- print(f"成功插入 {len(batch_data)} 行, 总计: {total_inserted}")
- except Exception as e:
- logging.error(f"插入错误 (批次 {i}-{i + batch_size}): {e}")
- logging.error(f"失败的数据: {batch_data}")
- connection.rollback()
- print(f"数据插入完成: 成功插入 {total_inserted} 行")
- except Exception as e:
- logging.error(f"数据库连接失败: {e}")
- finally:
- if 'cursor' in locals():
- cursor.close()
- if 'connection' in locals():
- connection.close()
|