File_import_mysql.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. import pandas as pd
  2. import pymysql
  3. import logging
  4. # 设置日志记录
  5. logging.basicConfig(filename='insert_errors.log', level=logging.ERROR, format='%(asctime)s - %(message)s')
  6. # 读取Excel文件
  7. data = pd.read_excel('标准样本数据汇总.xlsx', engine='openpyxl', keep_default_na=False, na_values=[''])
  8. # 替换空值为None
  9. data = data.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
  10. # 连接数据库
  11. try:
  12. connection = pymysql.connect(
  13. host='192.168.3.217',
  14. user='root',
  15. password='=PDT49#80Z!RVv52_z',
  16. database='quality',
  17. port=4000,
  18. connect_timeout=60
  19. )
  20. cursor = connection.cursor()
  21. sql = """
  22. INSERT INTO bid_llizhikun
  23. (_id, site, toptype, subtype, area, city, buyer, projectname, projectcode, budget,
  24. s_winner, bidamount, multipackage, label, href, jyhref)
  25. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  26. """
  27. # 批量插入
  28. batch_size = 100
  29. total_inserted = 0
  30. for i in range(0, len(data), batch_size):
  31. batch_data = data.iloc[i:i + batch_size]
  32. try:
  33. cursor.executemany(sql, [tuple(row) for _, row in batch_data.iterrows()])
  34. connection.commit()
  35. total_inserted += len(batch_data)
  36. print(f"成功插入 {len(batch_data)} 行, 总计: {total_inserted}")
  37. except Exception as e:
  38. logging.error(f"插入错误 (批次 {i}-{i + batch_size}): {e}")
  39. logging.error(f"失败的数据: {batch_data}")
  40. connection.rollback()
  41. print(f"数据插入完成: 成功插入 {total_inserted} 行")
  42. except Exception as e:
  43. logging.error(f"数据库连接失败: {e}")
  44. finally:
  45. if 'cursor' in locals():
  46. cursor.close()
  47. if 'connection' in locals():
  48. connection.close()