spider_statistics.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. from util.mysql_tool import MysqlUtil
  2. import pandas as pd
  3. from datetime import datetime,timedelta
  4. # 连接到MySQL数据库
  5. conn = MysqlUtil.connect_to_mysql(host='192.168.3.217', port='4000', user='root', password='=PDT49#80Z!RVv52_z',database='quality')
  6. cursor = conn.cursor()
  7. # 获取当天的日期,作为batch字段
  8. today_date = datetime.now().strftime('%Y-%m-%d')
  9. # 获取当前时间
  10. now = datetime.now()
  11. current_datetime = now.strftime("%Y-%m-%d %H:%M:%S")
  12. # 获取今天的日期
  13. today = datetime.today()
  14. # 获取昨天的日期
  15. yesterday = today - timedelta(days=1)
  16. # 获取昨天0点的时间
  17. yesterday_midnight = datetime(yesterday.year, yesterday.month, yesterday.day)
  18. # 获取今天0点的时间
  19. today_midnight = datetime(today.year, today.month, today.day)
  20. # 转换为Unix时间戳
  21. start_date = int(yesterday_midnight.timestamp())
  22. end_date = int(today_midnight.timestamp())
  23. # 查询去重后的site, channel和spidercode
  24. query = """
  25. SELECT DISTINCT site, channel, spidercode
  26. FROM bid_analysis where comeintime< end_date ;
  27. """
  28. cursor.execute(query)
  29. # 获取查询结果
  30. data = cursor.fetchall()
  31. # 构建DataFrame,用于处理和插入
  32. statistics_data = []
  33. for row in data:
  34. site, channel, spidercode = row
  35. statistics_data.append({
  36. 'batch': today_date,
  37. 'site': site,
  38. 'channel': channel,
  39. 'spidercode': spidercode,
  40. 'created_at': today_date # 使用当天日期作为创建时间
  41. })
  42. # 将数据转换为pandas DataFrame
  43. df = pd.DataFrame(statistics_data)
  44. # 准备插入statistics表的SQL语句
  45. insert_query = """
  46. INSERT INTO statistics (batch, site, channel, spidercode, created_at)
  47. VALUES (%s, %s, %s, %s, %s);
  48. """
  49. # 将DataFrame的数据插入到数据库
  50. for index, row in df.iterrows():
  51. cursor.execute(insert_query, (row['batch'], row['site'], row['channel'], row['spidercode'], row['created_at']))
  52. # 提交事务
  53. conn.commit()
  54. # 关闭数据库连接
  55. cursor.close()
  56. conn.close()
  57. print(f"成功将 {len(df)} 条记录插入到statistics表中.")