12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- from util.mysql_tool import MysqlUtil
- import pandas as pd
- from datetime import datetime,timedelta
- # 连接到MySQL数据库
- conn = MysqlUtil.connect_to_mysql(host='192.168.3.217', port='4000', user='root', password='=PDT49#80Z!RVv52_z',database='quality')
- cursor = conn.cursor()
- # 获取当天的日期,作为batch字段
- today_date = datetime.now().strftime('%Y-%m-%d')
- # 获取当前时间
- now = datetime.now()
- current_datetime = now.strftime("%Y-%m-%d %H:%M:%S")
- # 获取今天的日期
- today = datetime.today()
- # 获取昨天的日期
- yesterday = today - timedelta(days=1)
- # 获取昨天0点的时间
- yesterday_midnight = datetime(yesterday.year, yesterday.month, yesterday.day)
- # 获取今天0点的时间
- today_midnight = datetime(today.year, today.month, today.day)
- # 转换为Unix时间戳
- start_date = int(yesterday_midnight.timestamp())
- end_date = int(today_midnight.timestamp())
- # 查询去重后的site, channel和spidercode
- query = """
- SELECT DISTINCT site, channel, spidercode
- FROM bid_analysis where comeintime< end_date ;
- """
- cursor.execute(query)
- # 获取查询结果
- data = cursor.fetchall()
- # 构建DataFrame,用于处理和插入
- statistics_data = []
- for row in data:
- site, channel, spidercode = row
- statistics_data.append({
- 'batch': today_date,
- 'site': site,
- 'channel': channel,
- 'spidercode': spidercode,
- 'created_at': today_date # 使用当天日期作为创建时间
- })
- # 将数据转换为pandas DataFrame
- df = pd.DataFrame(statistics_data)
- # 准备插入statistics表的SQL语句
- insert_query = """
- INSERT INTO statistics (batch, site, channel, spidercode, created_at)
- VALUES (%s, %s, %s, %s, %s);
- """
- # 将DataFrame的数据插入到数据库
- for index, row in df.iterrows():
- cursor.execute(insert_query, (row['batch'], row['site'], row['channel'], row['spidercode'], row['created_at']))
- # 提交事务
- conn.commit()
- # 关闭数据库连接
- cursor.close()
- conn.close()
- print(f"成功将 {len(df)} 条记录插入到statistics表中.")
|