|
@@ -86,8 +86,8 @@ def insert_batch_data(conn, params):
|
|
|
"""
|
|
|
执行批量插入数据
|
|
|
"""
|
|
|
- query = """INSERT IGNORE INTO bid_analysis (mongoid, site, spidercode, comeintime, area, city, district, score, error_type, spider_modified_time)
|
|
|
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
|
|
|
+ query = """INSERT IGNORE INTO bid_analysis (mongoid,toptype,subtype, site, spidercode, channel,comeintime, area, city, district, score, error_type, spider_modified_time)
|
|
|
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s,%s,%s)"""
|
|
|
MysqlUtil.insert_data(conn, query, params)
|
|
|
|
|
|
def insert_dynamic_error_field(conn, cleaned_key, error_ids, mongoid):
|
|
@@ -199,8 +199,11 @@ def batch_load_data():
|
|
|
data = result.get("data", {})
|
|
|
|
|
|
# 数据插入到 MySQL
|
|
|
+ toptype = item.get("toptype", "")
|
|
|
+ subtype = item.get("subtype", "")
|
|
|
site = item.get("site", "")
|
|
|
spidercode = item.get("spidercode", "")
|
|
|
+ channel = item.get("channel", "")
|
|
|
comeintime = item.get("comeintime", "")
|
|
|
comeintime = datetime.fromtimestamp(comeintime)
|
|
|
area = item.get("area", "")
|
|
@@ -215,7 +218,7 @@ def batch_load_data():
|
|
|
spider_modified_time = info.get("modifytime", "")
|
|
|
spider_modified_time = datetime.fromtimestamp(spider_modified_time)
|
|
|
|
|
|
- params = (item["_id"], site, spidercode, comeintime, area, city, district, score, error_type_data,spider_modified_time)
|
|
|
+ params = (item["_id"], toptype, subtype, site, spidercode,channel, comeintime, area, city, district, score, error_type_data,spider_modified_time)
|
|
|
insert_batch_data(conn, params)
|
|
|
|
|
|
# 遍历错误原因字典并提取非空字典中的值
|
|
@@ -242,7 +245,7 @@ def batch_load_data():
|
|
|
# 获取下一批数据
|
|
|
search_after = hits[-1]["_id"] # 获取当前批次最后一条数据的 _id 作为下一批的起始点
|
|
|
es_query["search_after"] = [search_after] # 保持 _id 类型一致
|
|
|
- response = es_client.search(index="bidding", body=es_query, scroll="1m")
|
|
|
+ response = es_client.search(index="bidding", body=es_query, size="100")
|
|
|
hits = response['hits']['hits']
|
|
|
|
|
|
# 如果没有更多数据,跳出循环
|