|
@@ -2,19 +2,18 @@
|
|
|
"""爬虫配置文件"""
|
|
|
import os
|
|
|
|
|
|
-# redis 表名
|
|
|
# 列表任务表模版
|
|
|
TAB_REQUESTS = "{redis_key}:z_requests"
|
|
|
# 详情待处理任务表模版
|
|
|
TAB_ITEMS = "{redis_key}:z_items"
|
|
|
-# 任务失败模板
|
|
|
-TAB_FAILED_REQUESTS = "{redis_key}:z_failed_requests"
|
|
|
-# 数据保存失败模板
|
|
|
-TAB_FAILED_ITEMS = "{redis_key}:s_failed_items"
|
|
|
-# 爬虫状态表模版
|
|
|
-TAB_SPIDER_STATUS = "{redis_key}:z_spider_status"
|
|
|
-# 爬虫时间记录表
|
|
|
-TAB_SPIDER_TIME = "{redis_key}:h_spider_time"
|
|
|
+# 任务失败表
|
|
|
+TAB_FAILED_REQUESTS = os.getenv("TAB_FAILED_REQUESTS", "spider:z_failed_requests")
|
|
|
+# 数据保存失败表
|
|
|
+TAB_FAILED_ITEMS = os.getenv("TAB_FAILED_ITEMS", "spider:s_failed_items")
|
|
|
+# 任务状态记录表
|
|
|
+TASK_PROCESS_STATE = os.getenv("TASK_PROCESS_STATE", "spider:t_crawl_state")
|
|
|
+# 失败任务记录表
|
|
|
+TASK_FAILED = os.getenv("TASK_FAILED", "spider_listdata_err")
|
|
|
|
|
|
# MYSQL
|
|
|
MYSQL_IP = os.getenv("MYSQL_IP")
|
|
@@ -94,7 +93,7 @@ RETRY_FAILED_REQUESTS = False
|
|
|
# 爬虫启动时,重新入库失败的item
|
|
|
RETRY_FAILED_ITEMS = False
|
|
|
# 保存失败的request
|
|
|
-SAVE_FAILED_REQUEST = True
|
|
|
+SAVE_FAILED_REQUEST = False
|
|
|
# request防丢机制。(指定的REQUEST_LOST_TIMEOUT时间内request还没做完,会重新下发 重做)
|
|
|
REQUEST_LOST_TIMEOUT = 600 # 10分钟
|
|
|
# request网络请求超时时间
|