|
@@ -110,18 +110,15 @@ class DebugSpider(Spider):
|
|
__debug_custom_setting__ = dict(
|
|
__debug_custom_setting__ = dict(
|
|
COLLECTOR_SLEEP_TIME=1,
|
|
COLLECTOR_SLEEP_TIME=1,
|
|
COLLECTOR_TASK_COUNT=1,
|
|
COLLECTOR_TASK_COUNT=1,
|
|
- # SPIDER
|
|
|
|
- SPIDER_THREAD_COUNT=1,
|
|
|
|
|
|
+ SPIDER_THREAD_COUNT=1, # SPIDER
|
|
SPIDER_SLEEP_TIME=0,
|
|
SPIDER_SLEEP_TIME=0,
|
|
SPIDER_TASK_COUNT=1,
|
|
SPIDER_TASK_COUNT=1,
|
|
SPIDER_MAX_RETRY_TIMES=10,
|
|
SPIDER_MAX_RETRY_TIMES=10,
|
|
REQUEST_LOST_TIMEOUT=600, # 10分钟
|
|
REQUEST_LOST_TIMEOUT=600, # 10分钟
|
|
PROXY_ENABLE=False,
|
|
PROXY_ENABLE=False,
|
|
RETRY_FAILED_REQUESTS=False,
|
|
RETRY_FAILED_REQUESTS=False,
|
|
- # 保存失败的request
|
|
|
|
- SAVE_FAILED_REQUEST=False,
|
|
|
|
- # 过滤
|
|
|
|
- ITEM_FILTER_ENABLE=False,
|
|
|
|
|
|
+ SAVE_FAILED_REQUEST=False, # 保存失败的request
|
|
|
|
+ ITEM_FILTER_ENABLE=False, # 过滤
|
|
REQUEST_FILTER_ENABLE=False,
|
|
REQUEST_FILTER_ENABLE=False,
|
|
OSS_UPLOAD_TABLES=(),
|
|
OSS_UPLOAD_TABLES=(),
|
|
DELETE_KEYS=True,
|
|
DELETE_KEYS=True,
|
|
@@ -335,13 +332,18 @@ class BaseBusinessDetailSpider(Spider):
|
|
failed_item.table_name = setting.TASK_FAILED
|
|
failed_item.table_name = setting.TASK_FAILED
|
|
yield failed_item
|
|
yield failed_item
|
|
|
|
|
|
- def consume_tasks(self, limit=100):
|
|
|
|
|
|
+ def consume_tasks(self, limit=None, auto_ack=True):
|
|
|
|
+ """
|
|
|
|
+
|
|
|
|
+ @param limit: 获取消息数量
|
|
|
|
+ @param auto_ack: 消息自动回复确认
|
|
|
|
+ """
|
|
task_lst = []
|
|
task_lst = []
|
|
queue_name = setting.TAB_ITEMS.format(
|
|
queue_name = setting.TAB_ITEMS.format(
|
|
redis_key=self._redis_key.replace('_detailc', '')
|
|
redis_key=self._redis_key.replace('_detailc', '')
|
|
)
|
|
)
|
|
limit = limit or setting.COLLECTOR_TASK_COUNT
|
|
limit = limit or setting.COLLECTOR_TASK_COUNT
|
|
- messages = self._rabbitmq.get(queue_name, limit, False, to_str=False)
|
|
|
|
|
|
+ messages = self._rabbitmq.get(queue_name, limit, auto_ack, to_str=False)
|
|
for message in messages:
|
|
for message in messages:
|
|
body = message.body
|
|
body = message.body
|
|
if isinstance(body, Item):
|
|
if isinstance(body, Item):
|