|
@@ -32,7 +32,7 @@ class Details(feapder.Spider):
|
|
|
|
|
|
def start_requests(self):
|
|
def start_requests(self):
|
|
while True:
|
|
while True:
|
|
- data_lsit = self.to_db.find(self.db_name,{"parser_name":"details_webcookie"},sort={"date":-1},limit=1)
|
|
|
|
|
|
+ data_lsit = self.to_db.find(self.db_name,{"parser_name":"details_webcookie"},sort={"date":-1},limit=100)
|
|
for item in data_lsit:
|
|
for item in data_lsit:
|
|
request_params = item.get("request_params")
|
|
request_params = item.get("request_params")
|
|
|
|
|
|
@@ -120,7 +120,7 @@ class Details(feapder.Spider):
|
|
cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
|
|
cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
|
|
cookie_pool.del_cookie(request.cookies)
|
|
cookie_pool.del_cookie(request.cookies)
|
|
yield request
|
|
yield request
|
|
- if response.code in (request.down_mid.get("code")):
|
|
|
|
|
|
+ elif response.status_code in request.down_mid.get("code"):
|
|
'''失败处理,response——code不为正确的状态码时,删除当前cookie并重新生产cookie'''
|
|
'''失败处理,response——code不为正确的状态码时,删除当前cookie并重新生产cookie'''
|
|
down_mid = copy.copy(request.down_mid)
|
|
down_mid = copy.copy(request.down_mid)
|
|
key = down_mid.get("key")
|
|
key = down_mid.get("key")
|
|
@@ -129,15 +129,13 @@ class Details(feapder.Spider):
|
|
cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
|
|
cookie_pool = WebCookiePool(redis_key=key, page_url=page_url, cookie_key=cookie_key)
|
|
cookie_pool.del_cookie(request.cookies)
|
|
cookie_pool.del_cookie(request.cookies)
|
|
yield request
|
|
yield request
|
|
- items = request.item
|
|
|
|
- list_item = DataBakItem()
|
|
|
|
- for key in items:
|
|
|
|
- list_item.__setitem__(key,items[key])
|
|
|
|
- html = ''
|
|
|
|
- exec(request.deal_detail)
|
|
|
|
-
|
|
|
|
- list_item.contenthtml = html
|
|
|
|
- yield list_item
|
|
|
|
|
|
+ else:
|
|
|
|
+ items = request.item
|
|
|
|
+ list_item = DataBakItem()
|
|
|
|
+ for key in items:
|
|
|
|
+ list_item.__setitem__(key,items[key])
|
|
|
|
+ exec(request.deal_detail)
|
|
|
|
+ yield list_item
|
|
|
|
|
|
def failed_request(self, request, response):
|
|
def failed_request(self, request, response):
|
|
'''请求、解析次数超过上限后,将原信息重新保存至mongo,并修改failed字段'''
|
|
'''请求、解析次数超过上限后,将原信息重新保存至mongo,并修改failed字段'''
|