|
@@ -15,6 +15,7 @@ import feapder
|
|
|
import feapder.utils.tools as tools
|
|
|
from feapder import Item
|
|
|
from feapder.db.mongodb import MongoDB
|
|
|
+from feapder.network.proxy_pool import swordfish_proxy
|
|
|
from feapder.network.request import requests
|
|
|
from feapder.network.response import Response
|
|
|
from feapder.utils.cleaner import cleaner
|
|
@@ -88,7 +89,8 @@ class DetailSpider(feapder.AirSpider):
|
|
|
|
|
|
@property
|
|
|
def proxy(self):
|
|
|
- return pay_proxy()
|
|
|
+ # return pay_proxy()
|
|
|
+ return swordfish_proxy()
|
|
|
|
|
|
def get_response(self, request, response):
|
|
|
"""
|
|
@@ -281,18 +283,18 @@ class DetailSpider(feapder.AirSpider):
|
|
|
request.count = 0
|
|
|
yield request
|
|
|
else:
|
|
|
- # 清cookies切代理
|
|
|
- request.session.cookies.clear_session_cookies()
|
|
|
- self._proxies = self.proxy
|
|
|
- yield request
|
|
|
-
|
|
|
- # # 情况1.2、acw_3
|
|
|
- # self.ali_robots(request)
|
|
|
- # if request.count > 4:
|
|
|
- # log.error(f'阿里人机验证失败,尝试次数:{request.count}')
|
|
|
- # return
|
|
|
- # request.count += 1
|
|
|
+ # # 清cookies切代理
|
|
|
+ # request.session.cookies.clear_session_cookies()
|
|
|
+ # self._proxies = self.proxy
|
|
|
# yield request
|
|
|
+
|
|
|
+ # 情况1.2、acw_3
|
|
|
+ self.ali_robots(request)
|
|
|
+ if request.count > 4:
|
|
|
+ log.error(f'阿里人机验证失败,尝试次数:{request.count}')
|
|
|
+ return
|
|
|
+ request.count += 1
|
|
|
+ yield request
|
|
|
else:
|
|
|
contenthtml, state = self.extract_html(request, response)
|
|
|
# 删除页面中的图片或者base64
|