|
@@ -14,6 +14,7 @@ import requests
|
|
from requests.adapters import HTTPAdapter
|
|
from requests.adapters import HTTPAdapter
|
|
from requests.cookies import RequestsCookieJar
|
|
from requests.cookies import RequestsCookieJar
|
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
|
|
|
+from requests.packages.urllib3.util.ssl_ import create_urllib3_context
|
|
|
|
|
|
import feapder.setting as setting
|
|
import feapder.setting as setting
|
|
import feapder.utils.tools as tools
|
|
import feapder.utils.tools as tools
|
|
@@ -27,6 +28,29 @@ from feapder.utils.webdriver import WebDriverPool
|
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
|
|
|
|
|
|
|
|
|
|
class DESAdapter(HTTPAdapter):
    """
    HTTPAdapter whose TLS connections use a shuffled cipher list.

    The cipher names are read from ``setting.JA3_REQUEST_CIPHERS``. Their order
    is randomised once per adapter instance and ``:!aNULL:!eNULL:!MD5`` is
    appended to form the OpenSSL cipher string. The same cipher string is
    applied to both direct and proxied connections.
    """

    # HTTPAdapter pickles only the attributes named in ``__attrs__`` and then
    # rebuilds its pool manager in ``__setstate__``. ``ciphers`` has to be
    # listed here, otherwise an unpickled adapter fails in init_poolmanager
    # because ``self.ciphers`` is missing.
    __attrs__ = [*HTTPAdapter.__attrs__, "ciphers"]

    def __init__(self, *args, **kwargs):
        """
        Build the cipher string, then initialise the underlying HTTPAdapter.

        @param args: positional arguments passed unchanged to HTTPAdapter
        @param kwargs: keyword arguments passed unchanged to HTTPAdapter
        """
        # Join-then-split flattens entries that themselves contain ":" so that
        # every individual cipher name takes part in the shuffle.
        cipher_names = ":".join(setting.JA3_REQUEST_CIPHERS).split(":")
        tools.random.shuffle(cipher_names)
        # Must be assigned before super().__init__(): the parent constructor
        # calls init_poolmanager(), which reads self.ciphers.
        self.ciphers = ":".join(cipher_names) + ":!aNULL:!eNULL:!MD5"
        super().__init__(*args, **kwargs)

    def _create_ssl_context(self):
        """Return a new urllib3 SSL context configured with ``self.ciphers``."""
        return create_urllib3_context(ciphers=self.ciphers)

    def init_poolmanager(self, *args, **kwargs):
        """
        Create the connection pool manager with the custom SSL context.

        Any ``ssl_context`` supplied by the caller is replaced.
        """
        kwargs["ssl_context"] = self._create_ssl_context()
        return super().init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        """
        Create the proxy manager with the custom SSL context.

        Any ``ssl_context`` supplied by the caller is replaced.
        """
        kwargs["ssl_context"] = self._create_ssl_context()
        return super().proxy_manager_for(*args, **kwargs)
|
|
|
|
+
|
|
|
|
+
|
|
class Request(object):
|
|
class Request(object):
|
|
session = None
|
|
session = None
|
|
webdriver_pool: WebDriverPool = None
|
|
webdriver_pool: WebDriverPool = None
|
|
@@ -67,6 +91,7 @@ class Request(object):
|
|
auto_request=True,
|
|
auto_request=True,
|
|
request_sync=False,
|
|
request_sync=False,
|
|
use_session=None,
|
|
use_session=None,
|
|
|
|
+ use_ja3_session=None,
|
|
random_user_agent=True,
|
|
random_user_agent=True,
|
|
download_midware=None,
|
|
download_midware=None,
|
|
is_abandoned=False,
|
|
is_abandoned=False,
|
|
@@ -85,6 +110,7 @@ class Request(object):
|
|
auto_request=True,
|
|
auto_request=True,
|
|
request_sync=False,
|
|
request_sync=False,
|
|
use_session=None,
|
|
use_session=None,
|
|
|
|
+ use_ja3_session=None,
|
|
random_user_agent=True,
|
|
random_user_agent=True,
|
|
download_midware=None,
|
|
download_midware=None,
|
|
is_abandoned=False,
|
|
is_abandoned=False,
|
|
@@ -108,6 +134,7 @@ class Request(object):
|
|
@param auto_request: 是否需要自动请求下载网页 默认是。设置为False时返回的response为空,需要自己去请求网页
|
|
@param auto_request: 是否需要自动请求下载网页 默认是。设置为False时返回的response为空,需要自己去请求网页
|
|
@param request_sync: 是否同步请求下载网页,默认异步。如果该请求url过期时间快,可设置为True,相当于yield的reqeust会立即响应,而不是去排队
|
|
@param request_sync: 是否同步请求下载网页,默认异步。如果该请求url过期时间快,可设置为True,相当于yield的reqeust会立即响应,而不是去排队
|
|
@param use_session: 是否使用session方式
|
|
@param use_session: 是否使用session方式
|
|
|
|
+ @param use_ja3_session: 是否使用ja3_session方式
|
|
@param random_user_agent: 是否随机User-Agent (True/False) 当setting中的RANDOM_HEADERS设置为True时该参数生效 默认True
|
|
@param random_user_agent: 是否随机User-Agent (True/False) 当setting中的RANDOM_HEADERS设置为True时该参数生效 默认True
|
|
@param download_midware: 下载中间件。默认为parser中的download_midware
|
|
@param download_midware: 下载中间件。默认为parser中的download_midware
|
|
@param is_abandoned: 当发生异常时是否放弃重试 True/False. 默认False
|
|
@param is_abandoned: 当发生异常时是否放弃重试 True/False. 默认False
|
|
@@ -149,6 +176,7 @@ class Request(object):
|
|
self.auto_request = auto_request
|
|
self.auto_request = auto_request
|
|
self.request_sync = request_sync
|
|
self.request_sync = request_sync
|
|
self.use_session = use_session
|
|
self.use_session = use_session
|
|
|
|
+ self.use_ja3_session = use_ja3_session
|
|
self.random_user_agent = random_user_agent
|
|
self.random_user_agent = random_user_agent
|
|
self.download_midware = download_midware
|
|
self.download_midware = download_midware
|
|
self.is_abandoned = is_abandoned
|
|
self.is_abandoned = is_abandoned
|
|
@@ -192,12 +220,23 @@ class Request(object):
|
|
use_session = (
|
|
use_session = (
|
|
setting.USE_SESSION if self.use_session is None else self.use_session
|
|
setting.USE_SESSION if self.use_session is None else self.use_session
|
|
) # self.use_session 优先级高
|
|
) # self.use_session 优先级高
|
|
|
|
+ use_ja3_session = (
|
|
|
|
+ setting.USE_JA3_SESSION if self.use_ja3_session is None else self.use_ja3_session
|
|
|
|
+ ) # self.use_ja3_session 优先级高
|
|
|
|
+ use_session = use_session or use_ja3_session
|
|
if use_session and not self.__class__.session:
|
|
if use_session and not self.__class__.session:
|
|
self.__class__.session = requests.Session()
|
|
self.__class__.session = requests.Session()
|
|
- # pool_connections – 缓存的 urllib3 连接池个数 pool_maxsize – 连接池中保存的最大连接数
|
|
|
|
- http_adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
|
|
|
|
- # 任何使用该session会话的 HTTP 请求,只要其 URL 是以给定的前缀开头,该传输适配器就会被使用到。
|
|
|
|
- self.__class__.session.mount("http", http_adapter)
|
|
|
|
|
|
+ if use_ja3_session:
|
|
|
|
+ # pool_connections – 缓存的 urllib3 连接池个数 pool_maxsize – 连接池中保存的最大连接数
|
|
|
|
+ des_adapter = DESAdapter(pool_connections=1000, pool_maxsize=1000)
|
|
|
|
+ # 任何使用该session会话的 HTTP/HTTPS 请求,只要其 URL 是以给定的前缀开头,该传输适配器就会被使用到。
|
|
|
|
+ self.__class__.session.mount("https://", des_adapter)
|
|
|
|
+ self.__class__.session.mount("http://", des_adapter)
|
|
|
|
+ else:
|
|
|
|
+ # pool_connections – 缓存的 urllib3 连接池个数 pool_maxsize – 连接池中保存的最大连接数
|
|
|
|
+ http_adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
|
|
|
|
+ # 任何使用该session会话的 HTTP 请求,只要其 URL 是以给定的前缀开头,该传输适配器就会被使用到。
|
|
|
|
+ self.__class__.session.mount("http", http_adapter)
|
|
|
|
|
|
return self.__class__.session
|
|
return self.__class__.session
|
|
|
|
|
|
@@ -339,6 +378,10 @@ class Request(object):
|
|
use_session = (
|
|
use_session = (
|
|
setting.USE_SESSION if self.use_session is None else self.use_session
|
|
setting.USE_SESSION if self.use_session is None else self.use_session
|
|
) # self.use_session 优先级高
|
|
) # self.use_session 优先级高
|
|
|
|
+ use_ja3_session = (
|
|
|
|
+ setting.USE_JA3_SESSION if self.use_ja3_session is None else self.use_ja3_session
|
|
|
|
+ ) # self.use_ja3_session 优先级高
|
|
|
|
+ use_session = use_session or use_ja3_session
|
|
|
|
|
|
if self.render:
|
|
if self.render:
|
|
# 使用request的user_agent、cookies、proxy
|
|
# 使用request的user_agent、cookies、proxy
|