|
@@ -294,15 +294,14 @@ class Request(object):
|
|
|
|
|
|
# 代理
|
|
|
proxies = self.requests_kwargs.get("proxies", -1)
|
|
|
- if not self.render:
|
|
|
- if proxies == -1 and setting.PROXY_ENABLE and setting.PROXY_EXTRACT_API:
|
|
|
- while True:
|
|
|
- proxies = self.get_proxy()
|
|
|
- if proxies:
|
|
|
- self.requests_kwargs.update(proxies=proxies)
|
|
|
- break
|
|
|
- else:
|
|
|
- log.debug("暂无可用代理 ...")
|
|
|
+ if proxies == -1 and setting.PROXY_ENABLE and setting.PROXY_EXTRACT_API:
|
|
|
+ while True:
|
|
|
+ proxies = self.get_proxy()
|
|
|
+ if proxies:
|
|
|
+ self.requests_kwargs.update(proxies=proxies)
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ log.debug("暂无可用代理 ...")
|
|
|
|
|
|
log.debug(
|
|
|
"""
|
|
@@ -339,7 +338,6 @@ class Request(object):
|
|
|
# 使用request的user_agent、cookies、proxy
|
|
|
user_agent = headers.get("User-Agent") or headers.get("user-agent")
|
|
|
cookies = self.requests_kwargs.get("cookies")
|
|
|
- print(f'cookies >>> {cookies}')
|
|
|
if cookies and isinstance(cookies, RequestsCookieJar):
|
|
|
cookies = cookies.get_dict()
|
|
|
|
|
@@ -348,10 +346,15 @@ class Request(object):
|
|
|
if cookie_str:
|
|
|
cookies = tools.get_cookies_from_str(cookie_str)
|
|
|
|
|
|
- browser = self._webdriver_pool.get(user_agent=user_agent, proxy=False)
|
|
|
+ proxy = None
|
|
|
+ if proxies and proxies != -1:
|
|
|
+ # NOTE: str.strip("http://") strips a character *set*, not a prefix — use split.
|
|
|
+ scheme_proxy = proxies.get("http") or proxies.get("https") or ""
|
|
|
+ proxy = scheme_proxy.split("//")[-1]
|
|
|
+
|
|
|
+ browser = self._webdriver_pool.get(user_agent=user_agent, proxy=proxy)
|
|
|
+
|
|
|
try:
|
|
|
- if proxies:
|
|
|
- self.chage_ip(browser)
|
|
|
browser.get(self.url)
|
|
|
if cookies:
|
|
|
browser.cookies = cookies
|
|
@@ -359,36 +362,49 @@ class Request(object):
|
|
|
tools.delay_time(self.render_time)
|
|
|
|
|
|
html = browser.page_source
|
|
|
- response = Response.from_dict(
|
|
|
- {
|
|
|
- "url": browser.current_url,
|
|
|
- "cookies": browser.cookies,
|
|
|
- "_content": html.encode(),
|
|
|
- "status_code": 200,
|
|
|
- "elapsed": 666,
|
|
|
- "headers": {
|
|
|
- "User-Agent": browser.execute_script(
|
|
|
- "return navigator.userAgent"
|
|
|
- ),
|
|
|
- "Cookie": tools.cookies2str(browser.cookies),
|
|
|
- },
|
|
|
- }
|
|
|
- )
|
|
|
+ response = Response.from_dict({
|
|
|
+ "url": browser.current_url,
|
|
|
+ "cookies": browser.cookies,
|
|
|
+ "_content": html.encode(),
|
|
|
+ "status_code": 200,
|
|
|
+ "elapsed": 666,
|
|
|
+ "headers": {
|
|
|
+ "User-Agent": browser.execute_script(
|
|
|
+ "return navigator.userAgent"
|
|
|
+ ),
|
|
|
+ "Cookie": tools.cookies2str(browser.cookies),
|
|
|
+ },
|
|
|
+ })
|
|
|
response.browser = browser
|
|
|
except Exception as e:
|
|
|
self._webdriver_pool.remove(browser)
|
|
|
raise e
|
|
|
+
|
|
|
elif use_session:
|
|
|
response = self._session.request(method, self.url, **self.requests_kwargs)
|
|
|
response = Response(response)
|
|
|
elif self.splash:
|
|
|
- resp = requests.get(setting.SWORDFISH_RENDER_URL, params={
|
|
|
+ headers = self.requests_kwargs.get('headers')
|
|
|
+ if not headers:
|
|
|
+ headers = {'User-Agent': self.user_agent()}
|
|
|
+ headers = [(key, val) for key, val in headers.items()]
|
|
|
+
|
|
|
+ proxy = None
|
|
|
+ if proxies and proxies != -1:
|
|
|
+ # NOTE: str.strip("http://") strips a character *set*, not a prefix — use split.
|
|
|
+ scheme_proxy = proxies.get("http") or proxies.get("https") or ""
|
|
|
+ proxy = scheme_proxy.split("//")[-1]
|
|
|
+
|
|
|
+ params = {
|
|
|
'iframes': self.iframes,
|
|
|
'wait': self.render_time,
|
|
|
'html': 1,
|
|
|
- 'proxy': {} if self.proxies == False else self.get_proxy().get("http"),
|
|
|
- 'url': self.url
|
|
|
- })
|
|
|
+ 'proxy': proxy,
|
|
|
+ 'url': self.url,
|
|
|
+ }
|
|
|
+ data = {'headers': headers}
|
|
|
+ splash_url = setting.SWORDFISH_RENDER_URL
|
|
|
+ resp = requests.get(splash_url, params=params, json=data)
|
|
|
response = Response(resp)
|
|
|
|
|
|
# if self.iframes:
|
|
@@ -429,6 +445,7 @@ class Request(object):
|
|
|
|
|
|
if save_cached:
|
|
|
self.save_cached(response, expire_time=self.__class__.cached_expire_time)
|
|
|
+
|
|
|
return response
|
|
|
|
|
|
def proxies(self):
|
|
@@ -452,25 +469,8 @@ class Request(object):
|
|
|
def get_proxy(self):
|
|
|
headers = {"Authorization": setting.SWORDFISH_PROXY_AUTHOR}
|
|
|
proxy = requests.get(setting.SWORDFISH_PROXY_URL, headers=headers).json()
|
|
|
- print(f"切换代理:{proxy.get('data')}")
|
|
|
return proxy.get("data")
|
|
|
|
|
|
- def chage_ip(self, browser):
|
|
|
- ip = self.get_proxy().get("http") # ip格式"127.0.0.1:80"
|
|
|
- ip = ip.split("//")[-1]
|
|
|
- browser.get("about:config")
|
|
|
- tools.delay_time(0.5)
|
|
|
- browser.find_element_by_id("warningButton").click()
|
|
|
- # js代码
|
|
|
- setupScript = '''
|
|
|
- var prefs = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);
|
|
|
- prefs.setIntPref("network.proxy.type", 1);
|
|
|
- prefs.setCharPref("network.proxy.socks", "%s");
|
|
|
- prefs.setIntPref("network.proxy.socks_port", "%s");
|
|
|
- ''' % (ip.split(':')[0], ip.split(':')[1])
|
|
|
- # 执行js
|
|
|
- browser.execute_script(setupScript)
|
|
|
-
|
|
|
def user_agent(self):
|
|
|
headers = self.requests_kwargs.get("headers")
|
|
|
if headers:
|