|
@@ -72,9 +72,9 @@ class Downloader:
|
|
|
if 'verify' in request_params:
|
|
|
del request_params['verify']
|
|
|
url = url.replace('https', 'http')
|
|
|
- retries += 1
|
|
|
except requests.RequestException as e:
|
|
|
response.reason = e.__class__.__name__
|
|
|
+ finally:
|
|
|
retries += 1
|
|
|
if not disable_debug_log:
|
|
|
t_name = threading.currentThread().getName()
|
|
@@ -102,3 +102,58 @@ class Downloader:
|
|
|
session.mount('http://', adapter)
|
|
|
session.mount('https://', adapter)
|
|
|
return session
|
|
|
+
|
|
|
+
|
|
|
class RenderDownloader(Downloader):
    """Downloader that fetches pages through a Splash rendering service.

    Instead of requesting the target URL directly, ``get`` asks the Splash
    ``render.json`` endpoint to load the page and returns Splash's own HTTP
    response, whose JSON body the caller is expected to decode.
    """

    # Splash endpoint used for rendering. Kept at class level so a subclass
    # (or a test) can point at a different Splash instance without
    # overriding ``get``.
    SPLASH_RENDER_URL = 'http://8.131.72.226:8998/render.json'

    # Render options sent with every request in addition to the target URL.
    # Per the Splash HTTP API, 'html' and 'iframes' ask for the page HTML and
    # the child-frame data to be included in the JSON result.
    SPLASH_RENDER_ARGS = {
        'html': 1,
        'iframes': 1,
    }

    def get(self, url, **kw):
        """Render ``url`` through Splash and return the raw response.

        Args:
            url: Address of the page Splash should load.
            **kw: Accepted for signature compatibility; currently unused.

        Returns:
            The ``requests`` response returned by the Splash endpoint.
        """
        args = {'url': url, **self.SPLASH_RENDER_ARGS}
        # NOTE(review): `headers` is not defined in this class; presumably it
        # is a module-level name defined elsewhere in this file -- confirm.
        # NOTE(review): no `timeout` is passed, so this call can wait
        # indefinitely if the Splash server stops responding -- consider
        # forwarding one from `kw`.
        resp = requests.get(self.SPLASH_RENDER_URL, params=args, headers=headers)
        return resp
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Manual smoke test: render one page through Splash, print every field of
    # the JSON result, and dump the main page and each child frame to disk.
    render = RenderDownloader()
    href = 'http://113.230.236.116:5002/mvvm/src/ebid/gcjs/combine/jypt.html?type=%e6%8b%9b%e6%a0%87%e5%85%ac%e5%91%8a&tpid=62c2943104c74c0e34cacef9&tpTitle=%e5%bb%ba%e5%b9%b3%e5%8e%bf%e7%ac%ac%e5%9b%9b%e5%b0%8f%e5%ad%a6%e8%bf%90%e5%8a%a8%e5%9c%ba%e5%8d%87%e7%ba%a7%e6%94%b9%e9%80%a0%e9%a1%b9%e7%9b%ae'
    resp = render.get(href)
    resp_json = resp.json()

    for k, val in resp_json.items():
        print(f">> {k}", val)

    # Frame titles come from the remote page, so strip characters that are
    # path separators or otherwise invalid in file names before using them.
    unsafe_chars = '\\/:*?"<>|'

    child_frames = resp_json.get('childFrames', [])
    index = 0  # running number used to name frames that have no title
    for child in child_frames:
        print(child)
        title = child.get('title')
        if not title:
            title = index
            index += 1
        else:
            title = ''.join('_' if ch in unsafe_chars else ch for ch in title)

        # Explicit UTF-8 so non-ASCII page content is written correctly
        # regardless of the platform's default locale encoding.
        with open(f'{title}.html', 'w', encoding='utf-8') as fp:
            fp.write(child['html'])

    html = resp_json['html']
    with open('p1.html', 'w', encoding='utf-8') as fp:
        fp.write(html)
|