|
@@ -6,32 +6,30 @@ Created on 2023-04-24
|
|
---------
|
|
---------
|
|
@author: jsl
|
|
@author: jsl
|
|
"""
|
|
"""
|
|
-import re
|
|
|
|
-import sys
|
|
|
|
import json
|
|
import json
|
|
|
|
+import re
|
|
|
|
+
|
|
import execjs
|
|
import execjs
|
|
import requests
|
|
import requests
|
|
-sys.path.append('/app/spiders/sword_feapder/FworkSpider')
|
|
|
|
-from untils.cookie_pool import PageCookiePool
|
|
|
|
|
|
|
|
|
|
+from untils.cookie_pool import PageCookiePool
|
|
|
|
|
|
|
|
|
|
class DTCookiePool(PageCookiePool):
|
|
class DTCookiePool(PageCookiePool):
|
|
- def __init__(self,redis_key,header,page_url=None,**kwargs):
|
|
|
|
- super(DTCookiePool, self).__init__(redis_key,page_url=None,
|
|
|
|
- min_cookies=10000,must_contained_keys=(),keep_alive=False,**kwargs)
|
|
|
|
- self.headers=header
|
|
|
|
|
|
+ def __init__(self, redis_key, header, page_url=None, **kwargs):
|
|
|
|
+ super(DTCookiePool, self).__init__(redis_key, page_url=None,
|
|
|
|
+ min_cookies=10000,
|
|
|
|
+ must_contained_keys=(),
|
|
|
|
+ keep_alive=False, **kwargs)
|
|
|
|
+ self.headers = header
|
|
self.page_url = page_url
|
|
self.page_url = page_url
|
|
self.proxies = kwargs.get('proxies') or False
|
|
self.proxies = kwargs.get('proxies') or False
|
|
|
|
|
|
def create_cookie(self):
|
|
def create_cookie(self):
|
|
-
|
|
|
|
- proxies = self.proxies
|
|
|
|
-
|
|
|
|
session = requests.Session()
|
|
session = requests.Session()
|
|
- session.proxies = proxies
|
|
|
|
|
|
+ session.proxies = self.proxies
|
|
start_url = self.page_url
|
|
start_url = self.page_url
|
|
- res = session.get(start_url, headers=self.headers,timeout=120, verify=False)
|
|
|
|
|
|
+ res = session.get(start_url, headers=self.headers, timeout=120, verify=False)
|
|
js_func = "".join(re.findall("document.cookie=(.*?)location.href", res.text))
|
|
js_func = "".join(re.findall("document.cookie=(.*?)location.href", res.text))
|
|
js_func = 'function sd() { return ' + js_func + "}"
|
|
js_func = 'function sd() { return ' + js_func + "}"
|
|
ctx = execjs.compile(js_func)
|
|
ctx = execjs.compile(js_func)
|