|
@@ -1,62 +0,0 @@
|
|
|
-import requests
|
|
|
-from playwright.sync_api import sync_playwright
|
|
|
-from feapder.network.response import Response
|
|
|
class PlayWright(object):
    """Fetch a page with Playwright-driven Chromium and wrap it as a ``Response``.

    Each call to :meth:`test` starts its own Playwright session and browser,
    navigates to the URL, collects the HTML of every frame, and shuts the
    browser down again before returning.
    """

    def __init__(self, headless=False, timeout=10, slow_mo=0.1, proxy=None):
        """Store launch settings; no browser is started here.

        Args:
            headless: Passed straight to ``chromium.launch(headless=...)``.
            timeout: Seconds. Multiplied by 1000 for the browser launch
                timeout and also used as the proxy-service request timeout.
            slow_mo: Seconds. Multiplied by 1000 for ``launch(slow_mo=...)``.
            proxy: Used only as a truthy flag: when truthy, a proxy address is
                fetched via :meth:`get_proxy`. The value itself is never used
                as the proxy address.
        """
        # Legacy attribute kept for backward compatibility; test() uses
        # ``self.driver`` instead.
        self._driver = None
        self.driver = None
        self.context = None
        self.page = None
        self.headless = headless
        self.timeout = timeout
        self.slow_mo = slow_mo
        self.proxy = proxy

    def test(self, url, redict=False):
        """Load ``url`` in Chromium and return the result.

        Args:
            url: Address to navigate to.
            redict: When true, navigate to ``url`` a second time while waiting
                for a ``requestfinished`` event, and use that second
                navigation's response.

        Returns:
            A ``(response, frames)`` tuple. ``response`` is built with
            ``Response.from_dict``; ``frames`` maps each frame's name to its
            HTML content. Frames sharing a name (e.g. several unnamed frames)
            overwrite one another in this dict.
        """
        with sync_playwright() as playwright:
            args = ['--disable-infobars']
            if self.proxy:
                args.append('--proxy-server=' + self.get_proxy())

            self.driver = playwright.chromium.launch(
                headless=self.headless,
                slow_mo=self.slow_mo * 1000,
                timeout=self.timeout * 1000,
                args=args,
            )
            try:
                self.context = self.driver.new_context()
                self.page = self.context.new_page()
                # Hide navigator.webdriver before any page script runs.
                self.page.add_init_script('Object.defineProperties(navigator, {webdriver:{get:()=>undefined}})')

                html = self.page.goto(url)
                if redict:
                    with self.page.expect_event("requestfinished"):
                        html = self.page.goto(url)

                frames = {frame.name: frame.content() for frame in self.page.frames}

                # Cookies come from the browser context, not from the response
                # headers. NOTE(review): Response.from_dict presumably expects
                # a name -> value mapping - confirm against feapder.
                cookies = {
                    cookie["name"]: cookie["value"]
                    for cookie in self.context.cookies(html.url)
                }
                response = Response.from_dict({"url": html.url,
                                               "cookies": cookies,
                                               "_content": html.body(),
                                               # Real HTTP status instead of a hard-coded 200.
                                               "status_code": html.status,
                                               # Placeholder: the load time is not measured.
                                               "elapsed": 666,
                                               "headers": html.all_headers()})
            finally:
                # Closing the browser also closes its contexts and pages, and
                # runs even when navigation or extraction fails.
                self.driver.close()

        return response, frames

    def get_proxy(self):
        """Request a proxy address from the proxy service.

        Returns:
            The ``"http"`` entry of the ``"data"`` object in the service's
            JSON reply.
        """
        # SECURITY(review): hard-coded Basic-auth credential; it should be
        # loaded from configuration or the environment instead of source code.
        headers = {
            "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
        }
        # A timeout keeps an unresponsive proxy service from blocking forever.
        proxy = requests.get(
            "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
            headers=headers,
            timeout=self.timeout,
        ).json()
        return proxy.get("data").get("http")
|
|
|
-
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fetch a sample page, then a headless-detection page.
    url = "http://connect.cebpubservice.com/PSPFrame/infobasemis/socialpublic/publicyewu/Frame_yewuDetail?rowguid=eb210896-fbe8-47f3-ae1c-61c888bec27b"
    # Alternative sample page:
    # url = "http://www.chaohu.gov.cn/public/column/13731?type=4&action=list&nav=&sub=&catId=7004611"
    driver = PlayWright(slow_mo=4, timeout=10)
    # test() returns (response, frames). The second element is a dict of frame
    # HTML, not a live page, and the browser is already closed by this point,
    # so further navigation has to go through test() again.
    response, frames = driver.test(url)
    print(response)
    detection_response, _ = driver.test('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')
    print(detection_response)
|