import requests
from playwright.sync_api import sync_playwright
from feapder.network.response import Response


class PlayWright(object):
    """Render a page in Chromium via Playwright and wrap it as a feapder Response.

    A fresh browser is launched for every call to ``test`` and closed again
    before the call returns, so no browser object outlives a single fetch.
    """

    def __init__(self, headless=False, timeout=10, slow_mo=0.1, proxy=None):
        """Store the launch options.

        Args:
            headless: Passed to ``chromium.launch(headless=...)``.
            timeout: Seconds; multiplied by 1000 for the Playwright launch
                timeout and used as-is for the proxy-service HTTP request.
            slow_mo: Seconds; multiplied by 1000 for Playwright's ``slow_mo``.
            proxy: Only its truthiness is used. When truthy, a proxy address
                is fetched through ``get_proxy`` on every ``test`` call.
        """
        self.headless = headless
        self.timeout = timeout
        self.slow_mo = slow_mo
        self.proxy = proxy
        # Populated by ``test``; after it returns they refer to closed objects.
        self.driver = None
        self.context = None
        self.page = None

    def test(self, url, redict=False):
        """Open ``url`` in a new browser and return the rendered result.

        Args:
            url: Address to navigate to.
            redict: When true, navigate to ``url`` a second time inside an
                ``expect_event("requestfinished")`` block and use that second
                navigation's response.

        Returns:
            A ``(response, frames)`` tuple. ``response`` is a feapder
            ``Response`` built from the navigation response; ``frames`` maps
            each frame name of the page to that frame's HTML content.
        """
        with sync_playwright() as playwright:
            args = ['--disable-infobars']
            if self.proxy:
                args.append('--proxy-server=' + self.get_proxy())
            self.driver = playwright.chromium.launch(
                headless=self.headless,
                slow_mo=self.slow_mo * 1000,
                timeout=self.timeout * 1000,
                args=args,
            )
            try:
                self.context = self.driver.new_context()
                self.page = self.context.new_page()
                # Hide navigator.webdriver before any page script runs.
                self.page.add_init_script(
                    'Object.defineProperties(navigator, {webdriver:{get:()=>undefined}})'
                )
                html = self.page.goto(url)
                if redict:
                    with self.page.expect_event("requestfinished"):
                        html = self.page.goto(url)

                frames = {
                    frame.name: frame.content() for frame in self.page.frames
                }
                # Response.from_dict expects a name -> value cookie mapping;
                # the response headers are not cookies.
                cookies = {
                    cookie["name"]: cookie["value"]
                    for cookie in self.context.cookies()
                }
                response = Response.from_dict({
                    "url": html.url,
                    "cookies": cookies,
                    "_content": html.body(),
                    # Placeholder values, not measured from the navigation.
                    "status_code": 200,
                    "elapsed": 666,
                    "headers": html.all_headers(),
                })
                return response, frames
            finally:
                # Closing the browser also closes its contexts and pages, and
                # this runs even when navigation or parsing raised.
                self.driver.close()

    def get_proxy(self):
        """Fetch a proxy address from the proxy service.

        Returns:
            The value stored under ``data.http`` in the service's JSON reply.
        """
        # NOTE(review): hard-coded Basic-auth credential committed to source;
        # consider loading it from configuration or the environment.
        headers = {
            "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
        }
        # A timeout keeps an unresponsive proxy service from hanging the caller.
        proxy = requests.get(
            "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
            headers=headers,
            timeout=self.timeout,
        ).json()
        return proxy.get("data").get("http")


if __name__ == '__main__':
    url = "http://connect.cebpubservice.com/PSPFrame/infobasemis/socialpublic/publicyewu/Frame_yewuDetail?rowguid=eb210896-fbe8-47f3-ae1c-61c888bec27b"
    driver = PlayWright(slow_mo=4, timeout=10)
    response, frames = driver.test(url)
    print(response)
    # test() returns frame contents rather than a live page, and the browser is
    # already closed at this point, so load the headless-detection page with a
    # second fetch.
    check_response, _ = driver.test('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')
    print(check_response)