1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- import requests
- from playwright.sync_api import sync_playwright
- from feapder.network.response import Response
class PlayWright:
    """Render a URL in a short-lived Playwright Chromium browser.

    Each call to :meth:`test` launches a fresh browser, loads the page,
    captures the main response plus the HTML of every frame, and shuts the
    browser down again before returning.
    """

    def __init__(self, headless=False, timeout=10, slow_mo=0.1, proxy=None):
        """Store the launch settings; no browser is started here.

        Args:
            headless: Passed straight to ``chromium.launch``.
            timeout: Seconds; multiplied by 1000 for the browser launch
                timeout and also used as the proxy-service HTTP timeout.
            slow_mo: Seconds; multiplied by 1000 for Playwright's ``slow_mo``.
            proxy: Only its truthiness is used. When truthy, a proxy address
                is fetched via :meth:`get_proxy` on every :meth:`test` call.
        """
        self._driver = None  # legacy attribute, kept for backward compatibility
        # Handles that are only live while test() is running.
        self.driver = None
        self.context = None
        self.page = None
        self.headless = headless
        self.timeout = timeout
        self.slow_mo = slow_mo
        self.proxy = proxy

    def test(self, url, redict=False) -> "tuple[Response, dict[str, str]]":
        """Load *url* and return ``(response, frames)``.

        Args:
            url: Address to navigate to.
            redict: When true, navigate to *url* a second time while waiting
                for a ``requestfinished`` event, and use that second
                navigation's response.

        Returns:
            A feapder ``Response`` built from the navigation response, and a
            dict mapping each frame's name to its HTML content. Frames that
            share a name (e.g. several unnamed frames) overwrite each other.

        Raises:
            RuntimeError: If a proxy was requested but none could be obtained,
                or if the navigation produced no response object.
        """
        with sync_playwright() as playwright:
            try:
                self.driver = playwright.chromium.launch(
                    headless=self.headless,
                    slow_mo=self.slow_mo * 1000,
                    timeout=self.timeout * 1000,
                    args=self._launch_args(),
                )
                self.context = self.driver.new_context()
                self.page = self.context.new_page()
                # Hide the navigator.webdriver automation flag from the page.
                self.page.add_init_script(
                    'Object.defineProperties(navigator, {webdriver:{get:()=>undefined}})'
                )
                html = self.page.goto(url)
                if redict:
                    with self.page.expect_event("requestfinished"):
                        html = self.page.goto(url)
                if html is None:
                    raise RuntimeError("navigation to %r returned no response" % (url,))

                frames = {frame.name: frame.content() for frame in self.page.frames}
                response = Response.from_dict({
                    "url": html.url,
                    # Real browser cookies, not the response headers.
                    "cookies": self._cookie_dict(self.context.cookies()),
                    "_content": html.body(),
                    # NOTE(review): status_code and elapsed are hard-coded
                    # placeholders, not measured values - confirm whether
                    # callers rely on them before changing.
                    "status_code": 200,
                    "elapsed": 666,
                    "headers": html.all_headers(),
                })
            finally:
                # Always release the browser, even when navigation fails.
                self._close_browser()
        return response, frames

    def get_proxy(self):
        """Ask the proxy service for an address.

        Returns:
            The value stored under ``data.http`` in the service's JSON reply,
            or ``None`` when the reply carries no such value.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-success HTTP status.
        """
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or the environment.
        headers = {
            "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
        }
        reply = requests.get(
            "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
            headers=headers,
            timeout=self.timeout,  # never block forever on the proxy service
        )
        reply.raise_for_status()
        payload = reply.json()
        return (payload.get("data") or {}).get("http")

    def _launch_args(self):
        """Build the Chromium command-line arguments for this instance."""
        args = ['--disable-infobars']
        if self.proxy:
            address = self.get_proxy()
            if not address:
                # Fail loudly rather than silently browsing without a proxy.
                raise RuntimeError("proxy requested but the proxy service returned no address")
            args.append('--proxy-server=' + address)
        return args

    @staticmethod
    def _cookie_dict(cookies):
        """Collapse Playwright cookie records into a ``{name: value}`` dict."""
        return {cookie["name"]: cookie["value"] for cookie in cookies}

    def _close_browser(self):
        """Close page, context and browser (in that order) if they are open."""
        for attr in ("page", "context", "driver"):
            handle = getattr(self, attr, None)
            # Clear first so a failing close() never leaves a stale handle.
            setattr(self, attr, None)
            if handle is not None:
                handle.close()
if __name__ == '__main__':
    # Manual smoke test: render a sample page, then a headless-detection page.
    url = "http://connect.cebpubservice.com/PSPFrame/infobasemis/socialpublic/publicyewu/Frame_yewuDetail?rowguid=eb210896-fbe8-47f3-ae1c-61c888bec27b"
    # url = "http://www.chaohu.gov.cn/public/column/13731?type=4&action=list&nav=&sub=&catId=7004611"
    driver = PlayWright(slow_mo=4, timeout=10)
    # test() returns the response and a dict of frame HTML, not a live page.
    response, frames = driver.test(url)
    print(response)
    # The browser is already closed when test() returns, so the detection page
    # has to be loaded through a second test() call.
    driver.test('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')
|