# webplaywright.py
import requests
from feapder.network.response import Response
from playwright.sync_api import sync_playwright
  4. class PlayWright(object):
  5. def __init__(self,headless=False,timeout=10,slow_mo=0.1,proxy=None):
  6. # super().__init__(self)
  7. self._driver =None
  8. self.headless = headless
  9. self.timeout = timeout
  10. self.slow_mo = slow_mo
  11. self.proxy = proxy
  12. # self.playwright = sync_playwright().start()
  13. def test(self,url,redict=False):
  14. with sync_playwright() as playwright:
  15. args = ['--disable-infobars']
  16. if self.proxy:
  17. args.append('--proxy-server=' + self.get_proxy())
  18. self.driver = playwright.chromium.launch(headless=self.headless,slow_mo=self.slow_mo*1000,
  19. timeout=self.timeout*1000,args=args)
  20. self.context = self.driver.new_context()
  21. self.page = self.context.new_page()
  22. self.page.add_init_script('Object.defineProperties(navigator, {webdriver:{get:()=>undefined}})')
  23. html = self.page.goto(url)
  24. # frame = page.frame('mini-iframe-6')
  25. # frame.content()
  26. if redict:
  27. with self.page.expect_event("requestfinished") as request_info:
  28. html = self.page.goto(url)
  29. frames = {}
  30. for frame in self.page.frames:
  31. frames[frame.name] = frame.content()
  32. response = Response.from_dict({"url": html.url,
  33. "cookies": html.all_headers(),
  34. "_content": html.body(),
  35. "status_code": 200,
  36. "elapsed": 666,
  37. "headers": html.all_headers()})
  38. self.page.close()
  39. self.context.close()
  40. self.driver.close()
  41. return response,frames
  42. def get_proxy(self):
  43. headers = {
  44. "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
  45. }
  46. proxy = requests.get("http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", headers=headers).json()
  47. return proxy.get("data").get("http")
  48. # def __del__(self):
  49. # self.page.close()
  50. # self.context.close()
  51. # self.driver.close()
  52. if __name__ == '__main__':
  53. url = "http://connect.cebpubservice.com/PSPFrame/infobasemis/socialpublic/publicyewu/Frame_yewuDetail?rowguid=eb210896-fbe8-47f3-ae1c-61c888bec27b"
  54. # url = "http://www.chaohu.gov.cn/public/column/13731?type=4&action=list&nav=&sub=&catId=7004611"
  55. driver = PlayWright(slow_mo=4,timeout=10)
  56. response,page = driver.test(url)
  57. print(response)
  58. page.goto('https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html')