|
@@ -4,14 +4,46 @@ from PIL import Image, ImageChops
|
|
from bs4 import BeautifulSoup
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
import requests
|
|
class APILink():
|
|
class APILink():
|
|
- #打开链接
|
|
|
|
- def open_url(self,url):
|
|
|
|
|
|
+ #打开链接,返回title
|
|
|
|
def obtain_url_title(self, url, timeout=10):
    """Fetch ``url`` and return the text of its ``<title>`` element.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The title string, or ``None`` when the parsed page has no
        ``<title>`` element (or BeautifulSoup reports no single string
        for it).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout, requests can wait indefinitely on a silent host.
    response = requests.get(url, timeout=timeout)
    # Decode the body as UTF-8 regardless of what the server declared.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')
    title_tag = soup.title
    # soup.title is None for a page without <title>; dereferencing it
    # directly raised AttributeError.
    if title_tag is None:
        return None
    return title_tag.string
|
|
|
|
|
|
|
|
+ #打开链接,status=200,返回true
|
|
|
|
def open_url_status(self, url, timeout=10):
    """Report whether a GET request to ``url`` answers with HTTP 200.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        ``True`` when the response status code is exactly 200,
        ``False`` for any other status code.

    Raises:
        requests.RequestException: If the request fails or times out;
            request errors are not converted into ``False``.
    """
    # Without a timeout, requests can wait indefinitely on a silent host.
    response = requests.get(url, timeout=timeout)
    return response.status_code == 200
|
|
|
|
+
|
|
|
|
def fetch_page_source(self, url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The page source, or ``None`` when the request fails, times out,
        or ``raise_for_status()`` rejects the response. The error is
        printed to stdout in that case.
    """
    try:
        # A timeout makes a stalled host surface as a RequestException
        # handled below instead of blocking forever.
        response = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses into an exception.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None
    return response.text
|
|
|
|
+
|
|
|
|
def check_value_in_source(self, url, value):
    """Check whether ``value`` occurs in the page source of ``url``.

    Args:
        url: Address handed to ``self.fetch_page_source``.
        value: Substring to look for in the returned source.

    Returns:
        ``True`` when the fetched source contains ``value``. ``False``
        when it does not, or when ``fetch_page_source`` returned nothing
        usable (``None`` or an empty string).
    """
    page_source = self.fetch_page_source(url)
    # A failed or empty fetch counts as "not found".
    if not page_source:
        return False
    return value in page_source
|
|
|
|
+
|
|
|
|
+
|
|
def setup(self):
|
|
def setup(self):
|
|
# 初始化 Playwright
|
|
# 初始化 Playwright
|
|
self.playwright = sync_playwright().start()
|
|
self.playwright = sync_playwright().start()
|
|
@@ -23,6 +55,21 @@ class APILink():
|
|
self.browser.close()
|
|
self.browser.close()
|
|
self.playwright.stop()
|
|
self.playwright.stop()
|
|
|
|
|
|
|
|
def obtain_element_text(self, url, element):
    """Navigate ``self.page`` to ``url`` and return an element's text.

    Args:
        url: Address to open in ``self.page`` (presumably the Playwright
            page created during setup; confirm against the caller).
        element: Selector string identifying the target element.

    Returns:
        Whatever ``text_content()`` yields for the located element.
    """
    page = self.page
    page.goto(url)
    # Block until the selector matches before reading from it.
    page.wait_for_selector(element)
    text = page.locator(element).text_content()
    # Fixed 3000 ms pause before returning.
    # NOTE(review): this runs after the text has been read, so it cannot
    # affect the returned value; confirm whether it is still needed.
    page.wait_for_timeout(3000)
    return text
|
|
|
|
+
|
|
#网页截图模糊遮罩方法,适用于网页有动态元素,进行遮罩比较
|
|
#网页截图模糊遮罩方法,适用于网页有动态元素,进行遮罩比较
|
|
def save_screenshot_mask(self,url, output_path, elements, clip=None):
|
|
def save_screenshot_mask(self,url, output_path, elements, clip=None):
|
|
locs = []
|
|
locs = []
|