123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- from abc import abstractmethod, ABCMeta
- from crawler.analysis import parse_urls
- from crawler.download import Downloader
class JySearchEngine(Downloader, metaclass=ABCMeta):
    """Abstract base class for keyword search engines.

    Inherits from ``Downloader`` and uses ``ABCMeta`` so that concrete
    subclasses are required to override :meth:`search`.
    """

    def __init__(self) -> None:
        """Create the engine without performing any setup.

        NOTE(review): this does not chain to ``Downloader.__init__``;
        confirm against ``Downloader`` that skipping it is intentional.
        """

    @abstractmethod
    def search(self, keyword: str):
        """Run a search for ``keyword``; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, when this base implementation is
                invoked directly (for example through ``super()``).
        """
        raise NotImplementedError()
class BaiDuSearchEngine(JySearchEngine):
    """Placeholder engine whose ``search`` is not implemented yet."""

    def search(self, keyword: str):
        """Accept ``keyword`` and do nothing; always returns ``None``."""
        return None
class BingSearchEngine(JySearchEngine):
    """Search engine that queries ``https://cn.bing.com/search``."""

    def search(self, keyword: str):
        """Query cn.bing.com for ``keyword`` and return the parsed URLs.

        Sends a GET request through ``self.get`` (not defined in this
        class; presumably inherited from ``Downloader`` -- confirm) with
        browser-like headers and ``keyword`` as the ``q`` query parameter.

        Returns:
            The result of ``parse_urls`` applied to the response text when
            the status code is 200; otherwise an empty list.
        """
        endpoint = "https://cn.bing.com/search"
        # Long header values are split with adjacent-literal concatenation;
        # the resulting strings are unchanged.
        request_headers = {
            "authority": "cn.bing.com",
            "accept": (
                "text/html,application/xhtml+xml,application/xml;q=0.9,"
                "image/avif,image/webp,image/apng,*/*;q=0.8,"
                "application/signed-exchange;v=b3;q=0.9"
            ),
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "referer": "https://cn.bing.com/?scope=web",
            "upgrade-insecure-requests": "1",
            "user-agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/100.0.4896.75 Safari/537.36"
            ),
        }
        query = {"q": keyword}

        page = self.get(endpoint, headers=request_headers, params=query)
        # Use the encoding detected from the body before reading ``text``.
        page.encoding = page.apparent_encoding

        # Anything other than HTTP 200 yields no results.
        if page.status_code != 200:
            return []
        return parse_urls(page.text, 'https://cn.bing.com/')
|