search_engine.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. from abc import abstractmethod, ABCMeta
  2. from crawler.analysis import parse_urls
  3. from crawler.download import Downloader
  4. class JySearchEngine(Downloader, metaclass=ABCMeta):
  5. def __init__(self):
  6. pass
  7. @abstractmethod
  8. def search(self, keyword: str):
  9. raise NotImplementedError
  10. class BaiDuSearchEngine(JySearchEngine):
  11. def search(self, keyword: str):
  12. pass
  13. class BingSearchEngine(JySearchEngine):
  14. def search(self, keyword: str):
  15. urls = []
  16. headers = {
  17. "authority": "cn.bing.com",
  18. "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  19. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
  20. "cache-control": "no-cache",
  21. "pragma": "no-cache",
  22. "referer": "https://cn.bing.com/?scope=web",
  23. "upgrade-insecure-requests": "1",
  24. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
  25. }
  26. url = "https://cn.bing.com/search"
  27. params = {
  28. "q": keyword,
  29. }
  30. response = self.get(url, headers=headers, params=params)
  31. response.encoding = response.apparent_encoding
  32. if response.status_code == 200:
  33. urls = parse_urls(response.text, 'https://cn.bing.com/')
  34. return urls