"""Search-engine front ends built on the project's ``Downloader``.

Defines the abstract ``JySearchEngine`` base class and its concrete
engines. Only the Bing engine performs a real query; the Baidu engine is
currently a placeholder.
"""
from abc import ABCMeta, abstractmethod

from common.analysis import parse_urls
from crawler.constants import MGO_VISIT, RBF
from crawler.downloader import Downloader

# Endpoint queried by BingSearchEngine.search.
_BING_SEARCH_URL = "https://cn.bing.com/search"
# Second argument handed to parse_urls; presumably the base used to
# resolve/filter links found in the result page -- confirm in parse_urls.
_BING_BASE_URL = 'https://cn.bing.com/'


def _bing_request_headers():
    """Return a fresh dict of browser-like headers for a Bing request.

    A new dict is built on every call so that nothing downstream can
    mutate state shared between requests.
    """
    return {
        "authority": "cn.bing.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://cn.bing.com/?scope=web",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
    }


class JySearchEngine(Downloader, metaclass=ABCMeta):
    """Abstract base for keyword search engines.

    Subclasses must implement :meth:`search`.
    """

    def __init__(self):
        # NOTE(review): Downloader.__init__ is not invoked here; confirm
        # whether the base class needs its own initialisation.
        # Both attributes are bound to shared objects from
        # crawler.constants. RBF is presumably a de-duplication filter and
        # MGO_VISIT a storage handle -- TODO confirm against that module.
        self.mgo_instance = MGO_VISIT
        self.filter_instance = RBF

    def is_exists(self, val):
        """Return whatever ``filter_instance.is_exists(val)`` reports."""
        already_known = self.filter_instance.is_exists(val)
        return already_known

    @abstractmethod
    def search(self, keyword: str):
        """Run a search for *keyword*; concrete engines must override."""
        raise NotImplementedError


class BaiDuSearchEngine(JySearchEngine):
    """Placeholder engine for Baidu; no search logic is implemented yet."""

    def search(self, keyword: str):
        """Do nothing and return ``None`` (not implemented yet)."""
        return None


class BingSearchEngine(JySearchEngine):
    """Engine that queries cn.bing.com for a keyword."""

    def search(self, keyword: str):
        """Query Bing for *keyword* and return the URLs parsed from the page.

        Issues a GET request via ``self.get`` and passes the response body
        to ``parse_urls``. Returns an empty list whenever the HTTP status
        code is not 200.
        """
        reply = self.get(
            _BING_SEARCH_URL,
            headers=_bing_request_headers(),
            params={"q": keyword},
        )
        # Decode the body using the encoding detected from its content
        # rather than whatever was assigned to the response initially.
        reply.encoding = reply.apparent_encoding
        return parse_urls(reply.text, _BING_BASE_URL) if reply.status_code == 200 else []


# Manual smoke test (kept disabled): search Bing for '招标' and print
# every URL that comes back.
# if __name__ == '__main__':
#     b = BingSearchEngine()
#     for i in b.search('招标'):
#         print(i)