|
@@ -0,0 +1,56 @@
|
|
|
+from abc import abstractmethod, ABCMeta
|
|
|
+
|
|
|
+from common.analysis import parse_urls
|
|
|
+from crawler.constants import RBF, MGO_VISIT
|
|
|
+from crawler.downloader import Downloader
|
|
|
+
|
|
|
+
|
|
|
class JySearchEngine(Downloader, metaclass=ABCMeta):
    """Abstract base class for keyword search-engine clients.

    Concrete engines inherit from this class and must provide their own
    :meth:`search` implementation.
    """

    def __init__(self):
        # RBF and MGO_VISIT are module-level objects imported from
        # crawler.constants, so every engine instance references the same two.
        # NOTE(review): Downloader.__init__ is not invoked here -- confirm
        # that the base class needs no initialisation of its own.
        self.filter_instance, self.mgo_instance = RBF, MGO_VISIT

    def is_exists(self, val):
        """Return the result of ``filter_instance.is_exists`` for *val*."""
        # presumably a "seen before" membership check -- verify against RBF.
        found = self.filter_instance.is_exists(val)
        return found

    @abstractmethod
    def search(self, keyword: str):
        """Search for *keyword*; every concrete subclass must override this.

        Raises:
            NotImplementedError: if a subclass delegates to this base body.
        """
        raise NotImplementedError
|
|
|
+
|
|
|
+
|
|
|
class BaiDuSearchEngine(JySearchEngine):
    """Baidu search-engine client (placeholder, no search logic yet)."""

    def search(self, keyword: str):
        """Do nothing with *keyword* and return ``None``.

        The method exists only to satisfy the abstract interface; no request
        is issued.
        """
        return None
|
|
|
+
|
|
|
+
|
|
|
class BingSearchEngine(JySearchEngine):
    """Search-engine client that queries ``cn.bing.com``."""

    # Endpoint that receives the search query.
    _SEARCH_URL = "https://cn.bing.com/search"

    # Browser-like request headers sent with every search request.
    _REQUEST_HEADERS = {
        "authority": "cn.bing.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://cn.bing.com/?scope=web",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
    }

    def search(self, keyword: str):
        """Query Bing for *keyword* and return the URLs parsed from the page.

        The request goes through the inherited ``get`` method. When the
        response status is not 200 an empty list is returned; otherwise the
        result of ``parse_urls`` on the response text is returned.
        """
        response = self.get(
            self._SEARCH_URL,
            # Hand over a fresh dict on every call so the shared class-level
            # template is never exposed to the downloader.
            headers=dict(self._REQUEST_HEADERS),
            params={"q": keyword},
        )
        # Decode the body with the detected encoding rather than the declared one.
        response.encoding = response.apparent_encoding

        if response.status_code != 200:
            return []
        return parse_urls(response.text, 'https://cn.bing.com/')
|
|
|
+
|
|
|
+
|
|
|
+# if __name__ == '__main__':
|
|
|
+# b = BingSearchEngine()
|
|
|
+# for i in b.search('招标'):
|
|
|
+# print(i)
|