|
@@ -131,7 +131,7 @@ def crawl_spider(area: str, types: int, page: int, **kwargs):
|
|
"summaryType": 0
|
|
"summaryType": 0
|
|
}
|
|
}
|
|
data = json.dumps(data)
|
|
data = json.dumps(data)
|
|
- url = "http://search.vip.qianlima.com/rest/service/website/search/solr"
|
|
|
|
|
|
+ url = "https://search.vip.qianlima.com/rest/service/website/search/solr"
|
|
response = crawl_request(url, data)
|
|
response = crawl_request(url, data)
|
|
row_count = 0
|
|
row_count = 0
|
|
if response is not None and response.status_code == 200:
|
|
if response is not None and response.status_code == 200:
|
|
@@ -178,6 +178,8 @@ def crawl_spider(area: str, types: int, page: int, **kwargs):
|
|
logger.info(resp_json['msg'])
|
|
logger.info(resp_json['msg'])
|
|
elif response is not None and response.status_code in [401, 403, 404]:
|
|
elif response is not None and response.status_code in [401, 403, 404]:
|
|
request_status = 'disable'
|
|
request_status = 'disable'
|
|
|
|
+ elif response is not None and response.status_code == 405:
|
|
|
|
+ request_status = 'method_not_allowed'
|
|
|
|
|
|
if len(results) > 0:
|
|
if len(results) > 0:
|
|
qlm.insert_many(results)
|
|
qlm.insert_many(results)
|
|
@@ -219,6 +221,10 @@ def by_area_crawl_data(area="", types=0, **kwargs):
|
|
logger.warning(f"账号被禁止访问第{area}区-第{page}页数据")
|
|
logger.warning(f"账号被禁止访问第{area}区-第{page}页数据")
|
|
disable_page += 1
|
|
disable_page += 1
|
|
break
|
|
break
|
|
|
|
+ elif success == 'method_not_allowed':
|
|
|
|
+ logger.warning("服务器禁止使用当前 HTTP 方法的请求")
|
|
|
|
+ disable_page += 1
|
|
|
|
+ break
|
|
elif success == 'stop':
|
|
elif success == 'stop':
|
|
close_spider = True
|
|
close_spider = True
|
|
logger.info(f"第{area}区-第{page}页数据采集成功")
|
|
logger.info(f"第{area}区-第{page}页数据采集成功")
|