|
@@ -17,90 +17,95 @@ from untils.get_imgcode import get_code
|
|
|
|
|
|
|
|
|
def get_ck(proxies=False):
|
|
|
- session = requests.session()
|
|
|
- session.proxies = proxies
|
|
|
-
|
|
|
- headers = {
|
|
|
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Pragma": "no-cache",
|
|
|
- "Upgrade-Insecure-Requests": "1",
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
|
|
|
- }
|
|
|
- url = "http://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp"
|
|
|
- params = {
|
|
|
- "cid": "2010",
|
|
|
- "sid": "1"
|
|
|
- }
|
|
|
- res = session.get(url, headers=headers, params=params,timeout=30, verify=False)
|
|
|
-
|
|
|
- url1 = "http://www.ccgp-ningxia.gov.cn/TrafficStatistics.do"
|
|
|
- res1 = requests.get(url1, headers=headers,timeout=30, verify=False)
|
|
|
-
|
|
|
- yzm_url = "http://www.ccgp-ningxia.gov.cn/admin/AuthCode_too.do"
|
|
|
- res_yzm = session.get(yzm_url, headers=headers,timeout=30,verify=False)
|
|
|
-
|
|
|
- code = get_code(res_yzm.content).upper()
|
|
|
-
|
|
|
- headers = {
|
|
|
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Content-Type": "application/x-www-form-urlencoded",
|
|
|
- "Origin": "http://www.ccgp-ningxia.gov.cn",
|
|
|
- "Pragma": "no-cache",
|
|
|
- "Upgrade-Insecure-Requests": "1",
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
|
|
|
- }
|
|
|
-
|
|
|
- url = "http://www.ccgp-ningxia.gov.cn//site/InteractionQuestion_findVNoticeNew.do"
|
|
|
- data = {
|
|
|
- "type": "ALL",
|
|
|
- "page": "0",
|
|
|
- "tab": "SX",
|
|
|
- "authCode": f"{code}",
|
|
|
- "noticeTab": "CGYX",
|
|
|
- "keyword_all": "",
|
|
|
- "departmentName_all": "",
|
|
|
- "date1_all": "",
|
|
|
- "date2_all": "",
|
|
|
- "regionId_all": "",
|
|
|
- "keyword_each": "",
|
|
|
- "departmentName_each": "",
|
|
|
- "agentName_each": "",
|
|
|
- "projectNumber_each": "",
|
|
|
- "planNumber_each": "",
|
|
|
- "date1_each": "",
|
|
|
- "date2_each": "",
|
|
|
- "title_cgyx": "",
|
|
|
- "departmentName_cgyx": "",
|
|
|
- "date1_cgyx": "",
|
|
|
- "date2_cgyx": "",
|
|
|
- "projectName_cgyxxm": "",
|
|
|
- "departmentName_cgyxxm": "",
|
|
|
- "yjcgsj_cgyxxm": "",
|
|
|
- "date1_cgyxxm": "",
|
|
|
- "date2_cgyxxm": "",
|
|
|
- "purchaseItem_cgyxxm": "",
|
|
|
- "agreCode_htgs": "",
|
|
|
- "departmentName_htgs": "",
|
|
|
- "supplierName_htgs": "",
|
|
|
- "date1_htgs": "",
|
|
|
- "date2_htgs": "",
|
|
|
- "agreCode_ysjggg": "",
|
|
|
- "reportCode_ysjggg": "",
|
|
|
- "departmentName_ysjggg": "",
|
|
|
- "supplierName_ysjggg": "",
|
|
|
- "date1_ysjggg": "",
|
|
|
- "date2_ysjggg": ""
|
|
|
- }
|
|
|
- resp = session.post(url, headers=headers, params=params,timeout=30, data=data, verify=False)
|
|
|
-
|
|
|
- cookies = session.cookies.get_dict()
|
|
|
- return cookies
|
|
|
+ with requests.session() as session:
|
|
|
+ session.proxies = proxies
|
|
|
+
|
|
|
+ ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
|
|
|
+ headers = {
|
|
|
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
|
+ "Accept-Language": "zh-CN,zh;q=0.9",
|
|
|
+ "Upgrade-Insecure-Requests": "1",
|
|
|
+ "User-Agent": ua
|
|
|
+ }
|
|
|
+ url = "https://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1"
|
|
|
+ session.get(url, headers=headers, timeout=30, verify=False)
|
|
|
+
|
|
|
+ # 下载验证码
|
|
|
+ yzm_url = "https://www.ccgp-ningxia.gov.cn/admin/AuthCode_too.do"
|
|
|
+ yzm_res = session.get(yzm_url, headers=headers, timeout=30, verify=False)
|
|
|
+ yzm_code = get_code(yzm_res.content).upper()
|
|
|
+
|
|
|
+ headers = {
|
|
|
+ 'Accept': '*/*',
|
|
|
+ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
|
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
|
|
+ 'Origin': 'https://www.ccgp-ningxia.gov.cn',
|
|
|
+ 'Referer': url,
|
|
|
+ "User-Agent": ua,
|
|
|
+ 'X-Requested-With': 'XMLHttpRequest',
|
|
|
+ }
|
|
|
+ verify_url = "https://www.ccgp-ningxia.gov.cn/site/InteractionQuestion_findVNoticeNew.do"
|
|
|
+ data = {
|
|
|
+ 'type': 'ALL_CG',
|
|
|
+ 'type2': 'ALL_FA',
|
|
|
+ 'menuType': 'CG',
|
|
|
+ 'page': '0',
|
|
|
+ 'tab': 'SX',
|
|
|
+ 'authCode': f'{yzm_code}',
|
|
|
+ 'noticeTab': 'CGYX',
|
|
|
+ 'keyword_all': '',
|
|
|
+ 'departmentName_all': '',
|
|
|
+ 'date1_all': '',
|
|
|
+ 'date2_all': '',
|
|
|
+ 'regionId_all': '',
|
|
|
+ 'keyword_each': '',
|
|
|
+ 'departmentName_each': '',
|
|
|
+ 'agentName_each': '',
|
|
|
+ 'projectNumber_each': '',
|
|
|
+ 'planNumber_each': '',
|
|
|
+ 'date1_each': '',
|
|
|
+ 'date2_each': '',
|
|
|
+ 'keyword_fa': '',
|
|
|
+ 'departmentName_fa': '',
|
|
|
+ 'agentName_fa': '',
|
|
|
+ 'projectNumber_fa': '',
|
|
|
+ 'schemeNumber_fa': '',
|
|
|
+ 'date1_fa': '',
|
|
|
+ 'date2_fa': '',
|
|
|
+ 'title_cgyx': '',
|
|
|
+ 'departmentName_cgyx': '',
|
|
|
+ 'date1_cgyx': '',
|
|
|
+ 'date2_cgyx': '',
|
|
|
+ 'projectName_cgyxxm': '',
|
|
|
+ 'departmentName_cgyxxm': '',
|
|
|
+ 'yjcgsj_cgyxxm': '',
|
|
|
+ 'date1_cgyxxm': '',
|
|
|
+ 'date2_cgyxxm': '',
|
|
|
+ 'purchaseItem_cgyxxm': '',
|
|
|
+ 'planNumber_cgxq': '',
|
|
|
+ 'departmentName_cgxq': '',
|
|
|
+ 'agentName_cgxq': '',
|
|
|
+ 'date1_cgxq': '',
|
|
|
+ 'date2_cgxq': '',
|
|
|
+ 'schemeNumber_facgxq': '',
|
|
|
+ 'departmentName_facgxq': '',
|
|
|
+ 'agentName_facgxq': '',
|
|
|
+ 'date1_facgxq': '',
|
|
|
+ 'date2_facgxq': '',
|
|
|
+ 'agreCode_htgs': '',
|
|
|
+ 'departmentName_htgs': '',
|
|
|
+ 'supplierName_htgs': '',
|
|
|
+ 'date1_htgs': '',
|
|
|
+ 'date2_htgs': '',
|
|
|
+ 'agreCode_ysjggg': '',
|
|
|
+ 'reportCode_ysjggg': '',
|
|
|
+ 'departmentName_ysjggg': '',
|
|
|
+ 'supplierName_ysjggg': '',
|
|
|
+ 'date1_ysjggg': '',
|
|
|
+ 'date2_ysjggg': '',
|
|
|
+ }
|
|
|
+ session.post(verify_url, headers=headers, timeout=30, data=data, verify=False)
|
|
|
+ return session.cookies.get_dict()
|
|
|
|
|
|
|
|
|
class Spider(feapder.BiddingListSpider):
|
|
@@ -112,80 +117,98 @@ class Spider(feapder.BiddingListSpider):
|
|
|
]
|
|
|
self.site = "宁夏政府采购公共服务平台"
|
|
|
self.headers = {
|
|
|
- "Accept": "*/*",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
|
- "Origin": "http://www.ccgp-ningxia.gov.cn",
|
|
|
- "Pragma": "no-cache",
|
|
|
- "Referer": "http://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1",
|
|
|
+ 'Accept': '*/*',
|
|
|
+ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
|
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
|
|
+ 'Origin': 'https://www.ccgp-ningxia.gov.cn',
|
|
|
+ 'Referer': 'https://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1',
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
|
|
|
- "X-Requested-With": "XMLHttpRequest"
|
|
|
+ 'X-Requested-With': 'XMLHttpRequest',
|
|
|
}
|
|
|
+
|
|
|
self.cookies = get_ck()
|
|
|
|
|
|
def start_requests(self):
|
|
|
- url = "http://www.ccgp-ningxia.gov.cn//site/InteractionQuestion_findVNoticeNew.do"
|
|
|
+ url = "https://www.ccgp-ningxia.gov.cn/site/InteractionQuestion_findVNoticeNew.do"
|
|
|
for menu in self.menus:
|
|
|
yield feapder.Request(url, item=menu._asdict(), page=1, proxies=False)
|
|
|
|
|
|
def download_midware(self, request):
|
|
|
page = request.page
|
|
|
data = {
|
|
|
- "type": "ALL",
|
|
|
- "page": f"{page-1}",
|
|
|
- "tab": "QBJ",
|
|
|
- "authCode": "",
|
|
|
- "noticeTab": "CGYX",
|
|
|
- "keyword_all": "",
|
|
|
- "departmentName_all": "",
|
|
|
- "date1_all": "",
|
|
|
- "date2_all": "",
|
|
|
- "regionId_all": "640000",
|
|
|
- "keyword_each": "",
|
|
|
- "departmentName_each": "",
|
|
|
- "agentName_each": "",
|
|
|
- "projectNumber_each": "",
|
|
|
- "planNumber_each": "",
|
|
|
- "date1_each": "",
|
|
|
- "date2_each": "",
|
|
|
- "title_cgyx": "",
|
|
|
- "departmentName_cgyx": "",
|
|
|
- "date1_cgyx": "",
|
|
|
- "date2_cgyx": "",
|
|
|
- "projectName_cgyxxm": "",
|
|
|
- "departmentName_cgyxxm": "",
|
|
|
- "yjcgsj_cgyxxm": "",
|
|
|
- "date1_cgyxxm": "",
|
|
|
- "date2_cgyxxm": "",
|
|
|
- "purchaseItem_cgyxxm": "",
|
|
|
- "agreCode_htgs": "",
|
|
|
- "departmentName_htgs": "",
|
|
|
- "supplierName_htgs": "",
|
|
|
- "date1_htgs": "",
|
|
|
- "date2_htgs": "",
|
|
|
- "agreCode_ysjggg": "",
|
|
|
- "reportCode_ysjggg": "",
|
|
|
- "departmentName_ysjggg": "",
|
|
|
- "supplierName_ysjggg": "",
|
|
|
- "date1_ysjggg": "",
|
|
|
- "date2_ysjggg": ""
|
|
|
+ 'type': 'ALL_CG',
|
|
|
+ 'type2': 'ALL_FA',
|
|
|
+ 'menuType': 'CG',
|
|
|
+ 'page': f"{page-1}",
|
|
|
+ 'tab': 'QBJ',
|
|
|
+ 'authCode': '',
|
|
|
+ 'noticeTab': 'CGYX',
|
|
|
+ 'keyword_all': '',
|
|
|
+ 'departmentName_all': '',
|
|
|
+ 'date1_all': '',
|
|
|
+ 'date2_all': '',
|
|
|
+ 'regionId_all': '640000',
|
|
|
+ 'keyword_each': '',
|
|
|
+ 'departmentName_each': '',
|
|
|
+ 'agentName_each': '',
|
|
|
+ 'projectNumber_each': '',
|
|
|
+ 'planNumber_each': '',
|
|
|
+ 'date1_each': '',
|
|
|
+ 'date2_each': '',
|
|
|
+ 'keyword_fa': '',
|
|
|
+ 'departmentName_fa': '',
|
|
|
+ 'agentName_fa': '',
|
|
|
+ 'projectNumber_fa': '',
|
|
|
+ 'schemeNumber_fa': '',
|
|
|
+ 'date1_fa': '',
|
|
|
+ 'date2_fa': '',
|
|
|
+ 'title_cgyx': '',
|
|
|
+ 'departmentName_cgyx': '',
|
|
|
+ 'date1_cgyx': '',
|
|
|
+ 'date2_cgyx': '',
|
|
|
+ 'projectName_cgyxxm': '',
|
|
|
+ 'departmentName_cgyxxm': '',
|
|
|
+ 'yjcgsj_cgyxxm': '',
|
|
|
+ 'date1_cgyxxm': '',
|
|
|
+ 'date2_cgyxxm': '',
|
|
|
+ 'purchaseItem_cgyxxm': '',
|
|
|
+ 'planNumber_cgxq': '',
|
|
|
+ 'departmentName_cgxq': '',
|
|
|
+ 'agentName_cgxq': '',
|
|
|
+ 'date1_cgxq': '',
|
|
|
+ 'date2_cgxq': '',
|
|
|
+ 'schemeNumber_facgxq': '',
|
|
|
+ 'departmentName_facgxq': '',
|
|
|
+ 'agentName_facgxq': '',
|
|
|
+ 'date1_facgxq': '',
|
|
|
+ 'date2_facgxq': '',
|
|
|
+ 'agreCode_htgs': '',
|
|
|
+ 'departmentName_htgs': '',
|
|
|
+ 'supplierName_htgs': '',
|
|
|
+ 'date1_htgs': '',
|
|
|
+ 'date2_htgs': '',
|
|
|
+ 'agreCode_ysjggg': '',
|
|
|
+ 'reportCode_ysjggg': '',
|
|
|
+ 'departmentName_ysjggg': '',
|
|
|
+ 'supplierName_ysjggg': '',
|
|
|
+ 'date1_ysjggg': '',
|
|
|
+ 'date2_ysjggg': '',
|
|
|
}
|
|
|
request.data = data
|
|
|
request.headers = self.headers
|
|
|
- request.cookies = get_ck()
|
|
|
+ request.cookies = self.cookies
|
|
|
+
|
|
|
+ def text2json(self, text):
|
|
|
+ data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', text, re.S)).strip() + "}]"
|
|
|
+ return json.loads(data_str.replace('\\', ''), strict=False)
|
|
|
|
|
|
def validate(self, request, response):
|
|
|
- data = response.content.decode()
|
|
|
- data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', data, re.S)).strip() + "}]"
|
|
|
- info_list = json.loads(data_str.replace('\\', ''), strict=False)
|
|
|
+ info_list = self.text2json(text=response.content.decode())
|
|
|
assert len(info_list) > 0
|
|
|
|
|
|
def parse(self, request, response):
|
|
|
menu = request.item
|
|
|
- data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', response.text, re.S)).strip() + "}]"
|
|
|
- info_list = json.loads(data_str.replace('\\', ''), strict=False)
|
|
|
+ info_list = self.text2json(text=response.content.decode())
|
|
|
for info in info_list:
|
|
|
href = info.get('url')
|
|
|
if 'http' not in href:
|