Browse Source

脚本修复

dzr — cách đây 3 tuần
mục cha
commit
7bcf98e6fc
1 tập tin đã thay đổi với 161 bổ sung và 138 xóa
  1. 161 138
      nx_nxzfcgggfwpt_cggg/区本级采购公告-列表页.py

+ 161 - 138
nx_nxzfcgggfwpt_cggg/区本级采购公告-列表页.py

@@ -17,90 +17,95 @@ from untils.get_imgcode import get_code
 
 
 def get_ck(proxies=False):
-    session = requests.session()
-    session.proxies = proxies
-
-    headers = {
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Pragma": "no-cache",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
-    }
-    url = "http://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp"
-    params = {
-        "cid": "2010",
-        "sid": "1"
-    }
-    res = session.get(url, headers=headers, params=params,timeout=30, verify=False)
-
-    url1 = "http://www.ccgp-ningxia.gov.cn/TrafficStatistics.do"
-    res1 = requests.get(url1, headers=headers,timeout=30, verify=False)
-
-    yzm_url = "http://www.ccgp-ningxia.gov.cn/admin/AuthCode_too.do"
-    res_yzm = session.get(yzm_url, headers=headers,timeout=30,verify=False)
-
-    code = get_code(res_yzm.content).upper()
-
-    headers = {
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Content-Type": "application/x-www-form-urlencoded",
-        "Origin": "http://www.ccgp-ningxia.gov.cn",
-        "Pragma": "no-cache",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
-    }
-
-    url = "http://www.ccgp-ningxia.gov.cn//site/InteractionQuestion_findVNoticeNew.do"
-    data = {
-        "type": "ALL",
-        "page": "0",
-        "tab": "SX",
-        "authCode": f"{code}",
-        "noticeTab": "CGYX",
-        "keyword_all": "",
-        "departmentName_all": "",
-        "date1_all": "",
-        "date2_all": "",
-        "regionId_all": "",
-        "keyword_each": "",
-        "departmentName_each": "",
-        "agentName_each": "",
-        "projectNumber_each": "",
-        "planNumber_each": "",
-        "date1_each": "",
-        "date2_each": "",
-        "title_cgyx": "",
-        "departmentName_cgyx": "",
-        "date1_cgyx": "",
-        "date2_cgyx": "",
-        "projectName_cgyxxm": "",
-        "departmentName_cgyxxm": "",
-        "yjcgsj_cgyxxm": "",
-        "date1_cgyxxm": "",
-        "date2_cgyxxm": "",
-        "purchaseItem_cgyxxm": "",
-        "agreCode_htgs": "",
-        "departmentName_htgs": "",
-        "supplierName_htgs": "",
-        "date1_htgs": "",
-        "date2_htgs": "",
-        "agreCode_ysjggg": "",
-        "reportCode_ysjggg": "",
-        "departmentName_ysjggg": "",
-        "supplierName_ysjggg": "",
-        "date1_ysjggg": "",
-        "date2_ysjggg": ""
-    }
-    resp = session.post(url, headers=headers, params=params,timeout=30, data=data, verify=False)
-
-    cookies = session.cookies.get_dict()
-    return cookies
+    with requests.session() as session:
+        session.proxies = proxies
+
+        ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
+        headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Upgrade-Insecure-Requests": "1",
+            "User-Agent": ua
+        }
+        url = "https://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1"
+        session.get(url, headers=headers, timeout=30, verify=False)
+
+        # 下载验证码
+        yzm_url = "https://www.ccgp-ningxia.gov.cn/admin/AuthCode_too.do"
+        yzm_res = session.get(yzm_url, headers=headers, timeout=30, verify=False)
+        yzm_code = get_code(yzm_res.content).upper()
+
+        headers = {
+            'Accept': '*/*',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Origin': 'https://www.ccgp-ningxia.gov.cn',
+            'Referer': url,
+            "User-Agent": ua,
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+        verify_url = "https://www.ccgp-ningxia.gov.cn/site/InteractionQuestion_findVNoticeNew.do"
+        data = {
+            'type': 'ALL_CG',
+            'type2': 'ALL_FA',
+            'menuType': 'CG',
+            'page': '0',
+            'tab': 'SX',
+            'authCode': f'{yzm_code}',
+            'noticeTab': 'CGYX',
+            'keyword_all': '',
+            'departmentName_all': '',
+            'date1_all': '',
+            'date2_all': '',
+            'regionId_all': '',
+            'keyword_each': '',
+            'departmentName_each': '',
+            'agentName_each': '',
+            'projectNumber_each': '',
+            'planNumber_each': '',
+            'date1_each': '',
+            'date2_each': '',
+            'keyword_fa': '',
+            'departmentName_fa': '',
+            'agentName_fa': '',
+            'projectNumber_fa': '',
+            'schemeNumber_fa': '',
+            'date1_fa': '',
+            'date2_fa': '',
+            'title_cgyx': '',
+            'departmentName_cgyx': '',
+            'date1_cgyx': '',
+            'date2_cgyx': '',
+            'projectName_cgyxxm': '',
+            'departmentName_cgyxxm': '',
+            'yjcgsj_cgyxxm': '',
+            'date1_cgyxxm': '',
+            'date2_cgyxxm': '',
+            'purchaseItem_cgyxxm': '',
+            'planNumber_cgxq': '',
+            'departmentName_cgxq': '',
+            'agentName_cgxq': '',
+            'date1_cgxq': '',
+            'date2_cgxq': '',
+            'schemeNumber_facgxq': '',
+            'departmentName_facgxq': '',
+            'agentName_facgxq': '',
+            'date1_facgxq': '',
+            'date2_facgxq': '',
+            'agreCode_htgs': '',
+            'departmentName_htgs': '',
+            'supplierName_htgs': '',
+            'date1_htgs': '',
+            'date2_htgs': '',
+            'agreCode_ysjggg': '',
+            'reportCode_ysjggg': '',
+            'departmentName_ysjggg': '',
+            'supplierName_ysjggg': '',
+            'date1_ysjggg': '',
+            'date2_ysjggg': '',
+        }
+        session.post(verify_url, headers=headers, timeout=30, data=data, verify=False)
+        return session.cookies.get_dict()
 
 
 class Spider(feapder.BiddingListSpider):
@@ -112,80 +117,98 @@ class Spider(feapder.BiddingListSpider):
         ]
         self.site = "宁夏政府采购公共服务平台"
         self.headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-            "Origin": "http://www.ccgp-ningxia.gov.cn",
-            "Pragma": "no-cache",
-            "Referer": "http://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1",
+            'Accept': '*/*',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            'Origin': 'https://www.ccgp-ningxia.gov.cn',
+            'Referer': 'https://www.ccgp-ningxia.gov.cn/public/NXGPPNEW/dynamic/contents/SXCGGG/index.jsp?cid=2010&sid=1',
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest"
+            'X-Requested-With': 'XMLHttpRequest',
         }
+
         self.cookies = get_ck()
 
     def start_requests(self):
-        url = "http://www.ccgp-ningxia.gov.cn//site/InteractionQuestion_findVNoticeNew.do"
+        url = "https://www.ccgp-ningxia.gov.cn/site/InteractionQuestion_findVNoticeNew.do"
         for menu in self.menus:
             yield feapder.Request(url, item=menu._asdict(), page=1, proxies=False)
 
     def download_midware(self, request):
         page = request.page
         data = {
-            "type": "ALL",
-            "page": f"{page-1}",
-            "tab": "QBJ",
-            "authCode": "",
-            "noticeTab": "CGYX",
-            "keyword_all": "",
-            "departmentName_all": "",
-            "date1_all": "",
-            "date2_all": "",
-            "regionId_all": "640000",
-            "keyword_each": "",
-            "departmentName_each": "",
-            "agentName_each": "",
-            "projectNumber_each": "",
-            "planNumber_each": "",
-            "date1_each": "",
-            "date2_each": "",
-            "title_cgyx": "",
-            "departmentName_cgyx": "",
-            "date1_cgyx": "",
-            "date2_cgyx": "",
-            "projectName_cgyxxm": "",
-            "departmentName_cgyxxm": "",
-            "yjcgsj_cgyxxm": "",
-            "date1_cgyxxm": "",
-            "date2_cgyxxm": "",
-            "purchaseItem_cgyxxm": "",
-            "agreCode_htgs": "",
-            "departmentName_htgs": "",
-            "supplierName_htgs": "",
-            "date1_htgs": "",
-            "date2_htgs": "",
-            "agreCode_ysjggg": "",
-            "reportCode_ysjggg": "",
-            "departmentName_ysjggg": "",
-            "supplierName_ysjggg": "",
-            "date1_ysjggg": "",
-            "date2_ysjggg": ""
+            'type': 'ALL_CG',
+            'type2': 'ALL_FA',
+            'menuType': 'CG',
+            'page': f"{page-1}",
+            'tab': 'QBJ',
+            'authCode': '',
+            'noticeTab': 'CGYX',
+            'keyword_all': '',
+            'departmentName_all': '',
+            'date1_all': '',
+            'date2_all': '',
+            'regionId_all': '640000',
+            'keyword_each': '',
+            'departmentName_each': '',
+            'agentName_each': '',
+            'projectNumber_each': '',
+            'planNumber_each': '',
+            'date1_each': '',
+            'date2_each': '',
+            'keyword_fa': '',
+            'departmentName_fa': '',
+            'agentName_fa': '',
+            'projectNumber_fa': '',
+            'schemeNumber_fa': '',
+            'date1_fa': '',
+            'date2_fa': '',
+            'title_cgyx': '',
+            'departmentName_cgyx': '',
+            'date1_cgyx': '',
+            'date2_cgyx': '',
+            'projectName_cgyxxm': '',
+            'departmentName_cgyxxm': '',
+            'yjcgsj_cgyxxm': '',
+            'date1_cgyxxm': '',
+            'date2_cgyxxm': '',
+            'purchaseItem_cgyxxm': '',
+            'planNumber_cgxq': '',
+            'departmentName_cgxq': '',
+            'agentName_cgxq': '',
+            'date1_cgxq': '',
+            'date2_cgxq': '',
+            'schemeNumber_facgxq': '',
+            'departmentName_facgxq': '',
+            'agentName_facgxq': '',
+            'date1_facgxq': '',
+            'date2_facgxq': '',
+            'agreCode_htgs': '',
+            'departmentName_htgs': '',
+            'supplierName_htgs': '',
+            'date1_htgs': '',
+            'date2_htgs': '',
+            'agreCode_ysjggg': '',
+            'reportCode_ysjggg': '',
+            'departmentName_ysjggg': '',
+            'supplierName_ysjggg': '',
+            'date1_ysjggg': '',
+            'date2_ysjggg': '',
         }
         request.data = data
         request.headers = self.headers
-        request.cookies = get_ck()
+        request.cookies = self.cookies
+
+    def text2json(self, text):
+        data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', text, re.S)).strip() + "}]"
+        return json.loads(data_str.replace('\\', ''), strict=False)
 
     def validate(self, request, response):
-        data = response.content.decode()
-        data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', data, re.S)).strip() + "}]"
-        info_list = json.loads(data_str.replace('\\', ''), strict=False)
+        info_list = self.text2json(text=response.content.decode())
         assert len(info_list) > 0
 
     def parse(self, request, response):
         menu = request.item
-        data_str = "[{" + "".join(re.findall('\[\{(.*?)}]', response.text, re.S)).strip() + "}]"
-        info_list = json.loads(data_str.replace('\\', ''), strict=False)
+        info_list = self.text2json(text=response.content.decode())
         for info in info_list:
             href = info.get('url')
             if 'http' not in href: