|
@@ -6,63 +6,17 @@ Created on 2024-09-19
|
|
|
---------
|
|
|
@author: lzz
|
|
|
"""
|
|
|
-import time
|
|
|
from collections import namedtuple
|
|
|
|
|
|
import feapder
|
|
|
-import requests
|
|
|
+from feapder.utils.tools import joint_url
|
|
|
from items.spider_item import BidingListItem
|
|
|
-from untils.get_imgcode import get_code
|
|
|
from untils.tools import get_proxy
|
|
|
|
|
|
from fingerprint import get_fingerprint
|
|
|
|
|
|
|
|
|
-def Code(proxies):
|
|
|
- s = requests.session()
|
|
|
- tm = int(time.time()*1000)
|
|
|
- headers = {
|
|
|
- "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Pragma": "no-cache",
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
|
|
|
- }
|
|
|
-
|
|
|
- url = "https://cg.95306.cn/proxy/portal/enterprise/base/loadComplexValidCodeImg"
|
|
|
- params = {
|
|
|
- "validCodeKey": f"{tm}",
|
|
|
- "timestamp": f"{tm}"
|
|
|
- }
|
|
|
- for _ in range(3):
|
|
|
- response = s.get(url, headers=headers, params=params,timeout=20,proxies=proxies,verify=False)
|
|
|
- code = get_code(response.content)
|
|
|
- if len(code) == 5:
|
|
|
- zheaders = {
|
|
|
- "Accept": "application/json, text/javascript, */*; q=0.01",
|
|
|
- "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
- "Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
|
- "Origin": "https://cg.95306.cn",
|
|
|
- "Pragma": "no-cache",
|
|
|
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
|
|
|
- "X-Requested-With": "XMLHttpRequest",
|
|
|
- }
|
|
|
-
|
|
|
- zurl = "https://cg.95306.cn/proxy/portal/elasticSearch/checkRequestNumValidateCode"
|
|
|
- zdata = {
|
|
|
- "picValidCodeKey": f"{tm}",
|
|
|
- "picValidCode": f"{code}"
|
|
|
- }
|
|
|
- s.post(zurl, headers=zheaders, data=zdata,timeout=20,proxies=proxies,verify=False)
|
|
|
- return s.cookies.get_dict()
|
|
|
- else:
|
|
|
- return None
|
|
|
-
|
|
|
-
|
|
|
-class Gtcgpt(feapder.BiddingListSpider):
|
|
|
+class Spider(feapder.BiddingListSpider):
|
|
|
|
|
|
def start_callback(self):
|
|
|
Menu = namedtuple('Menu', ['channel', 'code', 'noticeType', 'tid', 'crawl_page'])
|
|
@@ -70,14 +24,11 @@ class Gtcgpt(feapder.BiddingListSpider):
|
|
|
|
|
|
self.menus = [
|
|
|
Menu('采购公告', 'a_gtcgpt_cggg', '000', 'queryProcurementNoticeList', 20),
|
|
|
- # Menu('采购结果', 'a_gtcgpt_cgjg', '001', 'queryProcurementResultsList', 20),
|
|
|
]
|
|
|
self.headers = {
|
|
|
"Accept": "application/json, text/javascript, */*; q=0.01",
|
|
|
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
"Cache-Control": "no-cache",
|
|
|
- "Connection": "keep-alive",
|
|
|
- "Pragma": "no-cache",
|
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
|
|
|
"X-Requested-With": "XMLHttpRequest",
|
|
|
}
|
|
@@ -88,28 +39,37 @@ class Gtcgpt(feapder.BiddingListSpider):
|
|
|
|
|
|
def start_requests(self):
|
|
|
for menu in self.menus:
|
|
|
- start_url = f"https://cg.95306.cn/proxy/portal/elasticSearch/{menu.tid}"
|
|
|
- yield feapder.Request(url=start_url, item=menu._asdict(), page=1, proxies=False)
|
|
|
+ referer = "https://cg.95306.cn/baseinfor/notice/procurementNotice"
|
|
|
+ params = {
|
|
|
+ "bidType": "",
|
|
|
+ "noticeType": f"{menu.noticeType}",
|
|
|
+ "transactionType": "01",
|
|
|
+ "wzType": "",
|
|
|
+ "title": "",
|
|
|
+ "bidding": "",
|
|
|
+ "navigation": ""
|
|
|
+ }
|
|
|
+ self.headers["Referer"] = joint_url(referer, params)
|
|
|
+
|
|
|
+ url = f"https://cg.95306.cn/proxy/portal/elasticSearch/{menu.tid}"
|
|
|
+ yield feapder.Request(url, item=menu._asdict(), page=1, proxies=False)
|
|
|
|
|
|
def download_midware(self, request):
|
|
|
- if not self.cookies:
|
|
|
- self.cookies = Code(self.proxy)
|
|
|
+ if self.cookies is None:
|
|
|
+ self.cookies = {
|
|
|
+ 'AlteonPcgmh': '0a03b7f3bb36ad3f1f41',
|
|
|
+ 'mhId': self.fp,
|
|
|
+ }
|
|
|
|
|
|
- page = request.page
|
|
|
- noticeType = request.item.get('noticeType')
|
|
|
- params = {
|
|
|
+ data = {
|
|
|
'mhId': self.fp,
|
|
|
- "projBidType": "01",
|
|
|
- "bidType": "",
|
|
|
- "noticeType": f"{noticeType}",
|
|
|
- "title": "",
|
|
|
- "inforCode": "",
|
|
|
- "pageNum": f"{page}",
|
|
|
- "projType": "",
|
|
|
- "professionalCode": "",
|
|
|
- "createPeopUnit": ""
|
|
|
+ 'projBidType': '01',
|
|
|
+ 'bidType': '',
|
|
|
+ 'noticeType': '000',
|
|
|
+ 'wzType': '',
|
|
|
+ 'title': '',
|
|
|
}
|
|
|
- request.params = params
|
|
|
+ request.data = data
|
|
|
request.headers = self.headers
|
|
|
request.cookies = self.cookies
|
|
|
request.proxies = self.proxy
|
|
@@ -151,7 +111,6 @@ class Gtcgpt(feapder.BiddingListSpider):
|
|
|
}
|
|
|
list_item.request_params = {"params": params_d}
|
|
|
list_item.parse_url = "https://cg.95306.cn/proxy/portal/elasticSearch/indexView"
|
|
|
-
|
|
|
yield list_item
|
|
|
|
|
|
# 无限翻页设置
|
|
@@ -166,4 +125,4 @@ class Gtcgpt(feapder.BiddingListSpider):
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- Gtcgpt(redis_key="lzz:Gtcgpt").start()
|
|
|
+ Spider(redis_key="lzz:Gtcgpt").start()
|