|
@@ -1,12 +1,11 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
"""
|
|
|
-Created on 2023-7-10
|
|
|
+Created on 2024-12-02
|
|
|
---------
|
|
|
@summary: 中国交建物资采购管理信息系统
|
|
|
---------
|
|
|
@author: lzz
|
|
|
"""
|
|
|
-
|
|
|
import feapder
|
|
|
from items.spider_item import DataBakItem
|
|
|
from feapder.utils.tools import log
|
|
@@ -15,51 +14,62 @@ from untils.tools import extract_file_type
|
|
|
import base64
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
def btoa(data):
    """Decode a Base64 value and return it as text.

    Despite the name (borrowed from JavaScript's ``btoa``), this helper
    *decodes* Base64; it does not encode.

    Args:
        data: Base64-encoded ``str`` or bytes-like object. ``None`` and
            empty values are tolerated.

    Returns:
        The decoded UTF-8 text, or ``''`` when ``data`` is missing, is not
        valid Base64, or does not decode to valid UTF-8.
    """
    try:
        return base64.b64decode(data).decode()
    except (TypeError, ValueError):
        # TypeError: ``data`` is None or otherwise not str/bytes-like.
        # ValueError: covers binascii.Error (bad padding / length) and
        # UnicodeDecodeError (payload is not UTF-8 text).
        # A bare ``except:`` would also hide KeyboardInterrupt/SystemExit.
        return ''
|
|
|
|
|
|
-
|
|
|
-
|
|
|
# Cookies sent along with attachment downloads; only the UI language is set.
cookies = {
    "language": "zh-cn",
}

# Browser-like request headers sent along with attachment downloads.
# NOTE: "APP_TOKEN" and "Authorization" are intentionally empty. They were
# previously spelled "APP_TOKEN;" / "Authorization;", which is curl's
# command-line syntax (-H "Name;") for "send this header with no value";
# the semicolon is not part of the header name and must not be sent.
headers = {
    "APP_TOKEN": "",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Authorization": "",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Type": "application/json;charset=UTF-8",
    "Origin": "https://sp.iccec.cn",
    "Pragma": "no-cache",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}
|
|
|
|
|
|
class Details(feapder.BiddingDetailSpider):
|
|
|
|
|
|
def start_requests(self):
    """Turn queued detail tasks into feapder requests.

    Fetches up to 50 tasks via ``self.get_tasks_by_rabbitmq`` and yields one
    ``feapder.Request`` per task. Each task is a dict read with these keys:
    ``parse_url`` (request URL), ``deal_detail``, ``parse`` (callback
    expression) and ``request_params`` (extra keyword arguments for the
    request; optional).

    Yields:
        feapder.Request: one request per task, carrying the task as ``item``.
    """
    data_list = self.get_tasks_by_rabbitmq(limit=50)
    for item in data_list:
        # A task without "request_params" yields None here; fall back to an
        # empty dict so the ``**`` expansion below does not raise TypeError.
        request_params = item.get("request_params") or {}
        # NOTE(review): the callback is resolved with eval() on a string
        # taken from the task payload. This is only safe while the task
        # queue is fully trusted; presumably the value is something like
        # "self.detail_get" - confirm against the task producer.
        callback = eval(item.get("parse"))
        yield feapder.Request(
            url=item.get("parse_url"),
            item=item,
            deal_detail=item.get("deal_detail"),
            callback=callback,
            **request_params,
        )
|
|
|
|
|
|
|
|
|
-
|
|
|
def detail_get(self, request, response):
|
|
|
|
|
|
items = request.item
|
|
|
list_item = DataBakItem(**items)
|
|
|
+ dt = response.json.get('data')
|
|
|
+ html = dt.get('textInfo')
|
|
|
+ linkInfo = dt.get('linkInfo','')
|
|
|
|
|
|
- html = response.json.get('data').get('textInfo')
|
|
|
-
|
|
|
- list_item.contenthtml = html
|
|
|
+ list_item.contenthtml = html + linkInfo
|
|
|
|
|
|
file_list = response.json.get('data').get('fileinfoBOS')
|
|
|
if file_list:
|
|
|
attachments = {}
|
|
|
for info in file_list:
|
|
|
- fid = info.get('createId')
|
|
|
- file_url = btoa(info.get('fileUrl')) or f"https://sp.iccec.cn/apis/sp/oss/users/signup/downloadByUploadId?uploadId={fid}"
|
|
|
+ fid = info.get('fileUrl')
|
|
|
file_name = info.get('fileName').strip()
|
|
|
+ file_url = f"https://sp.iccec.cn/apis/sp/oss/users/signup/downloadByUploadId?fileUrl={fid}&fileName={file_name}"
|
|
|
file_type = extract_file_type(file_name,file_url)
|
|
|
if file_type:
|
|
|
attachment = AttachmentDownloader().fetch_attachment(
|
|
|
- file_name=file_name, file_type=file_type, download_url=file_url)
|
|
|
+ file_name=file_name, file_type=file_type, download_url=file_url,
|
|
|
+ headers=headers,cookies=cookies)
|
|
|
attachments[str(len(attachments) + 1)] = attachment
|
|
|
if attachments:
|
|
|
list_item.projectinfo = {"attachments": attachments}
|