|
@@ -0,0 +1,187 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+Created on 2024-12-06
|
|
|
+---------
|
|
|
+@summary: 辽宁省环保集团有限责任公司
|
|
|
+---------
|
|
|
+@author: lzz
|
|
|
+"""
|
|
|
+import re
|
|
|
+import feapder
|
|
|
+from items.spider_item import DataBakItem
|
|
|
+from untils.tools import extract_file_type
|
|
|
+from untils.attachment import AttachmentDownloader
|
|
|
+import requests
|
|
|
+import random
|
|
|
+import execjs
|
|
|
+import time
|
|
|
+import html
|
|
|
+import warnings
|
|
|
+
|
|
|
+warnings.filterwarnings('ignore')
|
|
|
+
|
|
|
+
|
|
|
def get_pageId(hid, proxies=False):
    """Open a guest session for one notice and prepare its detail-request credentials.

    Three steps are performed against lnepg.kdcloud.com:

    1. GET ``index.html`` for the notice to obtain the first ``traceId``
       response header (and any session cookies).
    2. GET ``form/getConfig.do`` with the ``flag`` / ``f`` values computed by
       the local JS helper. The response carries ``kd-csrf-token``, ``Date``
       and a new ``traceId`` in its headers and ``pageId`` in its JSON body.
    3. Build the header set for the follow-up form request, including the
       ``signature`` the JS helper computes from the client time, the CSRF
       token and the server ``Date`` header.

    Args:
        hid: Notice id (the ``noticeId`` query parameter).
        proxies: Proxy mapping handed to ``requests``; any falsy value means
            no explicit proxy.

    Returns:
        ``(token, pageId, headers, cookies)`` on success, or the empty string
        ``""`` when any step fails. Callers must check for the falsy result
        before unpacking.
    """
    import logging

    base_url = "https://lnepg.kdcloud.com"
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    )
    # The query separator must be a literal "&noticeId"; an HTML-entity
    # decoded "¬iceId" here would send a Referer the site never issued.
    referer = f"{base_url}/index.html?formId=quo_notice&noticeId={hid}&userId=guest"

    try:
        # Explicit encoding so the helper script loads identically on every
        # platform instead of depending on the locale default.
        with open('./lnshbjtyxzrgs_pm.js', 'r', encoding='utf-8') as fr:
            ctx = execjs.compile(fr.read())

        flag = ctx.call('get_flag')
        f_value = ctx.call('get_f')

        # The context manager closes the pooled connections once the
        # credentials have been collected.
        with requests.session() as session:
            session.proxies = proxies or {}
            # Certificate verification is intentionally off, as before.
            session.verify = False

            # Step 1: landing page, only needed for traceId and cookies.
            page_headers = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Pragma": "no-cache",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": user_agent,
            }
            page_params = {
                "formId": "quo_notice",
                "noticeId": hid,
                "userId": "guest",
            }
            res = session.get(
                f"{base_url}/index.html",
                headers=page_headers,
                params=page_params,
                timeout=20,
            )
            traceId = res.headers.get('traceId')

            # Step 2: form configuration, yields CSRF token and pageId.
            config_headers = {
                "Accept": "*/*",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Content-Type": "text/json;charset=utf-8;",
                "Pragma": "no-cache",
                "Referer": referer,
                "User-Agent": user_agent,
                "ajax": "true",
                "client-start-time": f"{int(time.time() * 1000)}",
                "cqappid": "bos",
                "traceId": traceId,
                "userId": "1599869187784821760_-1",
            }
            # The pre-encoded query string is kept verbatim; it is the
            # URL-encoded JSON object the site expects.
            config_url = f"https://lnepg.kdcloud.com/form/getConfig.do?params=%7B%22formId%22%3A%22quo_notice%22%2C%22noticeId%22%3A%22{hid}%22%2C%22userId%22%3A%22guest%22%2C%22flag%22%3A%22{flag}%22%2C%22f%22%3A%22{f_value}%22%7D&random={random.random()}"
            resp = session.get(config_url, headers=config_headers, timeout=20)

            cookies = session.cookies.get_dict()
            token = resp.headers.get('kd-csrf-token')
            pageId = resp.json().get('pageId')
            resdate = resp.headers.get('Date')
            new_traceId = resp.headers.get('traceId')

        # Step 3: headers for the signed follow-up request.
        ctime = str(int(time.time() * 1000))
        headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded;charset=utf-8;",
            "Origin": "https://lnepg.kdcloud.com",
            "Pragma": "no-cache",
            "Referer": referer,
            "User-Agent": user_agent,
            "ajax": "true",
            "client-start-time": ctime,
            "cqappid": "quo",
            "kd-csrf-token": token,
            "signature": f"{ctx.call('signature', ctime, token, resdate)}__length__60",
            "traceId": new_traceId,
            "userId": "1599869187784821760_-1",
        }

        return token, pageId, headers, cookies
    except Exception:
        # Best-effort contract preserved: report the cause, then signal
        # failure with the same falsy value as before. KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        logging.getLogger(__name__).exception(
            "get_pageId failed for noticeId=%r", hid
        )
        return ""
|
|
|
+
|
|
|
# Browser-like headers sent with the form-encoded POST that downloads each
# attachment (passed to AttachmentDownloader.fetch_attachment in detail_get).
fheaders = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": "https://lnepg.kdcloud.com",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
}
|
|
|
+
|
|
|
class Details(feapder.BiddingDetailSpider):
    """Detail spider for notices served from lnepg.kdcloud.com.

    For every queued task, ``download_midware`` obtains fresh guest
    credentials through ``get_pageId`` and turns the request into the signed
    ``loadData`` form call. ``detail_get`` then extracts the rich-text body
    and downloads the listed attachments.
    """

    # Values from the most recent get_pageId() call. They are still updated
    # for backward compatibility, but detail_get prefers the copies attached
    # to its own request so concurrent requests cannot overwrite each other.
    token = ""
    pageId = ""
    cookies = None

    def start_requests(self):
        """Yield one detail request per task pulled from the task queue."""
        task_list = self.get_tasks_by_rabbitmq(limit=10)
        for item in task_list:
            # Tolerate tasks that carry no request_params at all.
            request_params = item.get("request_params") or {}
            # timeout is passed explicitly below, so it must not also arrive
            # through **request_params.
            timeout = request_params.pop('timeout', 20)
            # NOTE(security): "parse" comes from the task payload and is
            # evaluated as Python (typically resolving to a bound method such
            # as self.detail_get). Only safe while the queue is trusted.
            callback = eval(item.get("parse"))
            yield feapder.Request(
                url=item.get("parse_url"),
                item=item,
                deal_detail=item.get("deal_detail"),
                callback=callback,
                **request_params,
                timeout=timeout,
                proxies=False,
            )

    def download_midware(self, request):
        """Attach guest credentials and the ``loadData`` payload to *request*.

        Raises:
            ValueError: when ``get_pageId`` could not obtain credentials, so
                the failure is reported with a clear message instead of an
                unpacking error.
        """
        href = request.item.get('href') or ''
        # Match up to the next "&" or the end of the string, so the id is
        # also found when noticeId is the last query parameter.
        matched = re.search(r"noticeId=([^&]*)", href)
        hid = matched.group(1) if matched else ""

        bootstrap = get_pageId(hid)
        if not bootstrap:
            raise ValueError(f"get_pageId returned no credentials for noticeId={hid!r}")
        token, page_id, headers, cookies = bootstrap

        # Per-request copies consumed by detail_get.
        request.kd_token = token
        request.kd_page_id = page_id
        # Backward-compatible "last seen" values on the spider.
        self.token, self.pageId, self.cookies = token, page_id, cookies

        request.data = {
            "pageId": page_id,
            "appId": "quo",
            "params": "[{\"key\":\"\",\"methodName\":\"loadData\",\"args\":[],\"postData\":[]}]"
        }
        request.headers = headers
        request.cookies = cookies

    def detail_get(self, request, response):
        """Build the stored item: rich-text HTML plus downloaded attachments."""
        list_item = DataBakItem(**request.item)

        # Prefer the credentials bound to this request; fall back to the
        # spider-level values if download_midware did not set them.
        token = getattr(request, "kd_token", self.token)
        page_id = getattr(request, "kd_page_id", self.pageId)
        cookies = getattr(request, "cookies", None) or self.cookies

        detail_html = ""
        file_list = []
        for widget in response.json[0].get('p') or []:
            widget_key = widget.get('k')
            if widget_key == "richtexteditor":
                # The editor content arrives HTML-escaped.
                detail_html = html.unescape(widget.get('v') or "")
            elif widget_key == "attachmentpanel":
                file_list = widget.get('data') or []

        list_item.contenthtml = detail_html

        attachments = {}
        for info in file_list:
            download_path = info.get('downloadFilePath') or ''
            if not download_path:
                # Without a path there is nothing to download.
                continue
            file_name = (info.get('name') or '').strip()
            file_url = "https://lnepg.kdcloud.com/" + download_path + f"&kd_cs_ticket={token}"
            file_type = extract_file_type(file_name, file_url)
            if not file_type:
                continue
            data = {
                "fileName": file_name,
                "appId": "quo",
                "fId": "quo_notice",
                "pageId": page_id
            }
            attachment = AttachmentDownloader().fetch_attachment(
                method="POST",
                data=data,
                file_name=file_name,
                file_type=file_type,
                download_url=file_url,
                cookies=cookies,
                headers=fheaders,
            )
            # Keys are 1-based positions rendered as strings.
            attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: build the detail spider bound to its redis key
    # and start it.
    spider = Details(redis_key="lzz:lnshbjtyxzrgs_zcpt_zbgg")
    spider.start()
|