Remove dead code

dzr, 1 week ago
Parent
Current commit
69040f53d2

+ 0 - 324
lzz_theme/qgzbgggsssyq/py_ssyq_details.py

@@ -1,324 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 (national bidding announcement search engine) - detail pages
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from threading import Timer
-from parsel import Selector
-from utils.tools import *  # wildcard import; also supplies requests, execjs, time, random, re, logger, DuplicateKeyError used below
-
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.rds = Redis_client()
-        self.redis_key = "ztpc_ssyq_msg"
-        self.delete_key = ""
-        self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a 必联 (BiLian) watermark; flag the record so it is not pushed to the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy,headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
-        data_org = json.loads(data_org)  # plaintext is JSON; the previous eval() (true/false/null all replaced by 1) corrupted values
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        retry = 0
-        data_org = None
-        while (retry := retry + 1) < 5:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:  # request or decrypt failed; retry with a fresh proxy from get_QGIP()
-                continue
-        if data_org is None:
-            raise RuntimeError("bulletin API request failed after retries")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日', '')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href,pbtime
-
-    def new_parse(self,item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-        for _ in range(5):
-            f_tm = self.get_type_1017_f(pdfurl)
-            params = {
-                "type__1017": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=pdfurl,
-                proxies=get_QGIP(),headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            if _ == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        org_item = item.copy()
-        while (retry_times := retry_times + 1) < 5:
-            try:
-                new_url,pub_time = self.get_url(item['parse_url'])
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item,pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                item = org_item
-                logger.exception(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def countSec(self):
-        for count in range(5, 0, -1):
-            print(f'\r{count} 秒 后结束任务', end='')
-            time.sleep(1)
-        print('\r任务结束')
-
-    def de_redis_key(self):
-        self.end_state = True
-        self.rds.hdel(self.redis_key, self.delete_key)
-        logger.warning("当前数据未采集成功,数据已回填!")
-        self.countSec()
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-        time.sleep(random.random())
-        count = 0
-        ts = Timer(1195, self.de_redis_key)  # watchdog: after 1195 s, release the in-flight key and stop the run
-        ts.start()  # start the watchdog
-        with self.db_name.find({"parser_name": "ztpc_qgzbgggsssyq", "failed": False, "is_crawl": False},
-                               no_cursor_timeout=True) as data_list:
-            for item in data_list:
-                # logger.debug(item)
-                if self.end_state:
-                    break
-                if count >= limit:
-                    break
-                unicode_key = md5value(item.get('href') + item.get('title'))
-                if not self.rds.hexists(self.redis_key, unicode_key):  # dedupe on all fields except the dynamic ones
-                    self.rds.hset(self.redis_key, unicode_key, '')
-                    self.delete_key = unicode_key
-                    count += 1
-                    update_id = item["_id"]
-                    retry = item["retry"]
-                    if self.deal_request(item):
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-                    else:
-                        retry += 1
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True, "retry": retry}})
-
-        logger.debug("********** 详情页采集结束 **********")
-        ts.cancel()  # script finished within the time limit; cancel the watchdog
-
-if __name__ == "__main__":
-    Details().start(limit=300)
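
Review note: the two execjs round-trips above (keys "Ctpsp@884*" and "1qaz@wsx3e") only run CryptoJS DES/ECB with PKCS7 padding, which can be done natively in Python. A minimal sketch, assuming the cryptography package is available; it relies on CryptoJS using only the first 8 bytes of the UTF-8 key, and on 3DES with one 8-byte key repeated degenerating to single DES:

import base64
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

def des_ecb_decrypt(ciphertext_b64: str, key: str) -> str:
    key8 = key.encode("utf-8")[:8]  # CryptoJS DES reads only the leading 8 key bytes
    decryptor = Cipher(algorithms.TripleDES(key8 * 3), modes.ECB()).decryptor()
    padded = decryptor.update(base64.b64decode(ciphertext_b64)) + decryptor.finalize()
    unpadder = padding.PKCS7(64).unpadder()  # DES block size is 64 bits
    return (unpadder.update(padded) + unpadder.finalize()).decode("utf-8")

Usage would mirror the execjs calls: des_ecb_decrypt(res.text.replace('"', ''), "Ctpsp@884*") for the getSecretKey response, and des_ecb_decrypt(res.text.replace('"', ''), "1qaz@wsx3e") inside decrypt_by_des.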

+ 0 - 323
lzz_theme/qgzbgggsssyq/py_ssyq_details2.py

@@ -1,323 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 (national bidding announcement search engine) - detail pages
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from threading import Timer
-from parsel import Selector
-from utils.tools import *
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.rds = Redis_client()
-        self.redis_key = "ztpc_ssyq_msg"
-        self.delete_key = ""
-        self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a 必联 (BiLian) watermark; flag the record so it is not pushed to the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy,headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
-        data_org = json.loads(data_org)  # plaintext is JSON; the previous eval() (true/false/null all replaced by 1) corrupted values
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        retry = 0
-        data_org = None
-        while (retry := retry + 1) < 5:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:  # request or decrypt failed; retry with a fresh proxy from get_QGIP()
-                continue
-        if data_org is None:
-            raise RuntimeError("bulletin API request failed after retries")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日', '')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href,pbtime
-
-    def new_parse(self,item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-        for _ in range(5):
-            f_tm = self.get_type_1017_f(pdfurl)
-            params = {
-                "type__1017": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=pdfurl,
-                proxies=get_QGIP(),headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            if _ == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        org_item = item.copy()
-        while (retry_times := retry_times + 1) < 5:
-            try:
-                new_url,pub_time = self.get_url(item['parse_url'])
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item,pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                item = org_item
-                logger.exception(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def countSec(self):
-        for count in range(5, 0, -1):
-            print(f'\r{count} 秒 后结束任务', end='')
-            time.sleep(1)
-        print('\r任务结束')
-
-    def de_redis_key(self):
-        self.end_state = True
-        self.rds.hdel(self.redis_key, self.delete_key)
-        logger.warning("当前数据未采集成功,数据已回填!")
-        self.countSec()
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-        time.sleep(random.random())
-        count = 0
-        ts = Timer(1195, self.de_redis_key)  # watchdog: after 1195 s, release the in-flight key and stop the run
-        ts.start()  # start the watchdog
-        with self.db_name.find({"parser_name": "ztpc_qgzbgggsssyq", "failed": False, "is_crawl": False},
-                               no_cursor_timeout=True) as data_list:
-            for item in data_list:
-                # logger.debug(item)
-                if self.end_state:
-                    break
-                if count >= limit:
-                    break
-                unicode_key = md5value(item.get('href') + item.get('title'))
-                if not self.rds.hexists(self.redis_key, unicode_key):  # dedupe on all fields except the dynamic ones
-                    self.rds.hset(self.redis_key, unicode_key, '')
-                    self.delete_key = unicode_key
-                    count += 1
-                    update_id = item["_id"]
-                    retry = item["retry"]
-                    if self.deal_request(item):
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-                    else:
-                        retry += 1
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True, "retry": retry}})
-
-        logger.debug("********** 详情页采集结束 **********")
-        ts.cancel()  # script finished within the time limit; cancel the watchdog
-
-if __name__ == "__main__":
-    Details().start(limit=1000)
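
Review note: py_ssyq_details.py, py_ssyq_details2.py, py_ssyq_details3.py and py_ssyq_details4.py are near-identical copies that differ essentially only in the hard-coded start-up limit (300 vs 1000). A single entry point taking the limit from the command line would have avoided the duplication; a sketch, assuming the Details class above:

import sys

if __name__ == "__main__":
    # default mirrors the original hard-coded value; run e.g. `python py_ssyq_details.py 1000`
    Details().start(limit=int(sys.argv[1]) if len(sys.argv) > 1 else 300)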

+ 0 - 323
lzz_theme/qgzbgggsssyq/py_ssyq_details3.py

@@ -1,323 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 (national bidding announcement search engine) - detail pages
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from threading import Timer
-from parsel import Selector
-from utils.tools import *
-
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.rds = Redis_client()
-        self.redis_key = "ztpc_ssyq_msg"
-        self.delete_key = ""
-        self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a 必联 (BiLian) watermark; flag the record so it is not pushed to the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy,headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
-        data_org = json.loads(data_org)  # plaintext is JSON; the previous eval() (true/false/null all replaced by 1) corrupted values
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        retry = 0
-        data_org = None
-        while (retry := retry + 1) < 5:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:  # request or decrypt failed; retry with a fresh proxy from get_QGIP()
-                continue
-        if data_org is None:
-            raise RuntimeError("bulletin API request failed after retries")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日', '')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href,pbtime
-
-    def new_parse(self,item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-        for _ in range(5):
-            f_tm = self.get_type_1017_f(pdfurl)
-            params = {
-                "type__1017": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=pdfurl,
-                proxies=get_QGIP(),headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            if _ == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        org_item = item.copy()
-        while (retry_times := retry_times + 1) < 5:
-            try:
-                new_url,pub_time = self.get_url(item['parse_url'])
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item,pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                item = org_item
-                logger.exception(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def countSec(self):
-        for count in range(5, 0, -1):
-            print(f'\r{count} 秒 后结束任务', end='')
-            time.sleep(1)
-        print('\r任务结束')
-
-    def de_redis_key(self):
-        self.end_state = True
-        self.rds.hdel(self.redis_key, self.delete_key)
-        logger.warning("当前数据未采集成功,数据已回填!")
-        self.countSec()
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-        time.sleep(random.random())
-        count = 0
-        ts = Timer(1195, self.de_redis_key)  # watchdog: after 1195 s, release the in-flight key and stop the run
-        ts.start()  # start the watchdog
-        with self.db_name.find({"parser_name": "ztpc_qgzbgggsssyq", "failed": False, "is_crawl": False},
-                               no_cursor_timeout=True) as data_list:
-            for item in data_list:
-                # logger.debug(item)
-                if self.end_state:
-                    break
-                if count >= limit:
-                    break
-                unicode_key = md5value(item.get('href') + item.get('title'))
-                if not self.rds.hexists(self.redis_key, unicode_key):  # dedupe on all fields except the dynamic ones
-                    self.rds.hset(self.redis_key, unicode_key, '')
-                    self.delete_key = unicode_key
-                    count += 1
-                    update_id = item["_id"]
-                    retry = item["retry"]
-                    if self.deal_request(item):
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-                    else:
-                        retry += 1
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True, "retry": retry}})
-
-        logger.debug("********** 详情页采集结束 **********")
-        ts.cancel()  # script finished within the time limit; cancel the watchdog
-
-if __name__ == "__main__":
-    Details().start(limit=300)
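
Review note: the Timer(1195, self.de_redis_key) in start() is a watchdog, not a scheduler. If a run is still going after ~20 minutes, de_redis_key sets end_state (breaking the cursor loop) and deletes the last in-flight dedupe key from the Redis hash so the item can be retried by a later run; on normal completion ts.cancel() skips that rollback. The same fail-safe in isolation, as a sketch with hypothetical names:

from threading import Timer

class InFlightWatchdog:
    """Sketch: roll back the in-flight dedupe key if a run exceeds its deadline."""

    def __init__(self, rds, redis_key, seconds=1195):
        self.rds = rds                # any client exposing hdel(), e.g. redis.Redis
        self.redis_key = redis_key
        self.delete_key = ""          # key of the item currently being crawled
        self.expired = False          # the crawl loop polls this flag and breaks
        self._timer = Timer(seconds, self._on_timeout)

    def _on_timeout(self):
        self.expired = True
        self.rds.hdel(self.redis_key, self.delete_key)  # re-queue the unfinished item

    def start(self):
        self._timer.start()

    def cancel(self):
        self._timer.cancel()          # normal completion: keep the dedupe mark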

+ 0 - 323
lzz_theme/qgzbgggsssyq/py_ssyq_details4.py

@@ -1,323 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 (national bidding announcement search engine) - detail pages
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from threading import Timer
-from parsel import Selector
-from utils.tools import *
-
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.rds = Redis_client()
-        self.redis_key = "ztpc_ssyq_msg"
-        self.delete_key = ""
-        self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a 必联 (BiLian) watermark; flag the record so it is not pushed to the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy,headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
-        data_org = json.loads(data_org)  # plaintext is JSON; the previous eval() (true/false/null all replaced by 1) corrupted values
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        retry = 0
-        data_org = None
-        while (retry := retry + 1) < 5:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:  # request or decrypt failed; retry with a fresh proxy from get_QGIP()
-                continue
-        if data_org is None:
-            raise RuntimeError("bulletin API request failed after retries")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日', '')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href,pbtime
-
-    def new_parse(self,item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-        for _ in range(5):
-            f_tm = self.get_type_1017_f(pdfurl)
-            params = {
-                "type__1017": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=pdfurl,
-                proxies=get_QGIP(),headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            if _ == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        org_item = item.copy()
-        while (retry_times := retry_times + 1) < 5:
-            try:
-                new_url,pub_time = self.get_url(item['parse_url'])
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item,pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                item = org_item
-                logger.exception(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def countSec(self):
-        for count in range(5, 0, -1):
-            print(f'\r{count} 秒 后结束任务', end='')
-            time.sleep(1)
-        print('\r任务结束')
-
-    def de_redis_key(self):
-        self.end_state = True
-        self.rds.hdel(self.redis_key, self.delete_key)
-        logger.warning("当前数据未采集成功,数据已回填!")
-        self.countSec()
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-        time.sleep(random.random())
-        count = 0
-        ts = Timer(1195, self.de_redis_key)  # watchdog: after 1195 s, release the in-flight key and stop the run
-        ts.start()  # start the watchdog
-        with self.db_name.find({"parser_name": "ztpc_qgzbgggsssyq", "failed": False, "is_crawl": False},
-                               no_cursor_timeout=True) as data_list:
-            for item in data_list:
-                # logger.debug(item)
-                if self.end_state:
-                    break
-                if count >= limit:
-                    break
-                unicode_key = md5value(item.get('href') + item.get('title'))
-                if not self.rds.hexists(self.redis_key, unicode_key):  # dedupe on all fields except the dynamic ones
-                    self.rds.hset(self.redis_key, unicode_key, '')
-                    self.delete_key = unicode_key
-                    count += 1
-                    update_id = item["_id"]
-                    retry = item["retry"]
-                    if self.deal_request(item):
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-                    else:
-                        retry += 1
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True, "retry": retry}})
-
-        logger.debug("********** 详情页采集结束 **********")
-        ts.cancel()  # script finished within the time limit; cancel the watchdog
-
-if __name__ == "__main__":
-    Details().start(limit=300)

+ 0 - 300
lzz_theme/qgzbgggsssyq/py_ssyq_details_bu.py

@@ -1,300 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 (national bidding announcement search engine) - detail pages
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from threading import Timer
-from parsel import Selector
-from utils.tools import *
-
-
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.rds = Redis_client()
-        self.redis_key = "ztpc_ssyq_msg"
-        self.delete_key = ""
-        self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a 必联 (BiLian) watermark; flag the record so it is not pushed to the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy,headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
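-        # JS true/false/null literals are mashed to 1 so the decrypted JSON-like string
-        # can be eval'd as a Python dict; only the 'data' fields are read downstream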
-        data_org = eval(data_org.replace('true', '1').replace('false', '1').replace('null', '1'))
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        data_org = None
-        retry = 0
-        while (retry := retry + 1) < 5:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:
-                pass
-        if data_org is None:
-            # every retry failed; fail fast instead of hitting a NameError below
-            raise ValueError("详情接口请求失败!")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日','')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href, pbtime
-
-    def new_parse(self, item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-        for _ in range(5):
-            f_tm = self.get_type_1017_f(pdfurl)
-            params = {
-                "type__1017": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=pdfurl,
-                proxies=get_QGIP(),headers=headers,params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            if _ == 4:
-                raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        org_item = item.copy()
-        while (retry_times := retry_times + 1) < 5:
-            try:
-                new_url, pub_time = self.get_url(item['parse_url'])
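-                # direct PDF links go through new_parse; everything else is fetched as an HTML detail page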
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item, pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                item = org_item
-                logger.exception(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-
-        with self.db_name.find({"parser_name": "ztpc_qgzbgggsssyq", "failed": True, "is_crawl": False,
-                                "retry":{"$lt":3}}, no_cursor_timeout=True).limit(limit) as cursor:
-            data_list = [dd for dd in cursor]
-        for item in data_list:
-            # logger.debug(item)
-            update_id = item["_id"]
-            retry = item["retry"]
-            if self.deal_request(item):
-                self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-            else:
-                retry += 1
-                self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True, "retry": retry}})
-
-        logger.debug("********** 详情页采集结束 **********")
-
-if __name__ == "__main__":
-    Details().start(limit=2000)

+ 0 - 208
lzz_theme/qgzbgggsssyq/py_ssyq_list.py

@@ -1,208 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-10-29
----------
-@summary: 全国招标公告公示搜索引擎 - list page
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from utils.tools import *
-from utils.RedisDB import RedisFilter
-import requests
-import warnings
-import ast
-
-warnings.filterwarnings('ignore')
-
-
-class Spider:
-
-    def __init__(self):
-        self.py_spider = Mongo_client().py_spider
-        self.zb_list = self.py_spider.theme_list
-        self.RDS = RedisFilter()
-        self.real_cont = 0
-        self.headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            # "Connection": "keep-alive",
-            # "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
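-    # acw_sc__v2 looks like the Aliyun WAF challenge cookie: arg1 from the intercept
-    # page is unshuffled, then hex-XORed with a fixed mask to produce arg2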
-    def get_acw_sc_v2(self, html):
-        try:
-            arg1 = "".join(re.findall("arg1='(.*?)'", html))
-            if arg1:
-                js_script = '''
-                    function getAcw_sc__v2(obt_arg1) {
-                        String["prototype"]["hexXor"] = function (_0x4e08d8) {
-                            var _0x5a5d3b = '';
-                            for (var _0xe89588 = 0x0; _0xe89588 < this["length"] && _0xe89588 < _0x4e08d8["length"]; _0xe89588 += 2) {
-                                var _0x401af1 = parseInt(this["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x105f59 = parseInt(_0x4e08d8["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x189e2c = (_0x401af1 ^ _0x105f59)["toString"](16);
-                                if (_0x189e2c["length"] == 1) {
-                                    _0x189e2c = '0' + _0x189e2c;
-                                }
-                                _0x5a5d3b += _0x189e2c;
-                            }
-                            return _0x5a5d3b;
-                        };
-                        String["prototype"]["unsbox"] = function () {
-                            var _0x4b082b = [15, 35,29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36];
-                            var _0x4da0dc = [];
-                            var _0x12605e = '';
-                            for (var _0x20a7bf = 0x0; _0x20a7bf < this["length"]; _0x20a7bf++) {
-                                var _0x385ee3 = this[_0x20a7bf];
-                                for (var _0x217721 = 0; _0x217721 < _0x4b082b["length"]; _0x217721++) {
-                                    if (_0x4b082b[_0x217721] == _0x20a7bf + 1) {
-                                        _0x4da0dc[_0x217721] = _0x385ee3;
-                                    }
-                                }
-                            }
-                            _0x12605e = _0x4da0dc["join"]('');
-                            return _0x12605e;
-                        };
-
-                        var _0x5e8b26 = "3000176000856006061501533003690027800375";
-                        // var arg1 = "0A5F01F50F9BC66FB28038F18B99B7B10CFF4667"
-                        var arg1 = obt_arg1
-                        var _0x23a392 = arg1["unsbox"]();
-                        arg2 = _0x23a392["hexXor"](_0x5e8b26);
-                        return arg2
-                    }
-                '''
-                ctx = execjs.compile(js_script)
-                arg2 = ctx.call('getAcw_sc__v2', arg1)
-                return {"acw_sc__v2": arg2}
-            else:
-                return {}
-        except:
-            return {}
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_lt', page)
-
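-    # list API responses arrive as Base64 DES(ECB) ciphertext under the static key "1qaz@wsx3e"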
-    def get_data(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text)
-        return data_org
-
-    def fetch(self, page):
-        url = f"https://ctbpsp.com/cutominfoapi/recommand/type/5/pagesize/10/currentpage/{page}"
-        params = {
-            "type__1017": self.get_type_1017(page)
-        }
-        response = requests.get(url, headers=self.headers, params=params, proxies=get_QGIP(), verify=False)
-        text = response.content.decode().replace('"', "")
-        ret = self.get_data(text)
-        if not ret:
-            raise ValueError('数据内容为空.')
-        return ret
-
-    @staticmethod
-    def data_extract(data):
-        data_info = data.replace('true', '1').replace('false', '1').replace('null', '1')
-        iter_data = ast.literal_eval(data_info)
-        if iter_data.get('data') == 1:
-            # {'success': 1, 'data': 1, 'errorMessage': ''}
-            raise TypeError('数据获取失败.')
-
-        data_list = iter_data['data'].get('dataList')
-        page_size = iter_data['data']['pageSize']
-        return data_list, page_size
-
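-    # dedup key is title+href via the shared Redis filter; only unseen rows are inserted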
-    def parse(self, data_info, page):
-        info_list, _ = self.data_extract(data_info)
-
-        results_list = []
-        for info in info_list:
-            hid = info.get('bulletinID')
-            did = info.get('dataSource')
-            href = f"http://ctbpsp.com/#/bulletinDetail?uuid={hid}&inpvalue=&dataSource={did}&tenderAgency="
-            title = info.get('noticeName').strip()
-            create_time = info.get('noticeSendTime')
-            reginProvince = info.get('reginProvince').replace("省", "").replace("市", "")
-
-            dedup = [title + href]
-            if not self.RDS.data_filter(dedup):
-                item = {
-                    "site": "中国招标投标公共服务平台",
-                    "channel": "全国招标公告公示搜索引擎",
-                    "spidercode": "a_qgzbgggsssyq_qbgg",
-                    "area": reginProvince,
-                    "city": "",
-                    "district": "",
-                    "href": href,
-                    "title": title,
-                    "publishtime": create_time,
-                    "parse_url": href,
-                    "parser_name": "ztpc_qgzbgggsssyq",
-                    "is_mixed": False,
-                    "is_theme": True,
-                    "retry": 0,
-                    "comeintime": int2long(time.time()),
-                    "is_crawl": False,
-                    "failed": False,
-                    "iscompete": True,
-                    "sendflag": "false",
-                    "T": "bidding",
-                    "infoformat": 1,
-                    "type": "",
-                    "publishdept": "",
-                    "_d": "comeintime",
-                }
-
-                self.zb_list.insert_one(item)
-                self.RDS.data_save_redis(dedup)
-                results_list.append(item)
-
-        logger.info(f' *** 第{page}页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
-        self.real_cont += len(results_list)
-        return results_list
-
-    def crawl(self, page):
-        retry = 0
-        while (retry := retry + 1) < 5:
-            try:
-                data_info = self.fetch(page=page)
-                self.parse(data_info=data_info, page=page)
-                time.sleep(random.random())
-                return
-            except Exception as e:
-                logger.error(f"第{page}页 采集异常:{e}")
-                time.sleep(3)
-
-    def start(self, crawl_page):
-        logger.debug("********** 列表页开始 **********")
-        for page in range(1, crawl_page + 1):
-            self.crawl(page=page)
-            logger.info(f"当前已采集 {self.real_cont} 条数据")
-        logger.debug("********** 列表页结束 **********")
-
-
-if __name__ == '__main__':
-    Spider().start(100)

+ 0 - 208
lzz_theme/qgzbgggsssyq/py_ssyq_list_bu.py

@@ -1,208 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-08-06
----------
-@summary: 全国招标公告公示搜索引擎 - list page
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from utils.tools import *
-from utils.RedisDB import RedisFilter
-import requests
-import warnings
-import ast
-
-warnings.filterwarnings('ignore')
-
-
-class Spider:
-
-    def __init__(self):
-        self.py_spider = Mongo_client().py_spider
-        self.zb_list = self.py_spider.theme_list
-        self.RDS = RedisFilter()
-        self.real_cont = 0
-        self.headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            # "Connection": "keep-alive",
-            # "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-    def get_acw_sc_v2(self, html):
-        try:
-            arg1 = "".join(re.findall("arg1='(.*?)'", html))
-            if arg1:
-                js_script = '''
-                    function getAcw_sc__v2(obt_arg1) {
-                        String["prototype"]["hexXor"] = function (_0x4e08d8) {
-                            var _0x5a5d3b = '';
-                            for (var _0xe89588 = 0x0; _0xe89588 < this["length"] && _0xe89588 < _0x4e08d8["length"]; _0xe89588 += 2) {
-                                var _0x401af1 = parseInt(this["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x105f59 = parseInt(_0x4e08d8["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x189e2c = (_0x401af1 ^ _0x105f59)["toString"](16);
-                                if (_0x189e2c["length"] == 1) {
-                                    _0x189e2c = '0' + _0x189e2c;
-                                }
-                                _0x5a5d3b += _0x189e2c;
-                            }
-                            return _0x5a5d3b;
-                        };
-                        String["prototype"]["unsbox"] = function () {
-                            var _0x4b082b = [15, 35,29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36];
-                            var _0x4da0dc = [];
-                            var _0x12605e = '';
-                            for (var _0x20a7bf = 0x0; _0x20a7bf < this["length"]; _0x20a7bf++) {
-                                var _0x385ee3 = this[_0x20a7bf];
-                                for (var _0x217721 = 0; _0x217721 < _0x4b082b["length"]; _0x217721++) {
-                                    if (_0x4b082b[_0x217721] == _0x20a7bf + 1) {
-                                        _0x4da0dc[_0x217721] = _0x385ee3;
-                                    }
-                                }
-                            }
-                            _0x12605e = _0x4da0dc["join"]('');
-                            return _0x12605e;
-                        };
-
-                        var _0x5e8b26 = "3000176000856006061501533003690027800375";
-                        // var arg1 = "0A5F01F50F9BC66FB28038F18B99B7B10CFF4667"
-                        var arg1 = obt_arg1
-                        var _0x23a392 = arg1["unsbox"]();
-                        arg2 = _0x23a392["hexXor"](_0x5e8b26);
-                        return arg2
-                    }
-                '''
-                ctx = execjs.compile(js_script)
-                arg2 = ctx.call('getAcw_sc__v2', arg1)
-                return {"acw_sc__v2": arg2}
-            else:
-                return {}
-        except:
-            return {}
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_lt', page)
-
-    def get_data(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text)
-        return data_org
-
-    def fetch(self, page):
-        url = f"https://ctbpsp.com/cutominfoapi/recommand/type/5/pagesize/10/currentpage/{page}"
-        params = {
-            "type__1017": self.get_type_1017(page)
-        }
-        response = requests.get(url, headers=self.headers, params=params, proxies=get_QGIP(), verify=False)
-        text = response.content.decode().replace('"', "")
-        ret = self.get_data(text)
-        if not ret:
-            raise ValueError('数据内容为空.')
-        return ret
-
-    @staticmethod
-    def data_extract(data):
-        data_info = data.replace('true', '1').replace('false', '1').replace('null', '1')
-        iter_data = ast.literal_eval(data_info)
-        if iter_data.get('data') == 1:
-            # {'success': 1, 'data': 1, 'errorMessage': ''}
-            raise TypeError('数据获取失败.')
-
-        data_list = iter_data['data'].get('dataList')
-        page_size = iter_data['data']['pageSize']
-        return data_list, page_size
-
-    def parse(self, data_info, page):
-        info_list, _ = self.data_extract(data_info)
-
-        results_list = []
-        for info in info_list:
-            hid = info.get('bulletinID')
-            did = info.get('dataSource')
-            href = f"http://ctbpsp.com/#/bulletinDetail?uuid={hid}&inpvalue=&dataSource={did}&tenderAgency="
-            title = info.get('noticeName').strip()
-            create_time = info.get('noticeSendTime')
-            reginProvince = info.get('reginProvince').replace("省", "").replace("市", "")
-
-            dedup = [title + href]
-            if not self.RDS.data_filter(dedup):
-                item = {
-                    "site": "中国招标投标公共服务平台",
-                    "channel": "全国招标公告公示搜索引擎",
-                    "spidercode": "a_qgzbgggsssyq_qbgg",
-                    "area": reginProvince,
-                    "city": "",
-                    "district": "",
-                    "href": href,
-                    "title": title,
-                    "publishtime": create_time,
-                    "parse_url": href,
-                    "parser_name": "ztpc_qgzbgggsssyq",
-                    "is_mixed": False,
-                    "is_theme": True,
-                    "retry": 0,
-                    "comeintime": int2long(int(time.time())),
-                    "is_crawl": False,
-                    "failed": False,
-                    "iscompete": True,
-                    "sendflag": "false",
-                    "T": "bidding",
-                    "infoformat": 1,
-                    "type": "",
-                    "publishdept": "",
-                    "_d": "comeintime",
-                }
-
-                self.zb_list.insert_one(item)
-                self.RDS.data_save_redis(dedup)
-                results_list.append(item)
-
-        logger.info(f' *** 第{page}页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
-        self.real_cont += len(results_list)
-        return results_list
-
-    def crawl(self, page):
-        retry = 0
-        while (retry := retry + 1) < 5:
-            try:
-                data_info = self.fetch(page=page)
-                self.parse(data_info=data_info, page=page)
-                time.sleep(random.random())
-                return
-            except Exception as e:
-                logger.error(f"第{page}页 采集异常:{e}")
-                time.sleep(3)
-
-    def start(self, crawl_page):
-        logger.debug("********** 列表页开始 **********")
-        for page in range(1, crawl_page + 1):
-            self.crawl(page=page)
-            logger.info(f"当前已采集 {self.real_cont} 条数据")
-        logger.debug("********** 列表页结束 **********")
-
-
-if __name__ == '__main__':
-    Spider().start(1000)

+ 0 - 280
lzz_theme/qgzbgggsssyq/sscrawl_details.py

@@ -1,280 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-09-18
----------
-@summary: 全国招标公告公示搜索引擎 - detail page
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import json
-from utils.attachment import AttachmentDownloader
-from parsel import Selector
-from utils.tools import *
-
-
-
-class dt_Spider:
-
-    def __init__(self):
-        self.proxy = get_proxy(socks5h=True)
-        self.db_table = Mongo_client().py_spider
-        self.zt_details = self.db_table.data_bak
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-    def get_time__2652(self, page=None, cid=None, rid=None):
-        with open('./ssyq_pm.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-
-        return ctx.call('tm', page, cid, rid)
-
-    def get_type_1017(self, page):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_dt', page)
-
-    def get_type_1017_f(self, href):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_file', href)
-
-    def detail_get(self, response, item, new_url):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        if "来源渠道:必联电子招标投标平台" in response.text:
-            # the PDF carries a "必联" platform watermark, so flag the record to skip the save service
-            item["sendflag"] = "true"
-
-        dd = root.xpath('//div[@class="mian_list_03"]/@index').extract_first()
-
-        cookies = response.cookies.get_dict()
-        headers2 = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Content-Length": "0",
-            "Origin": "https://bulletin.cebpubservice.com",
-            "Pragma": "no-cache",
-            "Referer": new_url,
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-        }
-
-        url2 = "https://bulletin.cebpubservice.com/details/permission/getSecretKey"
-        params = {
-            "time__2652": self.get_time__2652()
-        }
-        res = requests.post(url2, headers=headers2, cookies=cookies, params=params,
-                            timeout=30, proxies=self.proxy, verify=False)
-
-        ex_js = '''
-            CryptoJS = require("crypto-js")
-
-            function decryptByDES(ciphertext, key) {
-                    var keyHex = CryptoJS.enc.Utf8.parse("Ctpsp@884*");
-                    var decrypted = CryptoJS.DES.decrypt({
-                        ciphertext: CryptoJS.enc.Base64.parse(ciphertext)
-                    }, keyHex, {
-                        mode: CryptoJS.mode.ECB,
-                        padding: CryptoJS.pad.Pkcs7
-                    });
-                    return decrypted.toString(CryptoJS.enc.Utf8);
-            }
-            '''
-        ctx = execjs.compile(ex_js)
-        pm = ctx.call('decryptByDES', res.text.replace('"', ''))
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-        ffid = json.loads(pm).get('data')
-        f_org = f"/details/bulletin/getBulletin/{ffid}/{dd}"
-
-        for i in range(5):
-            file_url = f"https://bulletin.cebpubservice.com/details/bulletin/getBulletin/{ffid}/{dd}"
-            headers = {
-                "Accept": "*/*",
-                "Accept-Language": "zh-CN,zh;q=0.9",
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "Pragma": "no-cache",
-                "Referer": "https://bulletin.cebpubservice.com/resource/ceb/js/pdfjs-dist/web/viewer.html",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-            }
-            f_tm = self.get_time__2652(f_org)
-            params = {
-                "time__2652": f"{f_tm}"
-            }
-            attachment = AttachmentDownloader().fetch_attachment(
-                file_name=item["title"], file_type="pdf", download_url=file_url,
-                proxies=self.proxy, headers=headers, params=params, is_check=True)
-            if attachment.get('size'):
-                attachments[str(len(attachments) + 1)] = attachment
-                break
-            if i == 4:
-                raise FileNotFoundError("附件下载失败!")
-            time.sleep(random.randint(3, 6))
-            self.proxy = get_proxy(socks5h=True)
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def decrypt_by_des(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text.replace('"', ''))
-        data_org = eval(data_org.replace('true', '1').replace('false', '1').replace('null', '1'))
-        return data_org
-
-    def get_url(self, parse_url):
-        uid = "".join(re.findall('uuid=(.*?)&', parse_url))
-        headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-        url = f"https://ctbpsp.com/cutominfoapi/bulletin/{uid}/uid/0"
-
-        data_org = None
-        retry = 0
-        while (retry := retry + 1) < 3:
-            params = {
-                "type__1017": self.get_type_1017(uid)
-            }
-            try:
-                res = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30)
-                data_org = self.decrypt_by_des(res.text.replace('"', ""))
-                break
-            except Exception:
-                pass
-        if data_org is None:
-            # every retry failed; fail fast instead of hitting a NameError below
-            raise ValueError("详情接口请求失败!")
-
-        new_href = data_org.get('data').get('pdfUrl')
-        pub_time = data_org.get('data').get('noticeSendTimeStr', '')
-        pbtime = pub_time.replace('年', '-').replace('月', '-').replace('日', '')
-        if "bulletinPDF" not in new_href:
-            new_href = data_org.get('data').get('noticeUrl')
-        return new_href, pbtime
-
-    def new_parse(self, item, pdfurl):
-
-        item["contenthtml"] = "详情请访问原网页!"
-
-        attachments = {}
-
-        headers = {
-            "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": f"https://ctbpsp.com/web_pdf/pdfjs-dist/web/viewer.html?file={pdfurl}",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-
-        f_tm = self.get_type_1017_f(pdfurl)
-        params = {
-            "type__1017": f"{f_tm}"
-        }
-        attachment = AttachmentDownloader().fetch_attachment(
-            file_name=item["title"], file_type="pdf", download_url=pdfurl,
-            proxies=get_QGIP(), headers=headers, params=params, is_check=True)
-        if attachment.get('size'):
-            attachments[str(len(attachments) + 1)] = attachment
-        else:
-            raise FileNotFoundError("附件下载失败!")
-
-        if attachments:
-            item['projectinfo'] = {"attachments": attachments}
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-
-    def fetch_request(self, url):
-        response = requests.get(url=url, headers=self.headers,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        retry_times = 0
-        while (retry_times := retry_times + 1) < 3:
-            try:
-                new_url, pub_time = self.get_url(item['href'])
-                if "bulletinPDF" in new_url:
-                    try:
-                        date_to_timestamp(item['publishtime'])
-                    except:
-                        item['publishtime'] = pub_time
-                    self.new_parse(item=item, pdfurl=new_url)
-                    return True
-                else:
-                    response = self.fetch_request(new_url)
-                    if response is not None and response.status_code == 200:
-                        self.detail_get(response, item=item, new_url=new_url)
-                        time.sleep(random.random())
-                        return True
-            except Exception as e:
-                logger.error(f"{item['href']} 采集异常:{e}")
-                time.sleep(random.randint(5,10))
-                self.proxy = get_proxy(socks5h=True)
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def start(self, item: dict):
-        logger.debug(f"********** {item['title']} 详情页采集开始 **********")
-
-        rst = self.deal_request(item)
-
-        logger.debug(f"********** {item['title']} 详情页采集结束 **********")
-
-        return rst
-
-# if __name__ == "__main__":
-#     dt_Spider().start({})

+ 0 - 195
lzz_theme/qgzbgggsssyq/sscrawl_list.py

@@ -1,195 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-09-18
----------
-@summary: 全国招标公告公示搜索引擎 - list page
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from utils.tools import *
-import requests
-import warnings
-from urllib.parse import quote
-from sscrawl_details import dt_Spider
-
-
-warnings.filterwarnings('ignore')
-
-
-class Spider:
-
-    def __init__(self):
-        self.headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
-        }
-
-    def get_data(self, text: str):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        data_org = ctx.call('en_str', text)
-        return data_org
-
-    def get_acw_sc_v2(self, html):
-        try:
-            arg1 = "".join(re.findall("arg1='(.*?)'", html))
-            if arg1:
-                js_script = '''
-                    function getAcw_sc__v2(obt_arg1) {
-                        String["prototype"]["hexXor"] = function (_0x4e08d8) {
-                            var _0x5a5d3b = '';
-                            for (var _0xe89588 = 0x0; _0xe89588 < this["length"] && _0xe89588 < _0x4e08d8["length"]; _0xe89588 += 2) {
-                                var _0x401af1 = parseInt(this["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x105f59 = parseInt(_0x4e08d8["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x189e2c = (_0x401af1 ^ _0x105f59)["toString"](16);
-                                if (_0x189e2c["length"] == 1) {
-                                    _0x189e2c = '0' + _0x189e2c;
-                                }
-                                _0x5a5d3b += _0x189e2c;
-                            }
-                            return _0x5a5d3b;
-                        };
-                        String["prototype"]["unsbox"] = function () {
-                            var _0x4b082b = [15, 35,29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36];
-                            var _0x4da0dc = [];
-                            var _0x12605e = '';
-                            for (var _0x20a7bf = 0x0; _0x20a7bf < this["length"]; _0x20a7bf++) {
-                                var _0x385ee3 = this[_0x20a7bf];
-                                for (var _0x217721 = 0; _0x217721 < _0x4b082b["length"]; _0x217721++) {
-                                    if (_0x4b082b[_0x217721] == _0x20a7bf + 1) {
-                                        _0x4da0dc[_0x217721] = _0x385ee3;
-                                    }
-                                }
-                            }
-                            _0x12605e = _0x4da0dc["join"]('');
-                            return _0x12605e;
-                        };
-
-                        var _0x5e8b26 = "3000176000856006061501533003690027800375";
-                        // var arg1 = "0A5F01F50F9BC66FB28038F18B99B7B10CFF4667"
-                        var arg1 = obt_arg1
-                        var _0x23a392 = arg1["unsbox"]();
-                        arg2 = _0x23a392["hexXor"](_0x5e8b26);
-                        return arg2
-                    }
-                '''
-                ctx = execjs.compile(js_script)
-                arg2 = ctx.call('getAcw_sc__v2', arg1)
-                return {"acw_sc__v2": arg2}
-            else:
-                return {}
-        except:
-            return {}
-
-    def get_type_1017(self,typm):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_ss',typm)
-
-    def fetch(self, keyword):
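-        # the signed URL must match byte-for-byte, so the query string is built by hand
-        # rather than letting requests re-encode the keyword (dict version kept for reference)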
-        # url = "https://ctbpsp.com/cutominfoapi/searchkeyword"
-        # params = {
-        #     "keyword": keyword,
-        #     "uid": "0",
-        #     "PageSize": "10",
-        #     "CurrentPage": "1",
-        #     "searchType": "0",
-        #     "bulletinType": "5",
-        #     "type__1017": self.get_type_1017(quote(keyword))
-        # }
-        furl = f"https://ctbpsp.com/cutominfoapi/searchkeyword?keyword={keyword}&uid=0&PageSize=10&CurrentPage=1&searchType=0&bulletinType=5&type__1017={self.get_type_1017(quote(keyword, safe='/'))}"
-
-        response = requests.get(furl, headers=self.headers, proxies=get_QGIP(), verify=False)
-        data_info = self.get_data(response.text.replace('"', ""))
-        if "error while performing request" in data_info:
-            raise ValueError("错误请求!")
-
-        return data_info
-
-    def parse(self, data_info):
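-        # keyword backfill targets a single bulletin, so only the first hit is returned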
-        data_info = data_info.replace('true', '1').replace('false', '1').replace('null', '1')
-        info_list = eval(data_info).get('data').get('dataList')
-        for info in info_list:
-            hid = info.get('bulletinID')
-            did = info.get('dataSource')
-            href = f"http://ctbpsp.com/#/bulletinDetail?uuid={hid}&inpvalue=&dataSource={did}&tenderAgency="
-            title = info.get('noticeName').replace('<em>','').replace('</em>','').strip()
-            create_time = info.get('noticeSendTime')
-            reginProvince = info.get('reginProvince').replace("省", "").replace("市", "")
-
-            item = {
-                "site": "中国招标投标公共服务平台",
-                "channel": "全国招标公告公示搜索引擎",
-                "spidercode": "a_qgzbgggsssyq_qbgg",
-                "area": reginProvince,
-                "city": "",
-                "district": "",
-                "href": href,
-                "title": title,
-                "publishtime": create_time,
-                "parser_name": "ztpc_qgzbgggsssyq",
-                "is_mixed": False,
-                "comeintime": int2long(int(time.time())),
-            }
-
-            return item
-        # reachable only when info_list is empty; otherwise the loop returns the first hit
-        return []
-
-    def crawl(self, keyword):
-        retry = 0
-        while (retry := retry + 1) < 5:
-            try:
-                data_info = self.fetch(keyword=keyword)
-                list_item = self.parse(data_info=data_info)
-                time.sleep(random.random())
-                if not list_item:
-                    logger.warning(f"{keyword} 补录失败!")
-                    return None
-                rr = dt_Spider().start(list_item)
-                if rr:
-                    logger.info(f" {keyword} 已补录")
-                    return list_item
-                else:
-                    logger.warning(f"{keyword} 补录失败!")
-                    return None
-
-            except Exception as e:
-                logger.error(f"{keyword} 采集异常:{e}")
-                time.sleep(3)
-
-    def start(self, keyword: str):
-        # logger.debug("********** 补录开始 **********")
-
-        list_item = self.crawl(keyword)
-
-        # logger.debug("********** 补录结束 **********")
-        return list_item
-
-
-# if __name__ == '__main__':
-#     Spider().start('局电务公司海城站6502继电联锁改造工程断路器询价采购')

+ 0 - 310
lzz_theme/qgzbgggsssyq/ssyq.js

@@ -1,310 +0,0 @@
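-// Note: the aliases below are obfuscator output; each one merely wraps a single
-// primitive operator (+, -, <<, |, &, <) or a function call.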
-DJlBm = function (l1, l2) {
-    return l1 < l2;
-}
-nwYaV = function (l1, l2) {
-    return l1 + l2;
-}
-EGMkl = function (l1, l2) {
-    return l1 == l2;
-}
-EVrnI = function (l1, l2) {
-    return l1 - l2;
-}
-tYNKY = function (l1, l2) {
-    return l1(l2);
-}
-TbTmg = function (l1, l2) {
-    return l1 < l2;
-}
-MGEpz = function (l1, l2) {
-    return l1 | l2;
-}
-fQAyr = function (l1, l2) {
-    return l1 << l2;
-}
-OQJcB = function (l1, l2) {
-    return l1 & l2;
-}
-hDFsv = function (l1, l2) {
-    return l1 == l2;
-}
-DqGCc = function (l1, l2) {
-    return l1(l2);
-}
-rIHaw = function (l1, l2) {
-    return l1 < l2;
-}
-ZqRgc = function (l1, l2) {
-    return l1 | l2;
-}
-QyFsL = function (l1, l2) {
-    return l1 << l2;
-}
-vIlmM = function (l1, l2) {
-    return l1 == l2;
-}
-yFDAW = function (l1, l2) {
-    return l1(l2);
-}
-lJnvD = function (l1, l2) {
-    return l1 | l2;
-}
-hPpla = function (l1, l2) {
-    return l1 - l2;
-}
-LvJaY = function (l1, l2) {
-    return l1 < l2;
-}
-UPtKX = function (l1, l2) {
-    return l1(l2);
-}
-qoTvE = function (l1, l2) {
-    return l1 == l2;
-}
-ggJMq = function (l1, l2) {
-    return l1 - l2;
-}
-shWCy = function (l1, l2) {
-    return l1(l2);
-}
-IMCME = function (l1, l2) {
-    return l1 | l2;
-}
-VYdec = function (l1, l2) {
-    return l1 == l2;
-}
-XZzQS = function (l1, l2) {
-    return l1 - l2;
-}
-vMKQx = function (l1, l2) {
-    return l1(l2);
-}
-SPVsD = function (l1, l2) {
-    return l1 < l2;
-}
-cRdDu = function (l1, l2) {
-    return l1 < l2;
-}
-avGeL = function (l1, l2) {
-    return l1 | l2;
-}
-UjRAq = function (l1, l2) {
-    return l1 << l2;
-}
-jmDqy = function (l1, l2) {
-    return l1 & l2;
-}
-FaATM = function (l1, l2) {
-    return l1 == l2;
-}
-jBEMc = function (l1, l2) {
-    return l1 - l2;
-}
-HPcNa = function (l1, l2) {
-    return l1 == l2;
-}
-vwnaY = function (l1, l2) {
-    return l1 | l2;
-}
-tenBS = function (l1, l2) {
-    return l1 << l2;
-}
-KHkFx = function (l1, l2) {
-    return l1 & l2;
-}
-OZwQ = function (l1, l2) {
-    return l1 == l2;
-}
-XZzQS = function (l1, l2) {
-    return l1 - l2;
-}
-xzUJc = function (l1, l2) {
-    return l1(l2);
-}
-CYlhh = function (l1, l2) {
-    return l1 == l2;
-}
-BePag = function (l1, l2) {
-    return l1 - l2;
-}
-shWCy = function (l1, l2) {
-    return l1(l2);
-}
-zhKex = function (l1, l2) {
-    return l1 + l2;
-}
-dnoNS = function (l1, l2) {
-    return l1 + l2;
-}
-pWyDJ = function (l1, l2) {
-    return l1 + l2;
-}
-nwYaV = function (l1, l2) {
-    return l1 + l2;
-}
-dDYin = function (l1, l2) {
-    return l1(l2);
-}
-pGrZJ = function (lO, lX) {
-    return dDYin(lO, lX);
-}
-EzIKl = function (l1, l2) {
-    return l1 < l2;
-}
-QrNFe = function (lO, lX) {
-    return EzIKl(lO, lX);
-}
-iYbmv = function (l1, l2) {
-    return l1 + l2;
-}
-YCNgr = function (lO, lX) {
-    return iYbmv(lO, lX);
-}
-nwYaV = function (l1, l2) {
-    return l1 + l2;
-}
-lHHHe = function (lO, lX) {
-    return nwYaV(lO, lX);
-}
-imaAM = function (l1, l2) {
-    return l1 - l2;
-}
-IFaDs = function (lO, lX) {
-    return imaAM(lO, lX);
-}
-bXDKh = function (l1, l2) {
-    return l1 << l2;
-}
-fhOjT = function (lO, lX) {
-    return bXDKh(lO, lX);
-}
-
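-// sig() appears to reduce the encodeURIComponent-encoded URL to a 32-bit rolling
-// hash (per-character shift/subtract/add, with |= 0 forcing int32 overflow).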
-sig = function (lO) {
-    for (var lX = 0xe59 + -0x2602 + 0x17a9, lW = pGrZJ(encodeURIComponent, lO), le = 0x4d * -0x5d + 0x2ab + -0x29 * -0x9e; QrNFe(le, lW['length']); le++)
-        lX = YCNgr(lHHHe(IFaDs(fhOjT(lX, -0x1cdc + 0x13dd + -0x2a * -0x37), lX), 0xdf2 + -0x26bd + 0x1a59), lW['charCodeAt'](le)),
-            lX |= 0x3 * 0x4a3 + 0x130d + 0x1 * -0x20f6;
-    return lX;
-}
-
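-// uu() resembles lz-string's compress loop: LZW codes are emitted lX bits (here 6)
-// at a time through the custom 64-character alphabet supplied as lW.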
-function uu(lO, lX, lW) {
-
-    for (var le, lm, lq, li, lA = {}, lS = {}, lI = '', lj = 0x2b * 0x1 + 0x22d * 0x8 + -0x5db * 0x3, ld = 0x19bc + 0x1 * -0x25e7 + 0xc2e, lf = -0x1ba5 + -0xb * -0xfc + -0x3b * -0x49, lF = [], lE = 0x15cd * 0x1 + 0x2b3 * -0xc + 0x1 * 0xa97, lJ = -0x1ccc + -0x1 * -0x147 + 0x1 * 0x1b85, lG = -0x1 * 0x1d81 + -0x1 * 0x270a + 0x448b; DJlBm(lG, lO['length']); lG += -0x1852 + -0x495 + -0x19 * -0x128)
-        if (lq = lO.charAt(lG),
-        Object['prototype']['hasOwnProp' + 'erty']['call'](lA, lq) || (lA[lq] = ld++,
-            lS[lq] = !(0x185 + 0x1 * -0x14ef + 0xe * 0x163)),
-            li = nwYaV(lI, lq),
-            Object['prototype']['hasOwnProp' + 'erty']['call'](lA, li))
-            lI = li;
-        else {
-            if (Object['prototype']['hasOwnProp' + 'erty']['call'](lS, lI)) {
-                if (DJlBm(lI['charCodeAt'](0x4db + 0xb * 0x175 + 0x129 * -0x12), 0xd * 0x1bf + 0x1 * -0x15c + 0x29 * -0x7f)) {
-                    for (le = -0x136c + 0x13c5 + -0x1 * 0x59; SPVsD(le, lf); le++)
-                        lE <<= -0x17e2 + 0x1 * -0x1943 + 0x3126,
-                            EGMkl(lJ, EVrnI(lX, -0x2 * 0x7b5 + -0x7 * -0x1d5 + 0xa6 * 0x4)) ? (lJ = 0x109c + -0x2571 + 0x14d5,
-                                lF['push'](tYNKY(lW, lE)),
-                                lE = -0x124d + -0x2bf * -0x1 + 0x16a * 0xb) : lJ++;
-                    for (lm = lI['charCodeAt'](0x205f + 0x350 + -0x23af),
-                             le = -0x1fd + 0xe8 * -0x19 + 0x18a5; TbTmg(le, 0x1a4d * -0x1 + 0x1 * -0x319 + 0x1d6e); le++)
-                        lE = MGEpz(fQAyr(lE, -0x762 + 0x137d + 0x1 * -0xc1a), OQJcB(-0x10a8 + 0xe5 * -0x26 + 0x32a7, lm)),
-                            hDFsv(lJ, EVrnI(lX, -0x1601 + 0x136b + 0x297)) ? (lJ = 0x24bd + 0x1c38 + -0x40f5,
-                                lF['push'](DqGCc(lW, lE)),
-                                lE = 0x1 * 0x1da9 + -0x14ca + -0x8df) : lJ++,
-                            lm >>= -0x1 * -0x8b + -0x2079 * -0x1 + 0xb01 * -0x3;
-                } else {
-                    for (lm = 0x20ca + -0xa3 * 0x1 + -0x2026,
-                             le = -0x9fc + 0x290 + -0x1 * -0x76c; rIHaw(le, lf); le++)
-                        lE = ZqRgc(QyFsL(lE, -0x371 * 0xb + 0xfe5 + 0x1 * 0x15f7), lm),
-                            vIlmM(lJ, ggJMq(lX, 0x13ab + 0x1ead + -0x3257)) ? (lJ = -0x11c3 * -0x1 + 0x149b + -0x265e,
-                                lF['push'](shWCy(lW, lE)),
-                                lE = 0x1a * 0x137 + 0x11 * -0x220 + -0xe * -0x53) : lJ++,
-                            lm = -0xc59 + 0x16de + 0x1 * -0xa85;
-                    for (lm = lI['charCodeAt'](0x85a + -0x8c1 + -0x67 * -0x1),
-                             le = -0xafa + 0x9 * 0x3bd + 0x16ab * -0x1; DJlBm(le, -0x14f6 * 0x1 + -0x25f + 0x1765 * 0x1); le++)
-                        lE = IMCME(QyFsL(lE, 0x1ea9 + -0x2 * -0xfe5 + 0x1f39 * -0x2), OQJcB(-0x10d0 + 0x922 * -0x2 + 0x2315, lm)),
-                            VYdec(lJ, XZzQS(lX, -0x21e5 + -0x8fc + -0x1 * -0x2ae2)) ? (lJ = -0x127b + -0x1 * -0x1e2f + -0xbb4,
-                                lF['push'](vMKQx(lW, lE)),
-                                lE = -0x1d25 + 0x6ac + 0x1679) : lJ++,
-                            lm >>= 0x1d14 * 0x1 + 0x1dea + -0x1 * 0x3afd;
-                }
-                qoTvE(0x1bc5 + 0x18ef * -0x1 + -0x2d6, --lj) && (lj = Math['pow'](-0x1 * 0x1cd5 + -0x9 * -0x290 + -0x1ed * -0x3, lf),
-                    lf++),
-                    delete lS[lI];
-            } else {
-                for (lm = lA[lI],
-                         le = -0xff2 + -0x25c3 + 0x35b5; LvJaY(le, lf); le++)
-                    lE = lJnvD(fQAyr(lE, 0x1 * 0x131c + 0x234d + -0x3668), OQJcB(0x14b6 + -0x1 * -0x189b + 0x8 * -0x5aa, lm)),
-                        vIlmM(lJ, hPpla(lX, 0x113 * 0x1f + -0xc7c + -0x14d0)) ? (lJ = -0x2281 + -0xb9 * -0x5 + 0x1ee4,
-                            lF['push'](UPtKX(lW, lE)),
-                            lE = -0xd29 + 0x102f + -0x306) : lJ++,
-                        lm >>= -0x201a + 0x849 + 0x2 * 0xbe9;
-            }
-            vIlmM(0x146f + -0x1645 + -0xeb * -0x2, --lj) && (lj = Math['pow'](-0x6bd * 0x5 + 0x555 + 0x1 * 0x1c5e, lf),
-                lf++),
-                lA[li] = ld++,
-                lI = yFDAW(String, lq);
-        }
-    for (lm = lA[lI],
-             le = 0xe6 + 0x2c * -0x15 + 0x15b * 0x2; cRdDu(le, lf); le++)
-        lE = avGeL(UjRAq(lE, 0x1ac2 + 0x10a3 + 0x4 * -0xad9), jmDqy(0xff3 + 0xfe2 + -0x2a * 0xc2, lm)),
-            FaATM(lJ, jBEMc(lX, -0xe41 + -0xcf1 + 0x1b33)) ? (lJ = -0x1ca * -0xb + -0xb52 + -0x85c,
-                lF['push'](yFDAW(lW, lE)),
-                lE = 0x168b + -0x1 * -0x1bb9 + -0x1922 * 0x2) : lJ++,
-            lm >>= -0x1026 + 0x973 * -0x2 + 0x9 * 0x3e5;
-
-    HPcNa(0x2 * -0x19c + -0x25e4 + 0x291c, --lj) && (lj = Math['pow'](0x241d + 0x1f8a + -0x1 * 0x43a5, lf),
-        lf++)
-
-    for (lm = -0x1b08 + 0x1 * 0x89b + 0xd * 0x16b,
-             le = -0x25 * -0x71 + 0x2661 + 0x1b5b * -0x2; cRdDu(le, lf); le++)
-        lE = vwnaY(tenBS(lE, 0x2297 + 0x3d0 + -0x2666), KHkFx(0xdce + -0x17fb + 0xa2e, lm)),
-            OZwQ(lJ, XZzQS(lX, 0x11eb + -0x2142 + 0xf58 * 0x1)) ? (lJ = 0xd14 + -0x25ad + 0x3 * 0x833,
-                lF['push'](xzUJc(lW, lE)),
-                lE = 0x1 * 0x225b + -0x1693 + 0x68 * -0x1d) : lJ++,
-            lm >>= 0x4 * 0x7f2 + 0x21e2 + -0x41a9;
-    for (; ;) {
-        if (lE <<= -0x20d * -0xa + -0x128c * 0x2 + 0x1097,
-            CYlhh(lJ, BePag(lX, 0xb3e + 0x6 * -0x4f6 + 0x1287))) {
-            lF['push'](shWCy(lW, lE));
-            break;
-        }
-        lJ++;
-    }
-    return lF.join("")
-}
-
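-// type_1017 concatenates "hash|0|<ms timestamp>|1" and compresses it with uu()
-// to produce the signature appended to every API request.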
-function type_1017(href) {
-    lm = href
-    lS = 0x1 * 0x2051 + -0x23a2 * 0x1 + 0x351
-    lO = zhKex(dnoNS(dnoNS(pWyDJ(nwYaV(sig(lm), '|'), lS), '|'), new Date().getTime()), '|1')
-    lX = 6
-    lW = function (lA) {
-        return "DGi0YA7BemWnQjCl4+bR3f8SKIF9tUz/xhr2oEOgPpac=61ZqwTudLkM5vHyNXsVJ".charAt(lA);
-    }
-    return uu(lO, lX, lW)
-}
-
-function type_1017_lt(page) {
-    const url = "https://ctbpsp.com/cutominfoapi/recommand/type/5/pagesize/10/currentpage/" + page
-    return type_1017(url)
-}
-
-function type_1017_fl(page, type) {
-    const url = "https://ctbpsp.com/cutominfoapi/recommand/type/" + type + "/pagesize/10/currentpage/" + page
-    return type_1017(url)
-}
-
-function type_1017_ss(key) {
-    const url = "https://ctbpsp.com/cutominfoapi/searchkeyword?keyword=" + key + "&uid=0&PageSize=10&CurrentPage=1&searchType=0&bulletinType=5"
-    return type_1017(url)
-}
-
-function type_1017_dt(hid) {
-    const url = "https://ctbpsp.com/cutominfoapi/bulletin/" + hid + "/uid/0"
-    return type_1017(url)
-}
-
-function type_1017_file(hid) {
-    return type_1017(hid)
-}
-
-// console.log(type_1017(5))

+ 0 - 230
lzz_theme/qgzbgggsssyq/ssyq_list.py

@@ -1,230 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-10-29
----------
-@summary: 全国招标公告公示搜索引擎 - list page - [region + industry + notice type]
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from utils.RedisDB import RedisFilter
-from utils.tools import *
-from datetime import datetime
-import warnings
-import json
-import ast
-
-warnings.filterwarnings('ignore')
-
-
-class Spider:
-
-    def __init__(self):
-        self.py_spider = Mongo_client().py_spider
-        self.zb_list = self.py_spider.theme_list
-        self.RDS = RedisFilter()
-        self.real_cont = 0
-        self.paginate = True
-        self.headers = {
-            "Accept": "application/json, text/plain, */*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            # "Connection": "keep-alive",
-            # "Pragma": "no-cache",
-            "Referer": "https://ctbpsp.com/",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
-        }
-
-    def get_acw_sc_v2(self, html):
-        try:
-            arg1 = "".join(re.findall("arg1='(.*?)'", html))
-            if arg1:
-                js_script = '''
-                    function getAcw_sc__v2(obt_arg1) {
-                        String["prototype"]["hexXor"] = function (_0x4e08d8) {
-                            var _0x5a5d3b = '';
-                            for (var _0xe89588 = 0x0; _0xe89588 < this["length"] && _0xe89588 < _0x4e08d8["length"]; _0xe89588 += 2) {
-                                var _0x401af1 = parseInt(this["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x105f59 = parseInt(_0x4e08d8["slice"](_0xe89588, _0xe89588 + 2), 16);
-                                var _0x189e2c = (_0x401af1 ^ _0x105f59)["toString"](16);
-                                if (_0x189e2c["length"] == 1) {
-                                    _0x189e2c = '0' + _0x189e2c;
-                                }
-                                _0x5a5d3b += _0x189e2c;
-                            }
-                            return _0x5a5d3b;
-                        };
-                        String["prototype"]["unsbox"] = function () {
-                            var _0x4b082b = [15, 35,29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36];
-                            var _0x4da0dc = [];
-                            var _0x12605e = '';
-                            for (var _0x20a7bf = 0x0; _0x20a7bf < this["length"]; _0x20a7bf++) {
-                                var _0x385ee3 = this[_0x20a7bf];
-                                for (var _0x217721 = 0; _0x217721 < _0x4b082b["length"]; _0x217721++) {
-                                    if (_0x4b082b[_0x217721] == _0x20a7bf + 1) {
-                                        _0x4da0dc[_0x217721] = _0x385ee3;
-                                    }
-                                }
-                            }
-                            _0x12605e = _0x4da0dc["join"]('');
-                            return _0x12605e;
-                        };
-
-                        var _0x5e8b26 = "3000176000856006061501533003690027800375";
-                        // var arg1 = "0A5F01F50F9BC66FB28038F18B99B7B10CFF4667"
-                        var arg1 = obt_arg1
-                        var _0x23a392 = arg1["unsbox"]();
-                        arg2 = _0x23a392["hexXor"](_0x5e8b26);
-                        return arg2
-                    }
-                '''
-                ctx = execjs.compile(js_script)
-                arg2 = ctx.call('getAcw_sc__v2', arg1)
-                return {"acw_sc__v2": arg2}
-            else:
-                return {}
-        except:
-            return {}
-
-    def get_type_1017(self, page, tp):
-        with open('./ssyq.js', 'r') as fr:
-            ex_js = fr.read()
-        ctx = execjs.compile(ex_js)
-        return ctx.call('type_1017_fl', page, tp)
-
-    @staticmethod
-    def decrypto_data(text):
-        ex_js = '''
-        CryptoJS = require("crypto-js")
-        function en_str(t) {
-            var e = CryptoJS.enc.Utf8.parse("1qaz@wsx3e")
-              , i = CryptoJS.DES.decrypt({
-                ciphertext: CryptoJS.enc.Base64.parse(t)
-            }, e, {
-                mode: CryptoJS.mode.ECB,
-                padding: CryptoJS.pad.Pkcs7
-            });
-            return i.toString(CryptoJS.enc.Utf8)
-        }
-        '''
-        ctx = execjs.compile(ex_js)
-        return ctx.call('en_str', text)
-
-    def fetch(self, page, param):
-        logger.info(f'{param}|页码|{page}|发起请求')
-        url = f"https://ctbpsp.com/cutominfoapi/recommand/type/{param['type']}/pagesize/10/currentpage/{page}"
-        params = {
-            "type__1017": self.get_type_1017(page, param['type']),
-            "province": param['province'],
-            "industry": param['industry'],
-        }
-        response = requests.get(url, headers=self.headers, params=params, proxies=get_QGIP(), timeout=60, verify=False)
-        text = response.content.decode().replace('"', '')  # strip the JSON quotes around the base64 ciphertext
-        ret = self.decrypto_data(text)
-        if not ret:
-            raise ValueError('请求结果数据为空!')
-        return ret
-
-    def parse(self, data_info, page, param):
-        now_ts = int(datetime.now().replace(hour=0, minute=0, second=0, microsecond=0).timestamp())
-
-        # map JS literals (true/false/null) to a parseable placeholder before ast.literal_eval; the parser below only checks the null-data case
-        data_info = data_info.replace('true', '1').replace('false', '1').replace('null', '1')
-        iter_data = ast.literal_eval(data_info)
-        if iter_data.get('data') == 1:
-            # {'success': 1, 'data': 1, 'errorMessage': ''}
-            raise TypeError('解析数据失败!')
-
-        info_list = iter_data['data']['dataList']
-        page_size = iter_data['data']['pageSize']
-
-        results_list = []
-        for info in info_list:
-            hid = info.get('bulletinID')
-            did = info.get('dataSource')
-            href = f"http://ctbpsp.com/#/bulletinDetail?uuid={hid}&inpvalue=&dataSource={did}&tenderAgency="
-            title = info.get('noticeName').strip()
-            create_time = info.get('noticeSendTime')
-            reginProvince = info.get('reginProvince').replace("省", "").replace("市", "")
-
-            pb_time = int(datetime.strptime(create_time, "%Y-%m-%d").timestamp())
-            if pb_time < now_ts:
-                # logger.info(f'当前{page}页{param["province"]}-{param["industry"]}-{param["typeName"]}--发布时间小于当前时间')
-                logger.info('当日暂无新数据')
-                self.paginate = False
-                return
-
-            dedup = [title + href]
-            if not self.RDS.data_filter(dedup):
-                item = {
-                    "site": "中国招标投标公共服务平台",
-                    "channel": "全国招标公告公示搜索引擎",
-                    "spidercode": "a_qgzbgggsssyq_qbgg",
-                    "area": reginProvince,
-                    "city": "",
-                    "district": "",
-                    "href": href,
-                    "title": title,
-                    "publishtime": create_time,
-                    "parse_url": href,
-                    "parser_name": "ztpc_qgzbgggsssyq",
-                    "is_mixed": False,
-                    "is_theme": True,
-                    "retry": 0,
-                    "comeintime": int2long(time.time()),
-                    "is_crawl": False,
-                    "failed": False,
-                    "iscompete": True,
-                    "sendflag": "false",
-                    "T": "bidding",
-                    "infoformat": 1,
-                    "type": "",
-                    "publishdept": "",
-                    "_d": "comeintime",
-                }
-                self.zb_list.insert_one(item)
-                self.RDS.data_save_redis(dedup)
-                results_list.append(item)
-
-        logger.info(f' *** 第{page}页{param["province"]}-{param["industry"]}-{param["typeName"]}采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
-        self.real_cont += len(results_list)
-        if page_size < 10:
-            logger.info(f'单页数据量小于10|{page_size}')
-            # logger.info(f'当前{page}页{param["province"]}-{param["industry"]}-{param["typeName"]}--每页条数小于10')
-            self.paginate = False
-
-        return results_list
-
-    def crawl(self, page, param):
-        retry = 0
-        while (retry := retry + 1) < 10:
-            try:
-                data_info = self.fetch(page=page, param=param)
-                self.parse(data_info=data_info, page=page, param=param)
-                time.sleep(random.random())
-                return
-            except Exception as e:
-                logger.error(f"第{page}页|采集异常|{e}")
-                time.sleep(random.randint(3, 7))
-
-    def start(self, crawl_page):
-        logger.debug("********** 列表页开始 **********")
-        with open("./param.json", "r", encoding="utf-8") as f:
-            json_text = f.read()
-
-        for param in json.loads(json_text):
-            self.paginate = True
-            for page in range(1, crawl_page + 1):
-                if not self.paginate:
-                    break
-
-                self.crawl(page=page, param=param)
-                logger.info(f"当前已采集 {self.real_cont} 条数据")
-
-        logger.debug("********** 列表页结束 **********")
-
-
-if __name__ == '__main__':
-    Spider().start(1000)

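For reference, the obfuscated getAcw_sc__v2 challenge deleted above reduces to a fixed 40-slot permutation ("unsbox") followed by a byte-wise hex XOR against a constant mask, so the execjs round-trip is avoidable. A pure-Python sketch (acw_sc_v2 is a hypothetical name; arg1 is the 40-character value served by the anti-bot page):

MASK = "3000176000856006061501533003690027800375"
BOX = [15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6,
       11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36]

def acw_sc_v2(arg1: str) -> str:
    # unsbox: output slot j takes the character at 1-based position BOX[j] of arg1
    shuffled = "".join(arg1[pos - 1] for pos in BOX)
    # hexXor: XOR the two hex strings one byte (two hex digits) at a time
    return "".join(
        "{:02x}".format(int(shuffled[i:i + 2], 16) ^ int(MASK[i:i + 2], 16))
        for i in range(0, min(len(shuffled), len(MASK)), 2)
    )

The result is sent back as the acw_sc__v2 cookie, matching the dict the deleted method returned.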
+ 0 - 55
lzz_theme/qgzbgggsssyq/ssyq_main.py

@@ -1,55 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-09-18
----------
-@summary:
----------
-@author: lzz
-"""
-import time
-
-from loguru import logger
-from pymongo import MongoClient
-
-from spider_search import SearchSpider
-
-# mgo = MongoClient('127.0.0.1', port=27080, username="", password="")
-mgo = MongoClient('172.17.4.87', port=27080, username="", password="")
-theme_list = mgo['py_spider']['theme_list']
-
-# Create the spider instance
-search = SearchSpider()
-
-
-def start(limit):
-    logger.debug(f"uuid失效数据,补采开始 {limit} 条")
-
-    query = {
-        "site": "中国招标投标公共服务平台",
-        "failed": True,
-        "is_crawl": False,
-        "retry": {"$gte": 4, "$lte": 10}
-    }
-    sort = [("publishtime", -1)]
-    p = {"title": 1, "retry": 1, "_id": 1}
-    with theme_list.find(query, projection=p, sort=sort, limit=limit) as cursor:
-        task_items = list(cursor)
-
-    for item in task_items:
-        _id = item['_id']
-        title = "".join(item['title'].split())  # collapse all whitespace
-        result = search.spider(title)
-        if result is True:
-            theme_list.update_one({"_id": _id}, {"$set": {"is_crawl": True, "failed": False}})
-        else:
-            retry = item["retry"] + 1
-            theme_list.update_one({"_id": _id}, {"$set": {"retry": retry}})
-            logger.error(f"{title}|补采失败")
-
-        time.sleep(1)
-
-    logger.debug("uuid失效数据,补采完成!")
-
-
-if __name__ == '__main__':
-    start(200)

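A note on the retry bump above: the read-then-update pattern can drop increments if two backfill processes handle the same document. pymongo can do the bump atomically server-side (a sketch against the same collection):

    theme_list.update_one({"_id": _id}, {"$inc": {"retry": 1}})

With $inc, concurrent workers cannot overwrite each other's counts.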
File diff too large to display
+ 0 - 15
lzz_theme/qgzbgggsssyq/ssyq_pm.js


+ 0 - 461
lzz_theme/utils/webdriver.py

@@ -1,461 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-01-19
----------
-@summary: Remote selenium service
----------
-@author: dzr
-"""
-import os
-import queue
-import threading
-
-from selenium import webdriver
-from selenium.webdriver.chrome.remote_connection import ChromeRemoteConnection
-from selenium.webdriver.firefox.remote_connection import FirefoxRemoteConnection
-from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
-
-
-
-# Browser rendering defaults
-WEBDRIVER = dict(
-    pool_size=1,  # number of browser instances
-    load_images=False,  # whether to load images
-    user_agent=None,  # string, or zero-argument callable returning a user_agent
-    proxy=None,  # xxx.xxx.xx.xxx:xxxx, or zero-argument callable returning a proxy address
-    headless=False,  # whether to run headless
-    driver_type="FIREFOX",  # CHROME or FIREFOX
-    timeout=30,  # request timeout in seconds
-    window_size=(1280, 800),  # window size
-    executable_path=None,  # browser binary path; None uses the default location
-    render_time=0,  # render wait: seconds to wait after opening a page before reading its source
-    custom_argument=["--ignore-certificate-errors"],  # extra browser arguments
-    usages_local_driver=True,  # whether to use a local driver instead of the remote service
-    server_addr="http://192.168.3.182:8899/wd/hub",  # selenium remote service address
-    version="",  # remote browser version
-    service_log_path=os.devnull  # service log path
-)
-
-from loguru import logger
-from utils.tools import Singleton
-
-DEFAULT_USERAGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
-
-
-class WebDriver(RemoteWebDriver):
-    """浏览器采集 - selenium"""
-    CHROME = "CHROME"
-    FIREFOX = "FIREFOX"
-
-    def __init__(
-        self,
-        load_images=True,
-        user_agent=None,
-        proxy=None,
-        driver_type=CHROME,
-        timeout=20,
-        headless=False,
-        usages_local_driver=False,
-        window_size=(1024, 800),
-        server_addr=None,
-        version=None,
-        custom_argument=None,
-        executable_path=None,
-        service_log_path=None,
-        **kwargs
-    ):
-        """
-        webdirver 封装,支持 chrome 和 firefox
-        Args:
-            load_images: 是否加载图片
-            user_agent: 字符串 或 无参函数,返回值为user_agent
-            proxy: xxx.xxx.xxx.xxx:xxxx 或 无参函数,返回值为代理地址
-            headless: 是否启用无头模式
-            driver_type: CHROME 或 FIREFOX...
-            timeout: 请求超时时间
-            window_size: # 窗口大小
-            executable_path: 浏览器路径,默认为默认路径
-            server_addr: 远程服务地址
-            usages_local_driver: 是否使用本地驱动
-            service_log_path: selenium service 日志路径
-            version: 浏览器版本
-            **kwargs:
-        """
-        self._load_images = load_images or WEBDRIVER["load_images"]
-        self._user_agent = user_agent or DEFAULT_USERAGENT
-        self._proxy = proxy or WEBDRIVER["proxy"]
-        self._headless = headless or WEBDRIVER["headless"]
-        self._usages_local_driver = usages_local_driver or WEBDRIVER["usages_local_driver"]
-        self._timeout = timeout or WEBDRIVER["timeout"]
-        self._window_size = window_size or WEBDRIVER["window_size"]
-        self._executable_path = executable_path or WEBDRIVER["executable_path"]
-        self._custom_argument = custom_argument or WEBDRIVER["custom_argument"]
-        self._server_addr = server_addr or WEBDRIVER["server_addr"]
-        self._version = version or WEBDRIVER["version"]
-        self._service_log_path = service_log_path or WEBDRIVER["service_log_path"]
-
-        if driver_type == WebDriver.CHROME:
-            self.driver = self.chrome_driver()
-
-        elif driver_type == WebDriver.FIREFOX:
-            self.driver = self.firefox_driver()
-
-        else:
-            raise TypeError(
-                "dirver_type must be one of CHROME or FIREFOX, but received {}".format(
-                    type(driver_type)
-                )
-            )
-
-        # driver.get(url) can hang without returning or raising; a page-load timeout stops the program from blocking forever.
-        self.driver.set_page_load_timeout(self._timeout)
-        # script execution timeout (same value as the page-load timeout)
-        self.driver.set_script_timeout(self._timeout)
-
-        self._is_remote = not self._usages_local_driver
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if exc_val:
-            logger.error(exc_val)
-
-        self.quit()
-        return False
-
-    def __getattr__(self, name):
-        if self.driver:
-            return getattr(self.driver, name)
-        else:
-            raise AttributeError
-
-    def get_driver(self):
-        return self.driver
-
-    def local_firefox_driver(self):
-        firefox_profile = webdriver.FirefoxProfile()
-        firefox_options = webdriver.FirefoxOptions()
-        firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX
-        firefox_profile.set_preference("dom.webdriver.enabled", False)
-
-        if self._proxy:
-            proxy = self._proxy() if callable(self._proxy) else self._proxy
-            proxy = proxy.replace("socks5://", "")
-            # use a socks5 proxy
-            ip, port = proxy.split(":")
-            firefox_profile.set_preference('network.proxy.type', 1)  # 0 = no proxy, 1 = manual proxy
-            firefox_profile.set_preference('network.proxy.socks', ip)
-            firefox_profile.set_preference('network.proxy.socks_port', int(port))
-
-        if self._user_agent:
-            firefox_profile.set_preference(
-                "general.useragent.override",
-                self._user_agent() if callable(
-                    self._user_agent) else self._user_agent,
-            )
-
-        if not self._load_images:
-            firefox_profile.set_preference("permissions.default.image", 2)
-
-        if self._headless:
-            firefox_options.add_argument("--headless")
-            firefox_options.add_argument("--disable-gpu")
-
-        # add custom browser arguments
-        if self._custom_argument:
-            for arg in self._custom_argument:
-                firefox_options.add_argument(arg)
-
-        if self._executable_path:
-            driver = webdriver.Firefox(
-                capabilities=firefox_capabilities,
-                options=firefox_options,
-                firefox_profile=firefox_profile,
-                executable_path=self._executable_path,
-                service_log_path=self._service_log_path
-            )
-        else:
-            driver = webdriver.Firefox(
-                capabilities=firefox_capabilities,
-                options=firefox_options,
-                firefox_profile=firefox_profile,
-                service_log_path=self._service_log_path
-            )
-
-        if self._window_size:
-            driver.set_window_size(*self._window_size)
-
-        return driver
-
-    def remote_firefox_driver(self):
-        firefox_options = webdriver.FirefoxOptions()
-        desired_capabilities = firefox_options.to_capabilities()
-        firefox_options.set_preference("dom.webdriver.enabled", False)
-
-        if self._version:
-            desired_capabilities['version'] = self._version
-
-        if self._proxy:
-            proxy = self._proxy() if callable(self._proxy) else self._proxy
-            proxy = proxy.replace("socks5://", "")
-            # use a socks5 proxy
-            ip, port = proxy.split(":")
-            firefox_options.set_preference('network.proxy.type', 1)  # 0 = no proxy, 1 = manual proxy
-            firefox_options.set_preference('network.proxy.socks', ip)
-            firefox_options.set_preference('network.proxy.socks_port', int(port))
-
-        if self._user_agent:
-            firefox_options.set_preference(
-                "general.useragent.override",
-                self._user_agent() if callable(self._user_agent) else self._user_agent,
-            )
-
-        if not self._load_images:
-            firefox_options.set_preference("permissions.default.image", 2)
-
-        if self._headless:
-            firefox_options.add_argument("--headless")
-            firefox_options.add_argument("--disable-gpu")
-
-        if self._custom_argument:
-            for arg in self._custom_argument:
-                firefox_options.add_argument(arg)
-
-        executor = FirefoxRemoteConnection(remote_server_addr=self._server_addr)
-        browser = webdriver.Remote(
-            command_executor=executor,
-            desired_capabilities=desired_capabilities,
-            options=firefox_options
-        )
-
-        if self._window_size:
-            browser.set_window_size(*self._window_size)
-
-        return browser
-
-    def firefox_driver(self):
-        if self._usages_local_driver:
-            return self.local_firefox_driver()
-        return self.remote_firefox_driver()
-
-    def remote_chrome_driver(self):
-        chrome_options = webdriver.ChromeOptions()
-        desired_capabilities = chrome_options.to_capabilities()
-        # Important: disable the automation flags so sites cannot easily detect Selenium
-        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
-        chrome_options.add_experimental_option("useAutomationExtension", False)
-        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
-        # required when running inside docker
-        chrome_options.add_argument('--no-sandbox')
-        chrome_options.add_argument('--disable-extensions')
-        chrome_options.add_argument('--disable-dev-shm-usage')
-
-        if self._version:
-            desired_capabilities['version'] = self._version
-
-        if self._proxy:
-            chrome_options.add_argument(
-                "--proxy-server={}".format(
-                    self._proxy() if callable(self._proxy) else self._proxy
-                )
-            )
-
-        if self._user_agent:
-            chrome_options.add_argument(
-                "user-agent={}".format(
-                    self._user_agent()
-                    if callable(self._user_agent)
-                    else self._user_agent
-                )
-            )
-
-        if not self._load_images:
-            chrome_options.add_experimental_option(
-                "prefs", {"profile.managed_default_content_settings.images": 2}
-            )
-
-        if self._headless:
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument("--disable-gpu")
-
-        if self._window_size:
-            chrome_options.add_argument(
-                "--window-size={},{}".format(self._window_size[0], self._window_size[1])
-            )
-
-        # add custom browser arguments
-        if self._custom_argument:
-            for arg in self._custom_argument:
-                chrome_options.add_argument(arg)
-
-        browser = webdriver.Remote(
-            command_executor=ChromeRemoteConnection(
-                remote_server_addr=self._server_addr,
-                keep_alive=True),
-            desired_capabilities=desired_capabilities,
-            options=chrome_options
-        )
-
-        # hide browser automation fingerprints by injecting stealth.min.js via CDP
-        with open(os.path.join(os.path.dirname(__file__), "./js/stealth.min.js")) as f:
-            js = f.read()
-            params = {
-                'cmd': 'Page.addScriptToEvaluateOnNewDocument',
-                'params': {'source': js}
-            }
-            response = browser.execute("executeCdpCommand", params)['value']
-        return browser
-
-    def local_chrome_driver(self):
-        chrome_options = webdriver.ChromeOptions()
-        # Important: disable the automation flags so sites cannot easily detect Selenium
-        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
-        chrome_options.add_experimental_option("useAutomationExtension", False)
-        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
-        # required when running inside docker
-        chrome_options.add_argument("--no-sandbox")
-        chrome_options.add_argument('--disable-extensions')
-        chrome_options.add_argument('--disable-dev-shm-usage')
-
-        if self._proxy:
-            chrome_options.add_argument(
-                "--proxy-server={}".format(
-                    self._proxy() if callable(self._proxy) else self._proxy
-                )
-            )
-
-        if self._user_agent:
-            chrome_options.add_argument(
-                "user-agent={}".format(
-                    self._user_agent()
-                    if callable(self._user_agent)
-                    else self._user_agent
-                )
-            )
-
-        if not self._load_images:
-            chrome_options.add_experimental_option(
-                "prefs", {"profile.managed_default_content_settings.images": 2}
-            )
-
-        if self._headless:
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument("--disable-gpu")
-
-        if self._window_size:
-            chrome_options.add_argument(
-                "--window-size={},{}".format(self._window_size[0], self._window_size[1])
-            )
-
-        # add custom browser arguments
-        if self._custom_argument:
-            for arg in self._custom_argument:
-                chrome_options.add_argument(arg)
-
-        if self._executable_path:
-            driver = webdriver.Chrome(
-                chrome_options=chrome_options,
-                executable_path=self._executable_path,
-                service_log_path=self._service_log_path
-            )
-        else:
-            driver = webdriver.Chrome(
-                chrome_options=chrome_options,
-                service_log_path=self._service_log_path
-            )
-
-        # hide browser automation fingerprints by injecting stealth.min.js via CDP
-        with open(os.path.join(os.path.dirname(__file__), "./js/stealth.min.js")) as f:
-            js = f.read()
-            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
-
-        return driver
-
-    def chrome_driver(self):
-        if self._usages_local_driver:
-            return self.local_chrome_driver()
-        return self.remote_chrome_driver()
-
-    @property
-    def cookies(self):
-        cookies_json = {}
-        for cookie in self.driver.get_cookies():
-            cookies_json[cookie["name"]] = cookie["value"]
-        return cookies_json
-
-    @cookies.setter
-    def cookies(self, val: dict):
-        """
-        Set cookies
-        Args:
-            val: {"key":"value", "key2":"value2"}
-
-        Returns:
-
-        """
-        for key, value in val.items():
-            self.driver.add_cookie({"name": key, "value": value})
-
-    def quit(self):
-        try:
-            self.get_driver().quit()
-        except Exception:
-            # We don't care about the message because something probably has gone wrong
-            pass
-
-    # def __del__(self):
-    #     if self.driver:
-    #         self.driver.quit()
-
-
-@Singleton
-class WebDriverPool:
-    def __init__(self, pool_size=5, **kwargs):
-        self.queue = queue.Queue(maxsize=pool_size)
-        self.kwargs = kwargs
-        self.lock = threading.RLock()
-        self.driver_count = 0
-
-    @property
-    def is_full(self):
-        return self.driver_count >= self.queue.maxsize
-
-    def get(self, user_agent: str = None, proxy: str = None) -> WebDriver:
-        """
-        Get a webdriver.
-        A new webdriver instance is created with the user_agent and proxy arguments below.
-        Args:
-            user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
-            proxy: xxx.xxx.xxx.xxx
-        Returns:
-
-        """
-        if not self.is_full:
-            with self.lock:
-                if not self.is_full:
-                    kwargs = self.kwargs.copy()
-                    if user_agent:
-                        kwargs["user_agent"] = user_agent
-                    if proxy:
-                        kwargs["proxy"] = proxy
-                    driver = WebDriver(**kwargs)
-                    self.queue.put(driver)
-                    self.driver_count += 1
-
-        driver = self.queue.get()
-
-        return driver
-
-    def put(self, driver):
-        self.queue.put(driver)
-
-    def remove(self, driver):
-        driver.quit()
-        self.driver_count -= 1
-
-    def close(self):
-        while not self.queue.empty():
-            driver = self.queue.get()
-            driver.quit()
-            self.driver_count -= 1

Some files were not shown because too many files changed in this diff