8 kuukautta sitten · 4b031a9bb6
--- a/中国建设银行集采平台/Zgjsyhjcpt.py
+++ b/中国建设银行集采平台/Zgjsyhjcpt.py
@@ -1,49 +1,30 @@
 
															 # -*- coding: utf-8 -*-
														
 
															 """
														
 
															-Created on 2023-02-25
														
 
															+Created on 2024-12-12
														
 
															 ---------
														
 
															 @summary: 中国建设银行集采平台
														
 
															 ---------
														
 
															 @author: lzz
														
 
															 """
														
 
															 import feapder
														
 
															-from items.spider_item import DataBakItem
														
 
															+from items.spider_item import BidingListItem
														
 
															 from collections import namedtuple
														
 
															-import time, random
														
 
															-import execjs
														
 
															-import requests
														
 
															-from untils.attachment import AttachmentDownloader
														
 
															-from untils.tools import extract_file_type,remove_htmldata
														
 
															-from feapder.network.selector import Selector
														
 
															-def get_ser(text):
														
 
															-    with open('./zgjsyhjcpt.js', 'r') as f:
														
 
															-        exjs = f.read()
														
 
															-    ctx = execjs.compile(exjs)
														
 
															-    ser = ctx.call("encryptRSA", f"{text}")
														
 
															-    return ser
														
 
															-
														
 
															-
														
 
															-def transfer_url(url):
														
 
															-    url = url.replace("+", "%2B").replace("/", "%2F").replace("?", "%3F")
														
 
															-    url = url.replace("#", "%23").replace("&", "%26").replace("=", "%3D")
														
 
															-    return url
														
 
															-
														
 
															 class Zgjsyhjcpt(feapder.BiddingListSpider):
														
 
															     def start_callback(self):
														
 
															-        Menu = namedtuple('Menu', ['channel', 'code', 'typeone', 'crawl_page'])
														
 
															+        Menu = namedtuple('Menu', ['channel', 'code', 'tid', 'crawl_page'])
														
 
															         self.site = "中国建设银行集采平台"
														
 
															         self.menus = [
														
 
															-            Menu('供应商征集', 'a_zgjsyhjcpt_gyszj', 'ccbgyszj', 3),
														
 
															-            Menu('采购专区-招标公告', 'a_zgjsyhjcpt_cgzq_zbgg', 'ccbbidzbgg', 1),
														
 
															-            Menu('采购专区-变更公告', 'a_zgjsyhjcpt_cgzq_bggg', 'ccbbidecgg', 1),
														
 
															-            Menu('采购专区-中标候选人公示', 'a_zgjsyhjcpt_cgzq_zbhxrgs', 'ccbbidzbgs', 1),
														
 
															-            Menu('采购专区-中标结果公示', 'a_zgjsyhjcpt_cgzq_zbjggs', 'ccbbidzbjggs', 1),
														
 
															-            Menu('采购公开信息', 'a_zgjsyhjcpt_cggkxx', 'ccbpurtzgg', 3),
														
 
															+            Menu('供应商征集', 'a_zgjsyhjcpt_gyszj', '360', 1),
														
 
															+            Menu('采购专区-招标公告', 'a_zgjsyhjcpt_cgzq_zbgg', '355', 1),
														
 
															+            Menu('采购专区-变更公告', 'a_zgjsyhjcpt_cgzq_bggg', '357', 1),
														
 
															+            Menu('采购专区-中标候选人公示', 'a_zgjsyhjcpt_cgzq_zbhxrgs', '358', 1),
														
 
															+            Menu('采购专区-中标结果公示', 'a_zgjsyhjcpt_cgzq_zbjggs', '359', 1),
														
 
															+            Menu('采购公开信息', 'a_zgjsyhjcpt_cggkxx', '353', 1),
														
 
															         ]
														
 
															         self.headers = {
														
@@ -60,27 +41,26 @@ class Zgjsyhjcpt(feapder.BiddingListSpider):
 
															     def start_requests(self):
														
 
															         for menu in self.menus:
														
 
															-            start_url = f"https://ibuy.ccb.com/cms/channel/{menu.typeone}/index.htm"
														
 
															+            start_url = f"https://ibuy.ccb.com/json/contentFile/{menu.tid}/1.json"
														
 
															             yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
														
 
															     def parse(self, request, response):
														
 
															         menu = request.item
														
 
															-        info_list = response.xpath('//div[@class="infolist-main single-main bidlist"]/ul/li|//div[@class="infolist-main bidlist"]/ul/li')
														
 
															-        for info in info_list:
														
 
															-            title = "".join(info.xpath('./a/span/text()').extract()).strip()
														
 
															-            href_org = info.xpath('./a/@hrefurl').extract_first()
														
 
															-            href_hid = info.xpath('./a/@hid').extract_first()
														
 
															-            ser_p = transfer_url(get_ser(href_hid))
														
 
															-            href = f"https://ibuy.ccb.com{href_org}?pageNo={href_hid}&_ser_p={ser_p}"
														
 
															-            create_time = "".join(info.xpath('./a/em[1]/text()').extract()).strip()
														
 
															-            dedup_href = href.split("&_ser_p")[0]
														
 
															+        info_list = response.json
														
 
															+        for info in info_list[:30]:
														
 
															+            title = info.get('title').strip()
														
 
															+            hid = info.get('id')
														
 
															+            pid = menu.get('tid')
														
 
															+            href = f"https://ibuy.ccb.com/cms/index.html#/content?pId={pid}&id={hid}"
														
 
															+            create_time = info.get('releaseDate').strip()
														
 
															+
														
 
															+            htm = create_time.split('-')[0]
														
 
															             area = "全国"  # 省份
														
 
															             city = ""  # 城市
														
 
															-            data_item = DataBakItem()  # 存储数据的管道
														
 
															+            data_item = BidingListItem()  # 存储数据的管道
														
 
															             data_item.href = href  # 标书链接
														
 
															-            data_item.unique_key = (dedup_href,title,create_time)
														
 
															             data_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 （编辑器定的）
														
 
															             data_item.spidercode = menu.get("code")  # 最上方定义的爬虫code（编辑器定的）
														
 
															             data_item.title = title  # 标题
														
@@ -89,32 +69,9 @@ class Zgjsyhjcpt(feapder.BiddingListSpider):
 
															             data_item.area = area  # 城市默认:全国
														
 
															             data_item.city = city  # 城市 默认为空
														
 
															-            time.sleep(random.randint(3, 5))
														
 
															-            res = requests.get(href, headers=self.headers, proxies=request.proxies(), verify=False, timeout=30)
														
 
															-            if res.status_code == 200:
														
 
															-                root = Selector(res.text)
														
 
															-                html = root.xpath('//div[@class="article-content"]').extract_first("")
														
 
															-
														
 
															-                rm_list = ['//p[@class="collect"]','//div[@class="article-bottom"]']
														
 
															-                data_item.contenthtml = remove_htmldata(rm_list,html,root)
														
 
															-
														
 
															-                attachments = {}
														
 
															-                files = root.xpath('//div[@class="article-content"]//a[@href]')
														
 
															-                for file in files:
														
 
															-                    file_url = "https://ibuy.ccb.com" + file.xpath('./@href').extract_first("")
														
 
															-                    file_name = file.xpath('./text()').extract_first("")
														
 
															-                    file_type = extract_file_type(file_name, file_url)
														
 
															-
														
 
															-                    if file_type and 'download' in file_url:
														
 
															-                        attachment = AttachmentDownloader().fetch_attachment(
														
 
															-                            file_name=file_name, file_type=file_type, download_url=file_url,
														
 
															-                            proxies=request.proxies())
														
 
															-                        attachments[str(len(attachments) + 1)] = attachment
														
 
															-
														
 
															-                if len(attachments) > 0:
														
 
															-                    data_item.projectinfo = {"attachments": attachments}
														
 
															-
														
 
															-
														
 
															+            data_item.unique_key = ('href',)
														
 
															+            data_item.parse = "self.detail_get"  # 详情页回调方法
														
 
															+            data_item.parse_url = f"https://ibuy.ccb.com/json/contentFile/{pid}/{htm}/{hid}.json"
														
 
															             yield data_item
														
@@ -124,20 +81,6 @@ class Zgjsyhjcpt(feapder.BiddingListSpider):
 
															     def download_midware(self, request):
														
 
															         page = request.page
														
 
															-        menu = request.item
														
 
															-        if menu.get('code') == "a_zgjsyhjcpt_gyszj":
														
 
															-            data = {
														
 
															-                "pageNo": f"{page}",
														
 
															-                "_ser_p": f"{get_ser(page)}",
														
 
															-                "collectStatus": "0"
														
 
															-            }
														
 
															-        else:
														
 
															-            data = {
														
 
															-                "pageNo": f"{page}",
														
 
															-                "_ser_p": f"{get_ser(page)}",
														
 
															-            }
														
 
															-
														
 
															-        request.data = data
														
 
															         request.headers = self.headers
														
--- a/中国建设银行集采平台/zgjsyhjcpt.js
+++ b/中国建设银行集采平台/zgjsyhjcpt.js
@@ -1,69 +1,69 @@
 
															 const jsdom = require("jsdom");
														
 
															-const { JSDOM } = jsdom;
														
 
															-const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
														
 
															+const {JSDOM} = jsdom;
														
 
															+const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`,
														
 
															+    {
														
 
															+        url: "https://example.org/",
														
 
															+        referrer: "https://example.com/",
														
 
															+        contentType: "text/html",
														
 
															+    });
														
 
															 window = dom.window;
														
 
															 var JSEncrypt = require("jsencrypt");
														
 
															 const publicKey = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCzrjWyZGR7kNdlnjDYptEB9mEc10NU53muUM/0vbzD8ivekX5zuJ6P7VrjzT7VIu1r9U9B2+xXzSF+2tinEzBpE8z/DAeL235ZmNUQJFIVGvrGUYs4q7nj21n4qNlwfbjpEH2kPkBG3jgAeEHMXj4tkaI5Nb/6Kr+yCZpaSn2U+wIDAQAB"
														
 
															-var b64map="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
														
 
															-var b64pad="=";
														
 
															+var b64map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
														
 
															+var b64pad = "=";
														
 
															 function hex2b64(h) {
														
 
															-  var i;
														
 
															-  var c;
														
 
															-  var ret = "";
														
 
															-  for(i = 0; i+3 <= h.length; i+=3) {
														
 
															-    c = parseInt(h.substring(i,i+3),16);
														
 
															-    ret += b64map.charAt(c >> 6) + b64map.charAt(c & 63);
														
 
															-  }
														
 
															-  if(i+1 == h.length) {
														
 
															-    c = parseInt(h.substring(i,i+1),16);
														
 
															-    ret += b64map.charAt(c << 2);
														
 
															-  }
														
 
															-  else if(i+2 == h.length) {
														
 
															-    c = parseInt(h.substring(i,i+2),16);
														
 
															-    ret += b64map.charAt(c >> 2) + b64map.charAt((c & 3) << 4);
														
 
															-  }
														
 
															-  while((ret.length & 3) > 0) ret += b64pad;
														
 
															-  return ret;
														
 
															+    var i;
														
 
															+    var c;
														
 
															+    var ret = "";
														
 
															+    for (i = 0; i + 3 <= h.length; i += 3) {
														
 
															+        c = parseInt(h.substring(i, i + 3), 16);
														
 
															+        ret += b64map.charAt(c >> 6) + b64map.charAt(c & 63);
														
 
															+    }
														
 
															+    if (i + 1 == h.length) {
														
 
															+        c = parseInt(h.substring(i, i + 1), 16);
														
 
															+        ret += b64map.charAt(c << 2);
														
 
															+    } else if (i + 2 == h.length) {
														
 
															+        c = parseInt(h.substring(i, i + 2), 16);
														
 
															+        ret += b64map.charAt(c >> 2) + b64map.charAt((c & 3) << 4);
														
 
															+    }
														
 
															+    while ((ret.length & 3) > 0) ret += b64pad;
														
 
															+    return ret;
														
 
															 }
														
 
															 JSEncrypt.prototype.encrypt = function (string) {
														
 
															-  // Return the encrypted string.
														
 
															-  try {
														
 
															-    return hex2b64(this.getKey().encrypt(string));
														
 
															-  }
														
 
															-  catch (ex) {
														
 
															-    return false;
														
 
															-  }
														
 
															+    try {
														
 
															+        return hex2b64(this.getKey().encrypt(string));
														
 
															+    } catch (ex) {
														
 
															+        return false;
														
 
															+    }
														
 
															 };
														
 
															-
														
 
															-
														
 
															-JSEncrypt.prototype.encryptLong = function(string) {
														
 
															+JSEncrypt.prototype.encryptLong = function (string) {
														
 
															     var k = this.getKey();
														
 
															-    var maxLength = (((k.n.bitLength()+7)>>3)-11);
														
 
															+    var maxLength = (((k.n.bitLength() + 7) >> 3) - 11);
														
 
															     try {
														
 
															-      var lt = "";
														
 
															-      var ct = "";
														
 
															+        var lt = "";
														
 
															+        var ct = "";
														
 
															-      if (string.length > maxLength) {
														
 
															-        lt = string.match(/.{1,117}/g);
														
 
															-        lt.forEach(function(entry) {
														
 
															-          var t1 = k.encrypt(entry);
														
 
															-          ct += t1 ;
														
 
															-        });
														
 
															-        return hex2b64(ct);
														
 
															-      }
														
 
															-      var t = k.encrypt(string);
														
 
															-      var y = hex2b64(t);
														
 
															-      return y;
														
 
															+        if (string.length > maxLength) {
														
 
															+            lt = string.match(/.{1,117}/g);
														
 
															+            lt.forEach(function (entry) {
														
 
															+                var t1 = k.encrypt(entry);
														
 
															+                ct += t1;
														
 
															+            });
														
 
															+            return hex2b64(ct);
														
 
															+        }
														
 
															+        var t = k.encrypt(string);
														
 
															+        var y = hex2b64(t);
														
 
															+        return y;
														
 
															     } catch (ex) {
														
 
															-      return false;
														
 
															+        return false;
														
 
															     }
														
 
															 }
														
@@ -73,3 +73,11 @@ function encryptRSA(str) {
 
															     encrypt.setPublicKey(publicKey);
														
 
															     return encrypt.encryptLong(str);
														
 
															 }
														
 
															+
														
 
															+function u() {
														
 
															+    const t = (new Date).getHours()
														
 
															+        , e = (new Date).getMinutes()
														
 
															+        , a = 10 * Math.floor(e / 10);
														
 
															+    return `${t}${a}`
														
 
															+}
														
 
															+
														
--- a/中国建设银行集采平台/中国建设银行集采平台-详情页.py
+++ b/中国建设银行集采平台/中国建设银行集采平台-详情页.py
@@ -0,0 +1,104 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+"""
														
 
															+Created on 2024-12-12
														
 
															+---------
														
 
															+@summary: 中国建设银行集采平台
														
 
															+---------
														
 
															+@author: lzz
														
 
															+"""
														
 
															+import feapder
														
 
															+from items.spider_item import DataBakItem
														
 
															+from untils.attachment import AttachmentDownloader
														
 
															+from untils.tools import extract_file_type
														
 
															+import requests
														
 
															+
														
 
															+headers = {
														
 
															+    "Accept": "application/json, text/plain, */*",
														
 
															+    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
														
 
															+    "Cache-Control": "no-cache",
														
 
															+    "Connection": "keep-alive",
														
 
															+    "Pragma": "no-cache",
														
 
															+    "Referer": "https://ibuy.ccb.com/cms/index.html",
														
 
															+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
														
 
															+}
														
 
															+
														
 
															+
														
 
															+def get_file_set(proxies=False):
														
 
															+    headers = {
														
 
															+        "Accept": "application/json, text/plain, */*",
														
 
															+        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
														
 
															+        "Cache-Control": "no-cache",
														
 
															+        "Connection": "keep-alive",
														
 
															+        "Pragma": "no-cache",
														
 
															+        "Referer": "https://ibuy.ccb.com/cms/index.html",
														
 
															+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
														
 
															+    }
														
 
															+
														
 
															+    url = "https://ibuy.ccb.com/json/contentFile/sysFile.json"
														
 
															+    try:
														
 
															+        response = requests.get(url, headers=headers, proxies=proxies, timeout=20, verify=False)
														
 
															+        return response.json()
														
 
															+    except:
														
 
															+        return []
														
 
															+
														
 
															+
														
 
															+def get_file_info(fid, file_set):
														
 
															+    for info in file_set:
														
 
															+        if info.get('id') == fid:
														
 
															+            file_name = info.get('fileName')
														
 
															+            file_type = info.get('fileType')
														
 
															+            break
														
 
															+    else:
														
 
															+        file_name = ""
														
 
															+        file_type = ""
														
 
															+
														
 
															+    return file_name, file_type
														
 
															+
														
 
															+
														
 
															+class Details(feapder.BiddingDetailSpider):
														
 
															+    file_set = get_file_set()
														
 
															+
														
 
															+    def start_requests(self):
														
 
															+        data_list = self.get_tasks_by_rabbitmq(limit=20)
														
 
															+        for item in data_list:
														
 
															+            request_params = item.get("request_params")
														
 
															+            timeout = request_params.pop('timeout', 10)
														
 
															+            yield feapder.Request(url=item['parse_url'], item=item, proxies=False,
														
 
															+                                  deal_detail=item.get("deal_detail"), callback=eval(item.get("parse")),
														
 
															+                                  timeout=timeout, **request_params)
														
 
															+
														
 
															+    def download_midware(self, request):
														
 
															+        request.headers = headers
														
 
															+
														
 
															+    def detail_get(self, request, response):
														
 
															+
														
 
															+        items = request.item
														
 
															+        list_item = DataBakItem(**items)
														
 
															+
														
 
															+        html = response.json.get('content')
														
 
															+
														
 
															+        list_item.contenthtml = html
														
 
															+
														
 
															+        attachments = {}
														
 
															+        file_list = response.json.get('fileId')
														
 
															+        if file_list:
														
 
															+            for fid in file_list.split(','):
														
 
															+                file_name, file_type = get_file_info(fid, self.file_set)
														
 
															+
														
 
															+                if not file_type:
														
 
															+                    file_type = extract_file_type(file_name=file_name)
														
 
															+                file_url = f"https://ibuy.ccb.com/json/upload/{fid}.{file_type}"
														
 
															+
														
 
															+                if file_name and file_type:
														
 
															+                    attachment = AttachmentDownloader().fetch_attachment(
														
 
															+                        file_name=file_name, file_type=file_type, download_url=file_url)
														
 
															+                    attachments[str(len(attachments) + 1)] = attachment
														
 
															+
														
 
															+        if attachments:
														
 
															+            list_item.projectinfo = {"attachments": attachments}
														
 
															+
														
 
															+        yield list_item
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    spider = Details(redis_key="lzz:Zgjsyhjcpt").start()
														
--- a/全军武器装备采购信息网/采购公告需求-列表页.py
+++ b/全军武器装备采购信息网/采购公告需求-列表页.py
@@ -1,98 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-"""
														
 
															-Created on 2024-10-10
														
 
															----------
														
 
															-@summary: 全军武器装备采购信息网
														
 
															----------
														
 
															-@author: lzz
														
 
															-"""
														
 
															-import feapder
														
 
															-from items.spider_item import MgpListItem
														
 
															-from collections import namedtuple
														
 
															-import time
														
 
															-
														
 
															-
														
 
															-
														
 
															-class Feapder(feapder.BiddingListSpider):
														
 
															-
														
 
															-
														
 
															-    def start_callback(self):
														
 
															-        Menu = namedtuple('Menu', ['channel', 'code', 'tid', 'cid', 'crawl_page'])
														
 
															-
														
 
															-        self.site = "全军武器装备采购信息网"
														
 
															-
														
 
															-        self.menus = [
														
 
															-            Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231276155707394', 'cggg', 5),
														
 
															-            Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231318006472705', 'cggg', 5),
														
 
															-            Menu('采购需求', 'a_qjwqzbcgxxw_cgxq_jdxq', 'HZ287281676ce46401676cf0975c000e', 'cgxq', 1),
														
 
															-            Menu('采购需求', 'a_qjwqzbcgxxw_cgxq_jdxq', 'HZ287281676ce46401676cf59ca5001b', 'cgxq', 1),
														
 
															-        ]
														
 
															-
														
 
															-        self.headers = {
														
 
															-            "Accept": "application/json, text/javascript, */*; q=0.01",
														
 
															-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
														
 
															-            "Cache-Control": "no-cache",
														
 
															-            "Connection": "keep-alive",
														
 
															-            "Content-Type": "application/json",
														
 
															-            "Pragma": "no-cache",
														
 
															-            "Referer": "http://www.weain.mil.cn/",
														
 
															-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
														
 
															-            "X-Requested-With": "XMLHttpRequest",
														
 
															-            "isEncrypt": "isNotEncrypt",
														
 
															-            "token;": ""
														
 
															-        }
														
 
															-
														
 
															-
														
 
															-    def start_requests(self):
														
 
															-        for menu in self.menus:
														
 
															-            start_url = f"http://www.weain.mil.cn/api/front/list/{menu.cid}/list"
														
 
															-            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
														
 
															-
														
 
															-    def download_midware(self, request):
														
 
															-        page = request.page
														
 
															-        menu = request.item
														
 
															-        params = {
														
 
															-            "LMID": menu.get('tid'),
														
 
															-            "pageNo": f"{page}",
														
 
															-            "_t": f"{round(time.time()*1000)}"
														
 
															-        }
														
 
															-        request.params = params
														
 
															-        request.headers = self.headers
														
 
															-
														
 
															-    def parse(self, request, response):
														
 
															-        menu = request.item
														
 
															-        info_list = response.json.get('list').get('contentList')
														
 
															-        for info in info_list:
														
 
															-            href = "http://www.weain.mil.cn" + info.get('pcUrl')
														
 
															-            title = info.get('nonSecretTitle').strip()
														
 
															-            create_time = info.get('publishTime')
														
 
															-
														
 
															-            area = "全国"      # 省份
														
 
															-            city = ""    # 城市
														
 
															-
														
 
															-            list_item = MgpListItem()  # 存储数据的管道
														
 
															-            list_item.href = href  # 标书链接
														
 
															-            list_item.unique_key = ('title','href')
														
 
															-            list_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 （编辑器定的）
														
 
															-            list_item.spidercode = menu.get("code")  # 最上方定义的爬虫code（编辑器定的）
														
 
															-            list_item.title = title  # 标题
														
 
															-            list_item.site = self.site
														
 
															-            list_item.publishtime = create_time
														
 
															-            list_item.area = area  # 城市默认:全国
														
 
															-            list_item.city = city  # 城市 默认为空
														
 
															-
														
 
															-            list_item.unique_key = ('href',)
														
 
															-            list_item.parse = "self.detail_get"  # 详情页回调方法
														
 
															-            list_item.deal_detail = ['//div[@id="content"]']
														
 
															-            list_item.proxies = False
														
 
															-            list_item.parse_url = href
														
 
															-
														
 
															-            yield list_item
														
 
															-
														
 
															-        # 无限翻页
														
 
															-        request = self.infinite_pages(request, response)
														
 
															-        yield request
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    Feapder(redis_key="lzz:qjwqzbcgxxw_cgxq").start()
														
--- a/全军武器装备采购信息网/采购公告需求-详情页.py
+++ b/全军武器装备采购信息网/采购公告需求-详情页.py
@@ -1,76 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-"""
														
 
															-Created on 2024-10-10
														
 
															----------
														
 
															-@summary: 全军武器装备采购信息网
														
 
															----------
														
 
															-@author: lzz
														
 
															-"""
														
 
															-import feapder
														
 
															-from items.spider_item import DataBakItem
														
 
															-from feapder.utils.tools import log
														
 
															-from untils.attachment import AttachmentDownloader
														
 
															-from untils.tools import remove_htmldata
														
 
															-import re
														
 
															-
														
 
															-headers = {
														
 
															-    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
														
 
															-    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
														
 
															-    "Cache-Control": "no-cache",
														
 
															-    "Connection": "keep-alive",
														
 
															-    "Pragma": "no-cache",
														
 
															-    "Upgrade-Insecure-Requests": "1",
														
 
															-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
														
 
															-}
														
 
															-
														
 
															-
														
 
															-class Details(feapder.BiddingDetailSpider):
														
 
															-
														
 
															-
														
 
															-    def start_requests(self):
														
 
															-        data_list = self.get_tasks_by_rabbitmq(limit=10)
														
 
															-        for item in data_list:
														
 
															-            # log.debug(item)
														
 
															-            request_params = item.get("request_params")
														
 
															-            yield feapder.Request(url=item.get("parse_url"), item=item,
														
 
															-                                          deal_detail=item.get("deal_detail"),
														
 
															-                                          callback=eval(item.get("parse")), **request_params)
														
 
															-
														
 
															-
														
 
															-    def detail_get(self,request,response):
														
 
															-        items = request.item
														
 
															-        list_item = DataBakItem(**items)
														
 
															-
														
 
															-        html = response.xpath('//div[@id="content"]|//div[@class="secret"]').extract_first()
														
 
															-        rl_list = ['//span[@id="demandPv"]','点击次数：','//div[@class="right"]',
														
 
															-                   '//div[@id="demandDocking"]','未经授权，严禁转载']
														
 
															-        html = remove_htmldata(rl_list,html,response)
														
 
															-
														
 
															-        html2 = "".join(re.findall("htmlDecode\('(.*?)'\)\);", response.text, re.S))
														
 
															-        html3 = "".join(re.findall("demandPrerequisites = '(.*?)';", response.text, re.S))
														
 
															-
														
 
															-        list_item.contenthtml = html + html2 + html3
														
 
															-
														
 
															-        file_org = "".join(re.findall('var url = "file/(.*?)"',response.text))
														
 
															-        file_types = ['zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'jpg',
														
 
															-                      'png', 'zbid', 'xls', 'xlsx', 'swp', 'dwg', 'wps']
														
 
															-        if file_org:
														
 
															-            attachments = {}
														
 
															-            file_url = f"http://www.weain.mil.cn/file/{file_org}"
														
 
															-            file_name = file_url.split('/')[-1]
														
 
															-            file_type = file_url.split('.')[-1].lower()
														
 
															-
														
 
															-            if file_type in file_types:
														
 
															-                attachment = AttachmentDownloader().fetch_attachment(
														
 
															-                    file_name=file_name, file_type=file_type,
														
 
															-                    download_url=file_url,proxies=request.proxies())
														
 
															-                attachments[str(len(attachments) + 1)] = attachment
														
 
															-
														
 
															-            if attachments:
														
 
															-                list_item.projectinfo = {"attachments": attachments}
														
 
															-
														
 
															-        yield list_item
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    Details(redis_key="lzz:qjwqzbcgxxw_cgxq").start()