dzr 3 months ago
parent commit 0917321869
86 files changed with 478 additions and 460 deletions
  1. + 1 - 0  lzz_theme/bqgyjtgscgdzswpt/bqgy_cookies.txt
  2. + 1 - 1  lzz_theme/clgjzbcgjtyxgs/clgj_cookies.txt
  3. + 13 - 10  lzz_theme/crontab.txt
  4. + 1 - 1  lzz_theme/qgzbgggsssyq/spider_detail_retry.py
  5. + 1 - 1  lzz_theme/qgzbgggsssyq/spider_list_area.py
  6. + 0 - 126  lzz_theme/qjwqzbcgxxw/1.py
  7. + 73 - 9  lzz_theme/qjwqzbcgxxw/qjwqzb_details.py
  8. + 3 - 3  lzz_theme/qjwqzbcgxxw/qjwqzb_list.py
  9. + 2 - 2  lzz_theme/sfc/sfc_cjgg_list.py
  10. + 1 - 1  lzz_theme/sfc/sfc_cookies.txt
  11. + 2 - 2  lzz_theme/sfc/sfc_gkbx_list.py
  12. + 2 - 2  lzz_theme/sfc/sfc_gzgg_list.py
  13. + 2 - 2  lzz_theme/sfc/sfc_htgg_list.py
  14. + 1 - 1  lzz_theme/sfc/sfc_uuid.txt
  15. + 1 - 1  lzz_theme/sfc/sfc_zzgg_detail.py
  16. + 2 - 2  lzz_theme/sfc/sfc_zzgg_list.py
  17. BIN  lzz_theme/sfc/slice.png
  18. BIN  lzz_theme/szycycgpt/slice.png
  19. + 1 - 1  lzz_theme/tjszfcgw/tjszfcgw_list.py
  20. + 1 - 1  lzz_theme/utils/chaojiying.py
  21. + 9 - 12  lzz_theme/utils/tools.py
  22. + 3 - 1  lzz_theme/xgyyglj/xgyy_spider_details.py
  23. + 2 - 0  lzz_theme/ynszfcgw/yncgyx_details.py
  24. + 1 - 1  lzz_theme/yyc/yyc_ck.json
  25. + 0 - 1  lzz_theme/yzcbjkjfzyxgs/yzc_details.py
  26. + 0 - 2  lzz_theme/yzcbjkjfzyxgs/yzc_list.py
  27. + 1 - 0  lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs.js
  28. + 1 - 1  lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs_ck.py
  29. + 1 - 1  lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_cookies.json
  30. + 1 - 1  lzz_theme/zgwkjtyxgs/list_spider.py
  31. + 1 - 1  lzz_theme/zgwkjtyxgs/zgwkjtyxgs_details.py
  32. + 1 - 1  lzz_theme/zgwkjtyxgs/zgwkjtyxgs_details2.py
  33. + 1 - 2  lzz_theme/zgzbtbggfwpt/history_crawl.py
  34. + 131 - 0  lzz_theme/zgzbtbggfwpt/list_spider.py
  35. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd.js
  36. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_byx_ck.json
  37. + 3 - 6  lzz_theme/zmdszfcgdzsc/zmd_byx_details.py
  38. + 2 - 2  lzz_theme/zmdszfcgdzsc/zmd_byx_list.py
  39. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_gxq_ck.json
  40. + 6 - 9  lzz_theme/zmdszfcgdzsc/zmd_gxq_details.py
  41. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_gxq_list.py
  42. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_kfq_ck.json
  43. + 6 - 9  lzz_theme/zmdszfcgdzsc/zmd_kfq_details.py
  44. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_kfq_list.py
  45. + 65 - 71  lzz_theme/zmdszfcgdzsc/zmd_login.py
  46. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_pyx_ck.json
  47. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_pyx_details.py
  48. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_pyx_list.py
  49. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_qsx_ck.json
  50. + 6 - 7  lzz_theme/zmdszfcgdzsc/zmd_qsx_details.py
  51. + 1 - 2  lzz_theme/zmdszfcgdzsc/zmd_qsx_list.py
  52. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_qyzq_ck.json
  53. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_qyzq_details.py
  54. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_qyzq_list.py
  55. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_rnx_ck.json
  56. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_rnx_details.py
  57. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_rnx_list.py
  58. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_sbj_ck.json
  59. + 7 - 6  lzz_theme/zmdszfcgdzsc/zmd_sbj_details.py
  60. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_sbj_list.py
  61. + 1 - 2  lzz_theme/zmdszfcgdzsc/zmd_sbjcgyx_list.py
  62. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_scx_ck.json
  63. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_scx_details.py
  64. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_scx_list.py
  65. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_sfq_ck.json
  66. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_sfq_details.py
  67. + 2 - 3  lzz_theme/zmdszfcgdzsc/zmd_sfq_list.py
  68. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_spx_ck.json
  69. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_spx_details.py
  70. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_spx_list.py
  71. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_xcx_ck.json
  72. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_xcx_details.py
  73. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_xcx_list.py
  74. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_xpx_ck.json
  75. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_xpx_details.py
  76. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_xpx_list.py
  77. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_ycq_ck.json
  78. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_ycq_details.py
  79. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_ycq_list.py
  80. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_zszq_ck.json
  81. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_zszq_details.py
  82. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_zszq_list.py
  83. + 1 - 1  lzz_theme/zmdszfcgdzsc/zmd_zyx_ck.json
  84. + 6 - 8  lzz_theme/zmdszfcgdzsc/zmd_zyx_details.py
  85. + 3 - 4  lzz_theme/zmdszfcgdzsc/zmd_zyx_list.py
  86. + 1 - 1  lzz_theme/ztlbsww/ztlbsww_ck.json

+ 1 - 0
lzz_theme/bqgyjtgscgdzswpt/bqgy_cookies.txt

@@ -0,0 +1 @@
+94ee8a9a-5b88-4bb3-9429-a3fb42534676

+ 1 - 1
lzz_theme/clgjzbcgjtyxgs/clgj_cookies.txt

@@ -1 +1 @@
-eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyOTYyOTIiLCJpc3MiOiJwbGF0Zm9ybUNlbnRlciIsImlhdCI6MTc0MzQ2OTIxOSwiZXhwIjoxNzQzNTU1NjE5LCJuYmYiOjE3NDM0NjkyMTksImp0aSI6IjU5ZTAwMjllNjQ2ZjRhNjA5MzgwY2IyY2U2ZDRiOWZmIiwiYXVkIjpbImJVc2VyIl0sInVzZXJJZCI6Mjk2MjkyLCJ1c2VyTmFtZSI6IjEzMjIzMDc0MDAzIiwidGVuYW50SWQiOjIyMDEsImVtYWlsQWRkcmVzcyI6IjEzMjIzMDc0MDAzQGludmFsaWQuY24iLCJ0eXBlIjoiYWNjZXNzVG9rZW4ifQ.H2Gugj4nsPdeY-WJvTyAjTzHOFiA4s9HDkq5C6b3eNcZTwUb_G8VG5HBA3ENBgXjCHARxpN7mIocAUPSGr5iIDw4_8_aImdZvp-NhoTRitkSYvnO-S2vabBJDLkw9GR4ui6vENfKMllcYvM6TpE_Z42RzvMkW0V_iLcuo2oK2VmncPoN1GhJhZtWfxmfHuLCEPPv130cLvdjB-GPy3EWyzYWUlxcOktZ6KuDAciUQernR0GU1GenGCyazszJpT7JAA9INWontdTZ-iBBkYk9rS4GbVuWDlPNQRz4fB8NiqcSQMRC5Z0yFfJSTHMnE1eTqs-wiBhsXHX6OV32zrP02g
+eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyOTYyOTIiLCJpc3MiOiJwbGF0Zm9ybUNlbnRlciIsImlhdCI6MTc0NTk3NDkxNiwiZXhwIjoxNzQ2MDYxMzE2LCJuYmYiOjE3NDU5NzQ5MTYsImp0aSI6ImYzOWQwMGYzNWViNjQ4NmM4ZGM5Yjc5ZThkNGFkYjYzIiwiYXVkIjpbImJVc2VyIl0sInVzZXJJZCI6Mjk2MjkyLCJ1c2VyTmFtZSI6IjEzMjIzMDc0MDAzIiwidGVuYW50SWQiOjIyMDEsImVtYWlsQWRkcmVzcyI6IjEzMjIzMDc0MDAzQGludmFsaWQuY24iLCJ0eXBlIjoiYWNjZXNzVG9rZW4ifQ.Y9v0HbZYxa05iAnzRt3o5CVVgIDVmAIG4QRuWbony-6Fu85uYr2Qsrv8zsHfvcyi3S_EFVcalQi60xqXyt8xJRgV9mA1lKTYV7TW7C18AMMozTVvh-BPTdv4bc3oH6F4Xtzc9aSdZx_2_AiNSSXtny-M1ybARqatnPdmO54Mu7dEIibtUHNn_j1Y3Dh2QidD7xuNgDHen1AbysZMGNF9osDDjbxi99UBoyJomVvhFYs0FiS1e4eXz_zVDuQSIVw_9aQgE99nqLQ2sILes62EJV_4dk5qOUNs5JHW2BnsAZoLcwzT8SWhvsVtGqFBx4VIlV1jGdDEyVvgjlVpuwg0HQ

+ 13 - 10
lzz_theme/crontab.txt

@@ -1,4 +1,4 @@
-
+PYTHONPATH=/mnt/lzz_theme/utils:/mnt/lzz_theme
 NODE_PATH=/usr/lib/node_modules
 
 50 3 * * * cd /mnt/lzz_theme && ./rm_file.sh
@@ -40,16 +40,19 @@ NODE_PATH=/usr/lib/node_modules
 0 7 * * * cd /mnt/lzz_theme/zgwkjtyxgs && ./retry_start.sh
 
 # 中国招标投标公共服务平台
-*/10 6-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && ./list_start.sh
-*/10 6-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && ./detail_start.sh
-0 6-23/1  * * * cd /mnt/lzz_theme/zgzbtbggfwpt && ./his_start.sh
-0 7 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && ./retry_start.sh
+*/2 7-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_f.py > /dev/null &
+*/20 * * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_details.py > /dev/null &
+# 05 22 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_b.py > /dev/null &
+20 0 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_date.py > /dev/null &
+15 4 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 spider_detail_retry.py > /dev/null &
 
 # 全国招标公告公示搜索引擎
-*/20 6-23 * * * cd /mnt/lzz_theme/qgzbgggsssyq && ./start.sh
-10 6-23/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 py_ssyq_list_bu.py > log/py_ssyq_list_bu.out 2>&1
-20 8 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 py_ssyq_details_bu.py > log/py_ssyq_details_bu.out 2>&1
-30 9 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 ssyq_main.py > log/ssyq_main.out 2>&1
+*/2 9-23,0 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_f.py > /dev/null &
+*/20 * * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail.py > /dev/null &
+# */20 * * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list.py > /dev/null &
+30 1 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail_retry.py > /dev/null &
+10 1-8/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_area.py > /dev/null &
+# 10 1-8/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_b.py > /dev/null &
 
 # 建设项目环境影响登记表备案系统 
 50 8 * * * cd /mnt/lzz_theme/jsxmhjyxdjbbaxt && ./start.sh
@@ -100,7 +103,7 @@ NODE_PATH=/usr/lib/node_modules
 10 7-20/1 * * * cd /mnt/lzz_theme/sfc && ./start.sh
 
 # 全军武器装备采购信息网
-15 6-21/1 * * * cd /mnt/lzz_theme/qjwqzbcgxxw && ./start.sh
+# 15 6-21/1 * * * cd /mnt/lzz_theme/qjwqzbcgxxw && ./start.sh
 
 # 浪潮电子采购平台
 10 6-21/1 * * * cd /mnt/lzz_theme/lcdzcgpt && ./start.sh

+ 1 - 1
lzz_theme/qgzbgggsssyq/spider_detail_retry.py

@@ -44,4 +44,4 @@ class RetrySpider(Spider):
 
 
 if __name__ == "__main__":
-    RetrySpider(sizes=10000, threads=50).start()
+    RetrySpider(sizes=10000, threads=100).start()

+ 1 - 1
lzz_theme/qgzbgggsssyq/spider_list_area.py

@@ -83,4 +83,4 @@ class Spider(ListSpider):
 
 
 if __name__ == '__main__':
-    Spider(pages=100, threads=5, interval=0.4).start()
+    Spider(pages=100, threads=10, interval=0.4).start()

+ 0 - 126
lzz_theme/qjwqzbcgxxw/1.py

@@ -1,126 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2025-02-11
----------
-@summary: 全军武器装备采购信息网 - 详情页
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from utils.attachment import AttachmentDownloader
-from utils.tools import *
-from parsel import Selector
-import warnings
-
-warnings.filterwarnings('ignore')
-
-
-class Details:
-
-    def __init__(self):
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        self.zt_details = self.db_table.data_bak
-        self.proxy = None
-
-    def detail_get(self, response, item):
-        response.encoding = response.apparent_encoding
-        root = Selector(response.text)
-
-        html = root.xpath('//div[@id="content"]|//div[@class="secret"]').extract_first("")
-        rl_list = ['//span[@id="demandPv"]', '点击次数:', '//div[@class="right"]',
-                   '//div[@id="demandDocking"]', '未经授权,严禁转载']
-        html = remove_htmldata(rl_list, html, root)
-
-        html2 = "".join(re.findall("htmlDecode\('(.*?)'\)\);", response.text, re.S))
-        html3 = "".join(re.findall("demandPrerequisites = '(.*?)';", response.text, re.S))
-
-        file_org = "".join(re.findall('var url = "file/(.*?)"', response.text))
-        file_types = ['zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'jpg',
-                      'png', 'zbid', 'xls', 'xlsx', 'swp', 'dwg', 'wps']
-        if file_org:
-            attachments = {}
-            file_url = f"http://www.weain.mil.cn/file/{file_org}"
-            file_name = file_url.split('/')[-1]
-            file_type = file_url.split('.')[-1].lower()
-
-            if file_type in file_types:
-                attachment = AttachmentDownloader().fetch_attachment(
-                    file_name=file_name, file_type=file_type,
-                    download_url=file_url, proxies=self.proxy)
-                attachments[str(len(attachments) + 1)] = attachment
-
-            if attachments:
-                item['projectinfo'] = {"attachments": attachments}
-
-        item["contenthtml"] = html + html2 + html3
-
-        item = format_fileds(item)
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def fetch_request(self, item):
-        headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
-        }
-        vv= item.get('publishtime').replace("-","").replace(":","").replace(" ","")
-        response = requests.get(url=item.get("parse_url")+f"?v={vv}", headers=headers,
-                                proxies=self.proxy, timeout=(30, 30), verify=False)
-        time.sleep(1)
-        return response
-
-    def deal_request(self, item):
-        retry_times = 0
-        org_item = item.copy()
-        while retry_times < 5:
-            try:
-                response = self.fetch_request(item)
-                res_code = response.status_code
-                if response and res_code == 200:
-                    self.detail_get(response, item=item)
-                    return True
-                else:
-                    retry_times += 1
-                    time.sleep(3)
-            except Exception as e:
-                item = org_item
-                logger.error(f"{item.get('competehref')} 采集异常:{e}")
-                retry_times += 1
-                time.sleep(random.randint(3, 6))
-        logger.warning(f"[采集失败]{item.get('competehref')}")
-        return False
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-
-        with self.db_name.find({"parser_name": "ztpc_qjwqzbcgxxw", "is_crawl": False}, sort=[('publishtime', -1)]).limit(limit) as cursor:
-            data_lsit = [dd for dd in cursor]
-        for item in data_lsit:
-            logger.debug(item)
-            update_id = item["_id"]
-            result = self.deal_request(item)
-            if result is True:
-                self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-            else:
-                self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
-            time.sleep(random.randint(5, 10))
-
-        logger.debug("********** 详情页采集结束 **********")
-
-
-if __name__ == "__main__":
-    Details().start(limit=50)
-

+ 73 - 9
lzz_theme/qjwqzbcgxxw/qjwqzb_details.py

@@ -1,13 +1,16 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-02-11
+Created on 2025-04-25
 ---------
 @summary: 全军武器装备采购信息网 - 详情页
 ---------
 @author: Lzz
 """
+import json
+import random
 import sys
 import os
+import time
 
 sys.path.append(os.path.dirname(os.getcwd()))
 from utils.attachment import AttachmentDownloader
@@ -25,14 +28,20 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.proxy = None
+        self.cookies = None
+        self.count = 0
 
     def detail_get(self, response, item):
         response.encoding = response.apparent_encoding
+        if '页面将在<span id="minnum">3</span>秒后跳转' in response.text:
+            logger.warning(" <<< cookies过期 >>> ")
+            return "500"
         root = Selector(response.text)
 
         html = root.xpath('//div[@id="content"]|//div[@class="secret"]').extract_first("")
-        rl_list = ['//span[@id="demandPv"]', '点击次数:', '//div[@class="right"]',
-                   '//div[@id="demandDocking"]', '未经授权,严禁转载']
+        rl_list = ['//span[@id="demandPv"]', '点击次数:', '//div[@class="right "]', '//span[@id="pv"]',
+                   '//div[@id="demandDocking"]', '(未经授权,严禁转载)', '未经授权,严禁转载',
+                   '//div[@class="right"]', ]
         html = remove_htmldata(rl_list, html, root)
 
         html2 = "".join(re.findall("htmlDecode\('(.*?)'\)\);", response.text, re.S))
@@ -76,13 +85,17 @@ class Details:
             "Upgrade-Insecure-Requests": "1",
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
         }
-        vv= item.get('publishtime').replace("-","").replace(":","").replace(" ","")
-        response = requests.get(url=item.get("parse_url")+f"?v={vv}", headers=headers,
+
+        vv = item.get('publishtime').replace("-", "").replace(":", "").replace(" ", "")
+        response = requests.get(url=item.get("parse_url") + f"?v={vv}", headers=headers, cookies=self.cookies,
                                 proxies=self.proxy, timeout=(30, 30), verify=False)
         time.sleep(1)
         return response
 
     def deal_request(self, item):
+        if not self.search_key(item.get('title')):
+            return True
+
         retry_times = 0
         org_item = item.copy()
         while retry_times < 5:
@@ -90,7 +103,9 @@ class Details:
                 response = self.fetch_request(item)
                 res_code = response.status_code
                 if response and res_code == 200:
-                    self.detail_get(response, item=item)
+                    rr = self.detail_get(response, item=item)
+                    if rr and rr == "500":
+                        return "500"
                     return True
                 else:
                     retry_times += 1
@@ -103,10 +118,46 @@ class Details:
         logger.warning(f"[采集失败]{item.get('competehref')}")
         return False
 
+    def search_key(self, title):
+        keywords_list = ['数据中心', '视频会议系统', '大数据', '虚拟化', '服务器', '交换机', '防火墙', '入侵检测',
+                         '云计算',
+                         '学习室', '网络', '云桌面', '智慧教室', '网络设备', '路由器', '负载均衡', 'SDN', '国产化改造',
+                         '智能管理平台', 'IMC', '存储', '分布式', '网络资源管理', '人工智能', '数据中心', '信息化',
+                         '云办公', '磁盘阵列', 'GPU', '硬件建设', '超算', '高算', '模拟中心', '机房设备', '高性能',
+                         '条件建设', '配套系统', '平台建设', '数据', '通用平台', '数据采集', '智慧卫勤', '推演', '孪生',
+                         '仿真', '兵棋', '智慧校园', '数档', 'AI', '大模型', '模拟', '态势', '数据治理', '财务数据',
+                         '云办公', '入侵防御', '机房设备', '模拟中心', 'RoCE', 'HPC']
+
+        for keyword in keywords_list:
+            if keyword in title:
+                return True
+
+        return False
+
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-
-        with self.db_name.find({"parser_name": "ztpc_qjwqzbcgxxw", "is_crawl": False, "failed": False},
+        # time.sleep(random.randint(10,60))
+        with open('./cookies_info.json', 'r') as fr:
+            cks = json.loads(fr.read())
+        now_time = int(time.time())
+        login_time = cks.get('login_time')
+        total_count = cks.get('total_count')
+        self.cookies = cks.get('cookies')
+        self.count = cks.get('perday_count')
+
+        if login_time + 520000 < now_time:
+            logger.warning(" <<< cookies失效,更换cookies >>> ")
+            return
+        if total_count > 3000:
+            logger.warning(" <<< 本次登录采集上限,重新登陆继续采集 >>> ")
+            return
+
+        if self.count > 500:
+            logger.warning(" <<< 今日采集上限,明日继续采集 >>> ")
+            return
+
+        # with self.db_name.find({"parser_name": "ztpc_qjwqzbcgxxw", "is_crawl": False, "failed": False},
+        with self.db_name.find({"parser_name": "ztpc_qjwqzbcgxxw", "is_crawl": False,},
                                sort=[('publishtime', -1)]).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
@@ -114,13 +165,26 @@ class Details:
             update_id = item["_id"]
             result = self.deal_request(item)
             if result is True:
+                self.count += 1
                 self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
+            elif result == "500":
+                break
             else:
                 self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
             time.sleep(random.randint(5, 10))
 
+        total_count += self.count
+        new_info = {
+            "cookies": self.cookies,
+            "total_count": total_count,
+            "perday_count": self.count,
+            "login_time": login_time
+        }
+        with open('./cookies_info.json', 'w+') as fw:
+            fw.write(json.dumps(new_info))
+
         logger.debug("********** 详情页采集结束 **********")
 
 
 if __name__ == "__main__":
-    Details().start(limit=50)
+    Details().start(limit=random.randint(60,100))

+ 3 - 3
lzz_theme/qjwqzbcgxxw/qjwqzb_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-02-11
+Created on 2025-04-25
 ---------
 @summary: 全军武器装备采购信息网 - 列表页
 ---------
@@ -140,8 +140,8 @@ if __name__ == '__main__':
     Menu = namedtuple('Menu', ['channel', 'spidercode', 'tid', 'cid', 'crawl_page'])
 
     menus = [
-        Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231276155707394', 'cggg', 5),
-        Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231318006472705', 'cggg', 10),
+        Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231276155707394', 'cggg', 10),
+        Menu('采购公告', 'a_qjwqzbcgxxw_cggg_jdgg', '1149231318006472705', 'cggg', 30),
         Menu('采购需求', 'a_qjwqzbcgxxw_cgxq_jdxq', 'HZ287281676ce46401676cf0975c000e', 'cgxq', 1),
         Menu('采购需求', 'a_qjwqzbcgxxw_cgxq_jdxq', 'HZ287281676ce46401676cf59ca5001b', 'cgxq', 1),
     ]

+ 2 - 2
lzz_theme/sfc/sfc_cjgg_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-22
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 公告信息-成交公告
 ---------
@@ -22,7 +22,7 @@ warnings.filterwarnings('ignore')
 class Crawl_sfc:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = False
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
 

+ 1 - 1
lzz_theme/sfc/sfc_cookies.txt

@@ -1 +1 @@
-eyJhbGciOiJIUzI1NiJ9.eyJ1c2VyQ29udGV4dCI6IntcInVzZXJuYW1lXCI6XCJ0b3AxMjNcIixcIm5pY2tOYW1lXCI6XCLlvKDph5HlnaRcIixcImZhY2VcIjpcImdyb3VwMS9NMDAvMDUvMDQvd0tnQWNXZHpTeVNBUGhJc0FBQlVQVS16UUtNNy5uLmpwZyxncm91cDEvTTAwLzBGLzRDL3dLZ0FjR2R6U3ktQUlQSkRBQUE3bERVR3kyMDIubi5wbmdcIixcImlkXCI6XCIxODcxODU2NjE0MzQ2NjIwOTI4XCIsXCJsb25nVGVybVwiOmZhbHNlLFwicm9sZVwiOlwiTUVNQkVSXCIsXCJzdG9yZUlkXCI6XCIxODcxODYyOTM4MzE5ODU5NzE0XCIsXCJjbGVya0lkXCI6XCIxODczOTIzMjI4MDE2NDUxNTg2XCIsXCJzdG9yZU5hbWVcIjpcIuays-WNl-aLk-aZruiuoeeul-acuue9kee7nOW3peeoi-aciemZkOWFrOWPuFwiLFwiY29tcGFueVNjYWxlXCI6XCJtZWR1aW1cIixcImNvbXBhbnlOYW1lXCI6XCLmsrPljZfmi5Pmma7orqHnrpfmnLrnvZHnu5zlt6XnqIvmnInpmZDlhazlj7hcIixcImlzU3VwZXJcIjp0cnVlLFwiYXJlYUNvZGVcIjpcIjQxMDEwNVwiLFwibGFzdExvZ2luRGF0ZVwiOlwiQXByIDEsIDIwMjUgOToxMToxMCBBTVwiLFwicmVtb3RlSXBcIjpcIjEwMS4yMDAuMjA5LjExXCJ9Iiwic3ViIjoidG9wMTIzIiwiZXhwIjoxNzQzNTEzMDcyfQ.dXqdBhvxp7_nrtFLgbV589zwmjj9FriCP75mhyBgpbU
+eyJhbGciOiJIUzI1NiJ9.eyJ1c2VyQ29udGV4dCI6IntcInVzZXJuYW1lXCI6XCJ0b3AxMjNcIixcIm5pY2tOYW1lXCI6XCLlvKDph5HlnaRcIixcImZhY2VcIjpcImdyb3VwMS9NMDAvMDUvMDQvd0tnQWNXZHpTeVNBUGhJc0FBQlVQVS16UUtNNy5uLmpwZyxncm91cDEvTTAwLzBGLzRDL3dLZ0FjR2R6U3ktQUlQSkRBQUE3bERVR3kyMDIubi5wbmdcIixcImlkXCI6XCIxODcxODU2NjE0MzQ2NjIwOTI4XCIsXCJsb25nVGVybVwiOmZhbHNlLFwicm9sZVwiOlwiTUVNQkVSXCIsXCJzdG9yZUlkXCI6XCIxODcxODYyOTM4MzE5ODU5NzE0XCIsXCJjbGVya0lkXCI6XCIxODczOTIzMjI4MDE2NDUxNTg2XCIsXCJzdG9yZU5hbWVcIjpcIuays-WNl-aLk-aZruiuoeeul-acuue9kee7nOW3peeoi-aciemZkOWFrOWPuFwiLFwiY29tcGFueVNjYWxlXCI6XCJtZWR1aW1cIixcImNvbXBhbnlOYW1lXCI6XCLmsrPljZfmi5Pmma7orqHnrpfmnLrnvZHnu5zlt6XnqIvmnInpmZDlhazlj7hcIixcImlzU3VwZXJcIjp0cnVlLFwiYXJlYUNvZGVcIjpcIjQxMDEwNVwiLFwibGFzdExvZ2luRGF0ZVwiOlwiQXByIDI5LCAyMDI1IDM6MTE6MTggUE1cIixcInJlbW90ZUlwXCI6XCIxMDEuMjAwLjIwOS4xMVwifSIsInN1YiI6InRvcDEyMyIsImV4cCI6MTc0NTk4OTg3MX0.zyZFe5qkUIijvGqNqrclaE47JbG6HEdi5qy5HQ2Cdj0

+ 2 - 2
lzz_theme/sfc/sfc_gkbx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-23
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 公告信息-公开比选
 ---------
@@ -23,7 +23,7 @@ warnings.filterwarnings('ignore')
 class Crawl_sfc:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = False
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
 

+ 2 - 2
lzz_theme/sfc/sfc_gzgg_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-23
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 公告信息-更正公告
 ---------
@@ -23,7 +23,7 @@ warnings.filterwarnings('ignore')
 class Crawl_sfc:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = False
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
 

+ 2 - 2
lzz_theme/sfc/sfc_htgg_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-23
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 公告信息-合同公告
 ---------
@@ -22,7 +22,7 @@ warnings.filterwarnings('ignore')
 class Crawl_sfc:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = False
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
 

+ 1 - 1
lzz_theme/sfc/sfc_uuid.txt

@@ -1 +1 @@
-91600922-8bd3-46cc-b8fc-ea99bb7173c0
+5244fd4b-b1c2-4f66-98e2-d32175e29a7e

+ 1 - 1
lzz_theme/sfc/sfc_zzgg_detail.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-23
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 详情页
 ---------

+ 2 - 2
lzz_theme/sfc/sfc_zzgg_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-23
+Created on 2025-04-25
 ---------
 @summary: 苏服采 - 公告信息-终止公告
 ---------
@@ -22,7 +22,7 @@ warnings.filterwarnings('ignore')
 class Crawl_sfc:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = False
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
 

BIN
lzz_theme/sfc/slice.png


BIN
lzz_theme/szycycgpt/slice.png


+ 1 - 1
lzz_theme/tjszfcgw/tjszfcgw_list.py

@@ -74,7 +74,6 @@ class Crawl_Tjs:
 
     def fetch_list_page(self, page, menu, ss):
         logger.debug(f' *** 开始采集第{page}页 ***')
-
         headers = {
             "Accept": "*/*",
             "Accept-Language": "zh-CN,zh;q=0.9",
@@ -113,6 +112,7 @@ class Crawl_Tjs:
     def parser_list_page(self, response, page, menu):
         if '您的访问过于频繁,请稍后再试' in response.text:
             raise RequestException('您的访问过于频繁,请稍后再试')
+
         results_list = []
         info_list = Selector(response.text).xpath('//ul[@class="dataList"]/li')
         for info in info_list:

+ 1 - 1
lzz_theme/utils/chaojiying.py

@@ -37,7 +37,7 @@ def postpic(im, codetype=6001, jy_code=None):
     pic_type = codetype
     files = _pack_file(file)
     url = f"{JY_SERVER}/v1/images/discern?pic_type={pic_type}"
-    if spidercode is not None:
+    if jy_code is not None:
         url = f"{JY_SERVER}/v1/images/discern?pic_type={pic_type}&jy_code={jy_code}"
 
     data = {

+ 9 - 12
lzz_theme/utils/tools.py

@@ -6,26 +6,23 @@ Created on 2024-04-09
 ---------
 @author: Lzz
 """
-import sys
-import os
 
-sys.path.append(os.path.dirname(os.getcwd()))
+import calendar
+import datetime
+import functools
+import hashlib
+import random
 import re
 import time
+from collections import namedtuple
+
 import bson
+import execjs
 import redis
 import requests
-import datetime
-import calendar
-import hashlib
-import random
-import execjs
-import functools
-from hashlib import md5
 from loguru import logger
-from collections import namedtuple
 from pymongo import MongoClient
-from pymongo.errors import DuplicateKeyError
+
 from .clean_html import cleaner
 
 SearchText = namedtuple('SearchText', ['total'])

+ 3 - 1
lzz_theme/xgyyglj/xgyy_spider_details.py

@@ -9,6 +9,8 @@ Created on 2024-01-23
 import sys
 import os
 
+from pymongo.errors import DuplicateKeyError
+
 sys.path.append(os.path.dirname(os.getcwd()))
 import random
 from utils.clean_html import cleaner
@@ -53,7 +55,7 @@ class Details:
             if info:
                 if re.search('\w', info):
                     # tran_info = BD_spider().baidu(info)
-                    new_data_list.append(tran_info)
+                    new_data_list.append(info)
                 else:
                     new_data_list.append(info)
 

+ 2 - 0
lzz_theme/ynszfcgw/yncgyx_details.py

@@ -9,6 +9,8 @@ Created on 2024-09-22
 import sys
 import os
 
+from pymongo.errors import DuplicateKeyError
+
 sys.path.append(os.path.dirname(os.getcwd()))
 from utils.attachment import AttachmentDownloader
 from get_cookies import get_ck

+ 1 - 1
lzz_theme/yyc/yyc_ck.json

@@ -1 +1 @@
-{"at": "7d4ad986-5a99-4c7b-831d-b37f362e932d", "CASPRIVACY": "true", "JSESSIONID": "F30F44761B9C433B79395D903BC4107D", "_yht_code_uuid": "84e9f91d-1588-4fa0-944c-d21753321a7b", "acw_tc": "276082a017434638222263777e08ba224eac08a4c4507ffbdf4c0722b44f85", "redirect_url": "https%3A%2F%2Fyc.yonyoucloud.com%2Fyuncai%2FSSO%2Flogin.jsp%3Fr%3DL3l1bmNhaS9wb3J0YWw%26", "yht_default_country": "86", "yht_default_login_type": "mobile", "TGC": "eyJhbGciOiJIUzUxMiJ9.ZXlKaGJHY2lPaUprYVhJaUxDSmxibU1pT2lKQk1USTRRMEpETFVoVE1qVTJJbjAuLm9iOFdfX3I4Um5jc29udTRDeU9nWWcuTDdKSENPM25ybVRMejg3SzZVZmVsZC1BMjVJNUl6X0MxUG1LN191VXZwVkRiTkl2RkJmVHhMNkZoSHU1OFVqemRoRExMYS1kVUszUTZramRnVWdZVmpSSEdycm1fbmwtUEg0SUV4cmFoN0NoMHlPa0dtS29rdTZnNFJCZlI2R2xoTGRqOEk1SmhfS2x2R2JzM2xwbUJ4bnVIMjFoWVF4NVpkOHlIVFNQRGFlZ3lwNUVJbGhPOVJaZGlIZ29YdU9DV0NNeGhjOU4xUmp3dUJKcWg4dzAyM1psWHlJNXF6S1UyQUoyOUlGRU1ydy4tcGxZSy1VNWozS1FJbXJpb05WVmN3.RC1N-ZCjPhL5VrjSJl09Vwgew2JllSG95dC9N_dWhXVe0b0oAXii3rWW9O6aBIPZUD_-jRinpAlM1QbSThi_Zg", "HWWAFSESID": "aeaf773723d844176b", "HWWAFSESTIME": "1743463823899", "m_biztier_sign": "", "tenantid": "sfg8fw7k", "token": "d2ViLDM2MDAsaFdVRGxxMDZIRjBIZVZPSmpaNEtnMzJTbmkrVjFObC9MWVZhazR6SFo5Tk0vYldqdWw1YmVKcW1qYVF6cE1HYmJ0cjlPMEJkRFBacW9MSG5xdDA4cnc9PQ", "u_logints": "1743463824057", "u_usercode": "4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "ucuserid": "4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "yht_tenantinfo": "sfg8fw7k", "yht_username": "ST-45865926-uOzMXO7oKq3hwBsceflR-online__4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "yht_usertoken": "aF%2FBoZO8QO0DytaTIR8tCLDIDsQ11t4VAcmJgZPy5FeN0muGmz8hBWvrg4oxhEsktfdWG59X59j94JnbXGnOug%3D%3D"}
+{"at": "90f15fad-464b-46d9-90f6-0779efac8838", "CASPRIVACY": "true", "JSESSIONID": "48BEFBFE5DFA3480704F86C8A5C7B8E4", "_yht_code_uuid": "bc58fb54-e5b8-4f37-904a-e7d6a769bcf2", "acw_tc": "1a0c660617459694170004591e0080f11fd1bf5dcd84714fa4107f2fd9b68b", "redirect_url": "https%3A%2F%2Fyc.yonyoucloud.com%2Fyuncai%2FSSO%2Flogin.jsp%3Fr%3DL3l1bmNhaS9wb3J0YWw%26", "yht_default_country": "86", "yht_default_login_type": "mobile", "TGC": "eyJhbGciOiJIUzUxMiJ9.ZXlKaGJHY2lPaUprYVhJaUxDSmxibU1pT2lKQk1USTRRMEpETFVoVE1qVTJJbjAuLjVieGdqR1NCUU5yMGo5bXF4MTVVckEuSG9scFhCb0NvWjlvS1Yxb0NMWFpCT25oZ2NFY2dwLXkwZHJTRXljTW1uUE41ZUdvblRDWGFVYS1FU2FOMlp6VVVJTEw1UXROWUh2TmlSb0MxaHFXcWJrclF0UVRZZk9kMjNHajBPcUlieENYSUFLTVg0WEdUcUthM0MzcWNLRGF1SUhfUmZXWjJPdldlNUV0M2M3ZFFCbVI0WVNRMjVNbkJNUk1nSmRxZFdqdnNnZ1dncXV5QkZvVDNvNUplRGNuY2d5X3Z4dHVmQ2ZURDJ4Q0l3T1Y2WjV0bUtyd3RMb1dwQXN4d1JFaElVWS55Y1VJREpweUVZOXhxZzIxSTcyOUdn.1wtIlFSv1d1TTlVHH9sIu-ofSqe9TzmZZtNfKovcJcG6QRDeVAvVw3P73VeTD-eib34-MVsShngMH6ljhByK0w", "HWWAFSESID": "5918073b820142ca8e", "HWWAFSESTIME": "1745969419199", "m_biztier_sign": "", "tenantid": "sfg8fw7k", "token": "d2ViLDM2MDAsZHlrOVE1ditCS2NLYmo2T3E4cWxNK2QycEZLMEYwOVduL3BpQlJwWWxHQ2FRSVN3c3UwRE9QdmtyVU5uaUFUalBaRW1yV3FSdWR6c04vcWNsL1gwWWc9PQ", "u_logints": "1745969419374", "u_usercode": "4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "ucuserid": "4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "yht_tenantinfo": "sfg8fw7k", "yht_username": "ST-792310246-zbY6PQyFAiujZsufweDo-online__4a7ecbb9-8ca7-4b0c-b9d6-7fad20374565", "yht_usertoken": "57ghhL3Aa%2FvJvHSzEvRpoMa6wlHafIuMvislcVetUi76K3VhSqdqbv3iGum8eQuENN5WcZCADqU7YusJ2srprw%3D%3D"}

+ 0 - 1
lzz_theme/yzcbjkjfzyxgs/yzc_details.py

@@ -54,7 +54,6 @@ class Details:
         response.encoding = response.apparent_encoding
         root = Selector(response.text)
         html = "".join(root.xpath('//div[@class="bulletin-detail"]|//div[@class="pane"]').extract())
-
         ex_str = "(1)登录“优质采云采购平台”(https://www.youzhicai.com/)公告查看页面点击“我要报价”。请未注册的供应商及时办理注册审核,注册咨询电话:400-0099-555。因未及时办理注册审核手续影响报价的,责任自负。"
         #  ex_str,'优质采云采购平台','https://www.youzhicai.com/','400-0099-555'
         rm_list = ['//div[@id="companyinfodetail"]', '//div[@class="deal-something"]',

+ 0 - 2
lzz_theme/yzcbjkjfzyxgs/yzc_list.py

@@ -49,9 +49,7 @@ class Spdier:
             "upgrade-insecure-requests": "1",
             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
         }
-
         url = "https://www.youzhicai.com/s/0_0_0_0_.html?key="
-
         data = {
             "MsStartDate": get_today_of_day(),
             "MsEndDate": get_today_of_day(),

+ 1 - 0
lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs.js

@@ -12,6 +12,7 @@ var _0x1d91 = function (_0x213c47, _0x709663) {
     var _0x1a538d = _0x1a53[_0x213c47];
     return _0x1a538d;
 };
+
 (function (_0x1ebd30, _0x53e814) {
     var _0x38c256 = _0x1d91;
     while (!![]) {

+ 1 - 1
lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs_ck.py

@@ -11,6 +11,7 @@ def create_cookie(proxies=False):
 
     username = "13938526112"
     password = "Hftp0304"
+
     logger.debug(f" >>> 登录账号:{username} ...")
     session = requests.session()
     session.proxies = proxies
@@ -37,7 +38,6 @@ def create_cookie(proxies=False):
         data1 = json.dumps(data1, separators=(',', ':'))
         res = session.post(url1, headers=headers, cookies=cks, data=data1)
         lid = res.json().get('data')[0].get('id')
-
         url = "https://www.youzhicai.com/login-api/logInVerify/accountLogInVerify"
         data = {
             "id": lid,

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_cookies.json

@@ -1 +1 @@
-{'user': '91110105756025873C', 'token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiI5MTExMDEwNTc1NjAyNTg3M0MiLCJleHAiOjE3NDM0OTgzNjIsImlhdCI6MTc0MzQ5MTE2MiwianRpIjoiYjI2NTg0YWEtM2QzNC00NGMwLThmYzEtNDdlZjIyMjIzMTRmIn0.r2c7W21tfZcZXj1uDVbzUQ7IHzPbRnhsrik4QWoP_wjo8xG9tShb5oedaBcgJ2FNrDTQ-SYqZL2o6VV01PGscQ', 'expire_time': 1743491411}
+{'user': '91110105756025873C', 'token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiI5MTExMDEwNTc1NjAyNTg3M0MiLCJleHAiOjE3NDU5ODIzMzYsImlhdCI6MTc0NTk3NTEzNiwianRpIjoiNDVkYTcxZTktZWQ0YS00YTA2LTliYTAtZTYxOGM3NzUzMGY3In0.XbYEnfj4EKp_9a8nD-xrKKgwr9ulabHMf0-xuCG4kVNy8cxLPCUa8vdMVXQ3x3xohCih8ACBhfNDBD75XcqfWA', 'expire_time': 1745975407}

+ 1 - 1
lzz_theme/zgwkjtyxgs/list_spider.py

@@ -34,6 +34,7 @@ class Crawl_Zgwk:
     def get_params(self, data, publikey):
         with open('zgwkjtyxgscgdzswpt.js', 'r') as f:
             exjs = f.read()
+
         ctcx = execjs.compile(exjs)
         param = ctcx.call('encryptParam', data, publikey)
         return param
@@ -51,7 +52,6 @@ class Crawl_Zgwk:
             "Pragma": "no-cache",
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
         }
-
         pk_url = "https://ec.minmetals.com.cn/open/homepage/public"
         pk_res = session.get(pk_url, headers=headers, proxies=self.proxy, timeout=60, verify=False)
         publikey = pk_res.text

+ 1 - 1
lzz_theme/zgwkjtyxgs/zgwkjtyxgs_details.py

@@ -41,6 +41,7 @@ class Details:
     def get_params(self, data, publikey):
         with open('zgwkjtyxgscgdzswpt.js', 'r') as f:
             exjs = f.read()
+
         ctcx = execjs.compile(exjs)
         param = ctcx.call('encryptParam', data, publikey)
         return param
@@ -50,7 +51,6 @@ class Details:
 
         if "jpgg" in item["href"]:
             detail_info = response.json().get('obj').get('info')
-
             if detail_info.get('offerTimes') == "0":
                 offerTimes = "一轮报价"
             else:

+ 1 - 1
lzz_theme/zgwkjtyxgs/zgwkjtyxgs_details2.py

@@ -41,6 +41,7 @@ class Details:
     def get_params(self, data, publikey):
         with open('zgwkjtyxgscgdzswpt.js', 'r') as f:
             exjs = f.read()
+
         ctcx = execjs.compile(exjs)
         param = ctcx.call('encryptParam', data, publikey)
         return param
@@ -50,7 +51,6 @@ class Details:
 
         if "jpgg" in item["href"]:
             detail_info = response.json().get('obj').get('info')
-
             if detail_info.get('offerTimes') == "0":
                 offerTimes = "一轮报价"
             else:

+ 1 - 2
lzz_theme/zgzbtbggfwpt/history_crawl.py

@@ -20,5 +20,4 @@ if __name__ == '__main__':
         Menu('中标候选人公示', 'a_zgzbtbggfwpt_zhbhxrgs2', '91', 'candidate', 50),
         Menu('资格预审公告', 'a_zgzbtbggfwpt_zgysgg2', '92', 'qualify', 10),
     ]
-
-    Crawl_Zgzb(menus).start()
+    Crawl_Zgzb(menus, threads=10, interval=0.8).start()

+ 131 - 0
lzz_theme/zgzbtbggfwpt/list_spider.py

@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-02
+---------
+@summary: 中国招标投标公共服务平台 - 列表页
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+from utils.RedisDB import RedisFilter
+from parsel import Selector
+from collections import namedtuple
+from utils.tools import *
+import requests
+
+
+class Crawl_Zgzb:
+
+    def __init__(self):
+        self.py_spider = Mongo_client().py_spider
+        self.zb_list = self.py_spider.theme_list
+        self.RDS = RedisFilter()
+        self.real_cont = 0
+
+    def get_time__2652(self, page=None, cid=None, rid=None):
+        with open('./zgzbtbggfwpt_pm.js', 'r') as fr:
+            ex_js = fr.read()
+        ctx = execjs.compile(ex_js)
+        return ctx.call('tm', page, cid, rid)
+
+    def fetch_list_page(self, page, menu):
+        logger.debug(f' *** 开始采集第{page}页 ***')
+
+        headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+        }
+
+        params = {
+            "searchDate": "2000-04-02",
+            "dates": "300",
+            "word": "",
+            "categoryId": menu.cid,
+            "industryName": "",
+            "area": "",
+            "status": "",
+            "publishMedia": "",
+            "sourceInfo": "",
+            "showStatus": "1",
+            "page": f"{page}",
+            "time__2652": self.get_time__2652(page, menu.cid, menu.rid),
+        }
+
+        url = f"https://bulletin.cebpubservice.com/xxfbcmses/search/{menu.rid}.html"
+        resp = requests.get(url, headers=headers, params=params, proxies=get_QGIP(), timeout=30, verify=False)
+        time.sleep(1)
+        return resp
+
+    def parser_list_page(self, response, page, menu):
+        results_list = []
+        info_list = Selector(response.text).xpath('//table[@class="table_text"]/tr')
+        for info in info_list[1:]:
+            h_org = info.xpath('./td[1]/a/@href').extract_first()
+            href = "".join(re.findall("javascript:urlOpen\('(.*?)'", h_org))
+            title = info.xpath('./td[1]/a/@title').extract_first("").strip()
+            area = info.xpath('./td[3]/span/@title').extract_first("").strip()
+            create_time = info.xpath('./td[5]/text()').extract_first("").strip()
+            if not create_time:
+                create_time = info.xpath('./td[4]/text()').extract_first("").strip()
+
+            dedup = [href]
+            if not self.RDS.data_filter(dedup):
+                item = {
+                    "site": "中国招标投标公共服务平台",
+                    "channel": menu.channel,
+                    "spidercode": menu.spidercode,
+                    "area": area or "全国",
+                    "city": "",
+                    "district": "",
+                    "href": href,
+                    "title": title,
+                    "publishtime": create_time,
+                    "parse_url": href,
+                    "parser_name": "ztpc_zgzbtbggfwpt",
+                    "is_mixed": False,
+                    "is_theme": True,
+                    "retry": 0,
+                    "comeintime": int2long(int(time.time())),
+                    "is_crawl": False,
+                    "failed": False,
+                }
+
+                self.zb_list.insert_one(item)
+                self.RDS.data_save_redis(dedup)
+                results_list.append(item)
+
+        logger.info(f' *** 第{page}页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
+        return results_list
+
+    def crawl_list_spider(self, page, menu):
+        retry_times = 0
+        while (retry_times := retry_times + 1) < 3:
+            try:
+                response = self.fetch_list_page(page=page, menu=menu)
+                res_code = response.status_code
+                logger.debug(f"第{page}页 状态码:{res_code}")
+                if response is not None and res_code == 200:
+                    informations = self.parser_list_page(response=response, page=page, menu=menu)
+                    self.real_cont += len(informations)
+                    logger.info(f"当前已采集 {self.real_cont} 条数据")
+                    time.sleep(random.random())
+                    break
+            except Exception as e:
+                logger.error(f"第{page}页 采集异常:{e}")
+
+    def start_list(self, menus):
+        logger.debug("********** 列表页开始 **********")
+        for menu in menus:
+            logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
+            page = menu.crawl_page
+            for page in range(1, page + 1):
+                self.crawl_list_spider(page=page, menu=menu)
+            logger.debug(f"------ {menu.channel} 采集结束 ------")
+
+        logger.debug("********** 列表页结束 **********")

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd.js

@@ -10,9 +10,9 @@ window = dom.window;
 
 var JSEncrypt = require("jsencrypt");
 
-var keyStr2 = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCinafk2x6vbwgDnF9/IHGruC8TRzYozCjKc0wMU18mcA2k/LNQcIpG6XPNHf1IEw57uhU6rb4x1tBDGTqiuIPUh05vIvX+4x4jJ1su2IaDgPIw2Z2JrTon1HtmTEXuJOcjX24sGz1kMOAeOTRbboMSVQ2E/xoIbxo8daReirNdgwIDAQAB";
 
 function akey(t) {
+    var keyStr2 = "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCinafk2x6vbwgDnF9/IHGruC8TRzYozCjKc0wMU18mcA2k/LNQcIpG6XPNHf1IEw57uhU6rb4x1tBDGTqiuIPUh05vIvX+4x4jJ1su2IaDgPIw2Z2JrTon1HtmTEXuJOcjX24sGz1kMOAeOTRbboMSVQ2E/xoIbxo8daReirNdgwIDAQAB";
     var encrypt = new JSEncrypt();
     encrypt.setPublicKey(keyStr2);
     return encrypt.encrypt(t);

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_byx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "A11CAE4A3DFDBFB499DEC46A4366984E", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "D3D8A41D5CDD91C065B49715B1970012", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 3 - 6
lzz_theme/zmdszfcgdzsc/zmd_byx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城泌阳县
 ---------
@@ -38,7 +38,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -52,7 +51,6 @@ class Details:
         buycount = root.xpath('//i[@class="buycount"]/text()').extract_first(0)
         sAccount = root.xpath('//i[@class="onepice"]/text()').extract_first(0)
         singleAccount = round((float(sAccount) * float(buycount)), 2)
-
         try:
             aa = float(re.findall('	allAccount = (.*?);', text)[0])
         except:
@@ -143,9 +141,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_byx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_byx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 2
lzz_theme/zmdszfcgdzsc/zmd_byx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城泌阳县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_gxq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "8796E7F57E455D16A692F7527DE10519", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "05BBD0A4A75ACC3249EF1E9CD665C0A9", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 9
lzz_theme/zmdszfcgdzsc/zmd_gxq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城高新区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "gxq"
+        self.host = "zmdgxq.zmddzsc.cn"
         self.url = "https://zmdgxq.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -52,7 +52,6 @@ class Details:
         buycount = root.xpath('//i[@class="buycount"]/text()').extract_first(0)
         sAccount = root.xpath('//i[@class="onepice"]/text()').extract_first(0)
         singleAccount = round((float(sAccount) * float(buycount)), 2)
-
         try:
             aa = float(re.findall('	allAccount = (.*?);', text)[0])
         except:
@@ -83,9 +82,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdgxq.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +141,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_gxq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_gxq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_gxq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城高新区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdgxq.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_kfq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "7CEE238AC249492BE60530AE148A1CFE", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "CE7686FBE7906F5C2DD7C20877A1ECC3", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 9
lzz_theme/zmdszfcgdzsc/zmd_kfq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城开发区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "kfq"
+        self.host = "zmdkfq.zmddzsc.cn"
         self.url = "https://zmdkfq.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -52,7 +52,6 @@ class Details:
         buycount = root.xpath('//i[@class="buycount"]/text()').extract_first(0)
         sAccount = root.xpath('//i[@class="onepice"]/text()').extract_first(0)
         singleAccount = round((float(sAccount) * float(buycount)), 2)
-
         try:
             aa = float(re.findall('	allAccount = (.*?);', text)[0])
         except:
@@ -83,9 +82,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdkfq.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +141,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_kfq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_kfq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_kfq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城开发区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdkfq.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 65 - 71
lzz_theme/zmdszfcgdzsc/zmd_login.py

@@ -1,3 +1,4 @@
+import random
 import re
 import time
 import execjs
@@ -18,7 +19,7 @@ def encrypt_pwd(pwd):
 
 
 def create_cookie(url, spath, proxies=False):
-    time.sleep(5)
+    time.sleep(random.randint(3, 10))
     username = "18803933603"
     password = "Jianyu@2022!"
 
@@ -32,7 +33,6 @@ def create_cookie(url, spath, proxies=False):
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
             "X-Requested-With": "XMLHttpRequest",
         }
-
         data = {
             "loginName": username,
             "cpassword": encrypt_pwd(password),
@@ -45,12 +45,13 @@ def create_cookie(url, spath, proxies=False):
         with open(f'./zmd_{spath}_ck.json', 'w', encoding='utf-8') as fw:
             fw.write(json.dumps(cookies))
         logger.success(f"{username} 登录成功!")
+
         return cookies
     except Exception as e:
         logger.error(f"[登录失败] {e}")
 
 
-def get_bj_html(hid, distributorid, host):
+def get_gys_html(hid, root, host):
     headers = {
         "Accept": "application/json, text/javascript, */*; q=0.01",
         "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
@@ -59,56 +60,58 @@ def get_bj_html(hid, distributorid, host):
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
         "X-Requested-With": "XMLHttpRequest",
     }
-
-    url = f"https://{host}/notice/getNoticeTenderHistory.action"
+    url = f"https://{host}/notice/getUnitTenderHistory.action"
     params = {
         "cpId": f"{hid}",
         "finalval": "1"
     }
     try:
         response = requests.get(url, headers=headers, params=params, proxies=False, timeout=20, verify=False)
-        flag = 0
         total_tmp = ""
+        flag = 0
+        source_page = root.xpath(
+            '//div[@class="w1190"]//ul[@class="quote-recordsunit-wrapper"]/table/tbody/tr').extract()
         for bb in response.json()[0]:
-            if distributorid == str(bb.get('distributorid')):
-                if flag == 0:
-                    tmp = f'''
-                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 提交报价¥{bb.get('tendermoney')} 竞价成交</i></span></li>
-                    '''
-                else:
-                    tmp = f'''
-                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} {bb.get('tendermoney')}</i></span></li>
-                    '''
-                flag += 1
-            else:
-                if bb.get('tendermoney') == "join":
-                    tmp = f'''
-                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 参与竞价,未报价</i></span></li>
-                    '''
-                else:
-                    tmp = f'''
-                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 提交报价¥{bb.get('tendermoney')}</i></span></li>
-                    '''
-            total_tmp += tmp
-
+            ss_page = source_page[flag]
+            cdid = bb.get('cdid')
+            ss_page = ss_page.replace(f'<i id="productname{cdid}"></i>', f"<i>{bb.get('productname')}</i>")
+            ss_page = ss_page.replace(f'<i id="brand{cdid}"></i>', f"<i>{bb.get('brand')}</i>")
+            ss_page = ss_page.replace(f'<i id="model{cdid}"></i>', f"<i>{bb.get('model')}</i>")
+            tdm = "".join(re.findall(f'<i id="single{cdid}">(.*?)</i>', ss_page))
+            ss_page = ss_page.replace(f'<i id="single{cdid}">{tdm}</i>', f"<i>{bb.get('tendermoney')}</i>")
+            total_tmp += ss_page
+            flag += 1
         if total_tmp:
-            bj_html = f'''
+            gys_html = f'''
             <div>
-                <p>全部报价记录</p>
-                <ul>
+                <p>成交供应商详细报价</p>
+                <table width="100%">
+                    <thead>
+                    <tr>
+                        <th width="15%">品目</th>
+                        <th width="15%">商品名称</th>
+                        <th width="10%">品牌</th>
+                        <th width="10%">型号</th>
+                        <th width="5%">竞价限制单价</th>
+                        <th width="3%">数量</th>
+                        <th width="5%">报价单价</th>
+                    </tr>
+                    </thead>
+                    <tbody>
                     {total_tmp}
-                </ul>
+                    </tbody>
+                </table>
             </div>
             '''
         else:
-            bj_html = ""
+            gys_html = ""
 
-        return bj_html
+        return gys_html
     except Exception as e:
         return ""
 
 
-def get_gys_html(hid, root, host):
+def get_bj_html(hid, distributorid, host, proxies=False):
     headers = {
         "Accept": "application/json, text/javascript, */*; q=0.01",
         "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
 @@ -117,56 +120,49 @@ def get_gys_html(hid, root, host):
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
         "X-Requested-With": "XMLHttpRequest",
     }
-
-    url = f"https://{host}/notice/getUnitTenderHistory.action"
+    url = f"https://{host}/notice/getNoticeTenderHistory.action"
     params = {
         "cpId": f"{hid}",
         "finalval": "1"
     }
     try:
-        response = requests.get(url, headers=headers, params=params, proxies=False, timeout=20, verify=False)
-        total_tmp = ""
+        response = requests.get(url, headers=headers, params=params, proxies=proxies, timeout=20, verify=False)
         flag = 0
-        source_page = root.xpath('//div[@class="w1190"]//ul[@class="quote-recordsunit-wrapper"]/table/tbody/tr').extract()
+        total_tmp = ""
         for bb in response.json()[0]:
-            ss_page = source_page[flag]
-            cdid = bb.get('cdid')
-            ss_page = ss_page.replace(f'<i id="productname{cdid}"></i>', f"<i>{bb.get('productname')}</i>")
-            ss_page = ss_page.replace(f'<i id="brand{cdid}"></i>', f"<i>{bb.get('brand')}</i>")
-            ss_page = ss_page.replace(f'<i id="model{cdid}"></i>', f"<i>{bb.get('model')}</i>")
-            tdm = "".join(re.findall(f'<i id="single{cdid}">(.*?)</i>',ss_page))
-            ss_page = ss_page.replace(f'<i id="single{cdid}">{tdm}</i>', f"<i>{bb.get('tendermoney')}</i>")
-            total_tmp += ss_page
-            flag += 1
+            if distributorid == str(bb.get('distributorid')):
+                if flag == 0:
+                    tmp = f'''
+                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 提交报价¥{bb.get('tendermoney')} 竞价成交</i></span></li>
+                    '''
+                else:
+                    tmp = f'''
+                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} {bb.get('tendermoney')}</i></span></li>
+                    '''
+                flag += 1
+            else:
+                if bb.get('tendermoney') == "join":
+                    tmp = f'''
+                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 参与竞价,未报价</i></span></li>
+                    '''
+                else:
+                    tmp = f'''
+                    <li><span><i class="">{bb.get('distributorName')} {bb.get('tendertime')} 提交报价¥{bb.get('tendermoney')}</i></span></li>
+                    '''
+            total_tmp += tmp
 
         if total_tmp:
-            gys_html = f'''
+            bj_html = f'''
             <div>
-                <p>成交供应商详细报价</p>
-                <ul>
-                    <table width="100%">
-                        <thead>
-                        <tr>
-                            <th width="15%">品目</th>
-                            <th width="15%">商品名称</th>
-                            <th width="10%">品牌</th>
-                            <th width="10%">型号</th>
-                            <th width="5%">竞价限制单价</th>
-                            <th width="3%">数量</th>
-                            <th width="5%">报价单价</th>
-                        </tr>
-                        </thead>
-                        <tbody>
-                        {total_tmp}
-                        </tbody>
-                    </table>
-                </ul>
+                <p>全部报价记录</p>
+                <ul>
+                    {total_tmp}
+                </ul>
             </div>
             '''
         else:
-            gys_html = ""
+            bj_html = ""
 
-        return gys_html
+        return bj_html
     except Exception as e:
         return ""
-

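The interesting change in zmd_login.py is the new get_gys_html body: instead of rebuilding the supplier table from scratch, it takes each <tr> scraped from the detail page (whose cells carry empty <i id="productname{cdid}"> placeholders) and splices in the values from the getUnitTenderHistory.action JSON, relying on the JSON records arriving in the same order as the scraped rows (source_page[flag]). A standalone sketch of that splicing step, with a canned row and record standing in for the live page and response:

# -*- coding: utf-8 -*-
# Standalone sketch of get_gys_html's placeholder-splicing step; the row and
# record below are canned stand-ins for one scraped <tr> and one JSON element.
import re

row = ('<tr><td><i id="productname88"></i></td>'
       '<td><i id="brand88"></i></td>'
       '<td><i id="model88"></i></td>'
       '<td><i id="single88">1200.00</i></td></tr>')
record = {"cdid": 88, "productname": "desktop PC", "brand": "Lenovo",
          "model": "M460", "tendermoney": "1188.00"}

cdid = record["cdid"]
row = row.replace(f'<i id="productname{cdid}"></i>', f"<i>{record['productname']}</i>")
row = row.replace(f'<i id="brand{cdid}"></i>', f"<i>{record['brand']}</i>")
row = row.replace(f'<i id="model{cdid}"></i>', f"<i>{record['model']}</i>")
# The single{cdid} cell already holds text on the page, so its current value
# has to be recovered with a regex before the quoted price can replace it.
tdm = "".join(re.findall(f'<i id="single{cdid}">(.*?)</i>', row))
row = row.replace(f'<i id="single{cdid}">{tdm}</i>', f"<i>{record['tendermoney']}</i>")
print(row)  # all four placeholders now carry the supplier's values

Plain string replacement keeps the page's own row markup (classes, extra cells) intact, which is presumably why the commit prefers it over regenerating the rows.
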
+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_pyx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "941B66B68E5D100427C374EB1CE237CB", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "F5B56265992BFA700D8E8647C7864AA5", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

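The dozen *_ck.json refreshes scattered through this commit all share one shape: a fresh JSESSIONID plus a thshop_customerName value that is simply the login account's company name, percent-encoded. A quick check in Python:

# Decode the percent-encoded thshop_customerName value from the *_ck.json files.
from urllib.parse import unquote

print(unquote("%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1"
              "%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90"
              "%E5%85%AC%E5%8F%B8"))
# -> 北京拓普丰联信息科技股份有限公司 (the crawler's login account)
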
+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_pyx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城平舆县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "pyx"
+        self.host = "zmdpy.zmddzsc.cn"
         self.url = "https://zmdpy.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -52,7 +53,6 @@ class Details:
         buycount = root.xpath('//i[@class="buycount"]/text()').extract_first(0)
         sAccount = root.xpath('//i[@class="onepice"]/text()').extract_first(0)
         singleAccount = round((float(sAccount) * float(buycount)), 2)
-
         try:
             aa = float(re.findall('	allAccount = (.*?);', text)[0])
         except:
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdpy.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_pyx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_pyx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

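Each county's details spider gets the same two-part fix seen here for 平舆县 (Pingyu County): the sub-site host moves into self.host and is passed to both helpers, replacing the stray hard-coded "zmdby.zmddzsc.cn" that previously sent every county's get_gys_html call to the 泌阳 sub-site. A condensed sketch of the resulting pattern; the class shape mirrors the real spiders, but build_quote_blocks is a hypothetical helper name and item is a placeholder dict:

# -*- coding: utf-8 -*-
# Condensed sketch of the per-county Details pattern after this commit;
# this is not the full spider, only the host-parameterization it introduces.
from parsel import Selector
from zmd_login import get_bj_html, get_gys_html

class Details:
    def __init__(self, spath, host):
        self.spath = spath     # e.g. "pyx"
        self.host = host       # e.g. "zmdpy.zmddzsc.cn" -- set once per county
        self.url = f"https://{host}/loginCustomer/loginValidate.action"

    def build_quote_blocks(self, item, page_text):
        root = Selector(page_text)
        distributorid = root.xpath('//*[@id="distributorId"]/@value').get("")
        hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
        # Both helpers now target the county's own host; before this commit,
        # get_gys_html was pinned to "zmdby.zmddzsc.cn" for every county.
        bjjl_html = get_bj_html(hid, distributorid, self.host)
        gys_html = get_gys_html(hid, root, self.host)
        return bjjl_html, gys_html
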
+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_pyx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城平舆县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdpy.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

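The list spiders keep the same lazy cookie bootstrap: a per-site ./zmd_<spath>_ck.json file is created on first use by create_cookie and reused afterwards; the get_bj_html import is simply dropped because the list pages never call it. A sketch of that load path follows; the reread step is an assumption, since the diff itself only shows the existence check:

# -*- coding: utf-8 -*-
# Sketch of the cookie bootstrap shared by the zmd_*_list.py spiders.
import json
import os

from zmd_login import create_cookie

def get_cookies(url, spath):
    ck_path = f'./zmd_{spath}_ck.json'
    if not os.path.isfile(ck_path):
        # First run (or after the file is deleted): log in via create_cookie,
        # which writes the per-site cookie file.
        create_cookie(url, spath)
    with open(ck_path, 'r', encoding='utf-8') as f:
        # File layout per the *_ck.json diffs above:
        # {"JSESSIONID": "...", "thshop_customerName": "..."}
        return json.load(f)
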
+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_qsx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "2A88E5F5A2F4C9E68527DC4E0DFE1E43", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "292A7D5432869FF8A0B36D0FC9900A17", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 7
lzz_theme/zmdszfcgdzsc/zmd_qsx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城确山县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "qsx"
+        self.host = "zmdqs.zmddzsc.cn"
         self.url = "https://zmdqs.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -83,9 +84,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdqs.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +143,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_qsx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_qsx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 1 - 2
lzz_theme/zmdszfcgdzsc/zmd_qsx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城确山县
 ---------
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdqs.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_qyzq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "3F6D0C53BCB56CAAFB91D13117350595", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "835FA2756DFD26DF944F870456B4D5E9", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_qyzq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城企业专区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "qyzq"
+        self.host = "zmdqyzq.zmddzsc.cn"
         self.url = "https://zmdqyzq.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -52,7 +53,6 @@ class Details:
         buycount = root.xpath('//i[@class="buycount"]/text()').extract_first(0)
         sAccount = root.xpath('//i[@class="onepice"]/text()').extract_first(0)
         singleAccount = round((float(sAccount) * float(buycount)), 2)
-
         try:
             aa = float(re.findall('	allAccount = (.*?);', text)[0])
         except:
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdqyzq.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_qyzq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_qyzq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_qyzq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城企业专区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdqyzq.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_rnx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "DF2F07A2A3392DC0DEB0052E67F5E284", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "6CD2C67671C6C8A7BE87978EF3D8A659", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_rnx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城汝南县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "rnx"
+        self.host = "zmdrn.zmddzsc.cn"
         self.url = "https://zmdrn.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdrn.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_rnx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_rnx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_rnx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城汝南县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdrn.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_sbj_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "F5F8F112977D34137C3CE763583E9E4B", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "79B9E143E98A13FFDF36599C2F6555E6", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 7 - 6
lzz_theme/zmdszfcgdzsc/zmd_sbj_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城
 ---------
@@ -8,6 +8,7 @@ Created on 2025-01-21
 """
 import sys
 import os
+import time
 
 sys.path.append(os.path.dirname(os.getcwd()))
 from utils.attachment import AttachmentDownloader
@@ -27,6 +28,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "sbj"
+        self.host = "zmd.zmddzsc.cn"
         self.url = "https://zmd.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -86,9 +88,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmd.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -146,8 +147,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        with self.db_name.find({"parser_name": "ztpc_zmd_sbj", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_sbj", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

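Across all counties, start() is reworked the same way: a 60 s warm-up before each polling pass (new for sbj, doubled from 30 s elsewhere) and the publishtime sort dropped, so pending rows come back in natural order. A sketch of that polling step, assuming pymongo; the connection string and db/collection names are placeholders, and where the real spiders wrap the cursor in a with block, a plain for loop is an equivalent shorthand:

# -*- coding: utf-8 -*-
# Sketch of the reworked start() polling step after this commit.
import time
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")   # placeholder connection
coll = client["theme"]["theme_list"]                # placeholder db/collection

def start(limit=1):
    time.sleep(60)  # new warm-up delay before querying for pending rows
    query = {"parser_name": "ztpc_zmd_sbj", "is_crawl": False, "failed": False}
    # The publishtime sort is gone, so Mongo returns rows in natural order.
    for item in coll.find(query).limit(limit):
        ...  # fetch and parse the detail page for each pending row
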
+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_sbj_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmd.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 2
lzz_theme/zmdszfcgdzsc/zmd_sbjcgyx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城-采购意向公开
 ---------
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmd.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_scx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "9C4E702A41F2536C1167797893981015", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "82051DA95DA90A3611667AD72C7BD1BC", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_scx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城上蔡县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "scx"
+        self.host = "zmdsc.zmddzsc.cn"
         self.url = "https://zmdsc.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdsc.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_scx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_scx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_scx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城上蔡县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdby.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_sfq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "09CF9739283809ED6CC38FEADCD4D26B", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "CD4F4EE648F4E4DADFAB5738F281C49C", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_sfq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城示范区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "sfq"
+        self.host = "zmdsfq.zmddzsc.cn"
         self.url = "https://zmdsfq.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdsfq.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_sfq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_sfq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 2 - 3
lzz_theme/zmdszfcgdzsc/zmd_sfq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城示范区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdsfq.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_spx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "995BA8B796D9F6CDD9F2F9D58440249B", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "BB03A8AE1D99176813B0620927898C73", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_spx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城遂平县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "spx"
+        self.host = "zmdsp.zmddzsc.cn"
         self.url = "https://zmdsp.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdsp.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_spx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_spx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_spx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城遂平县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdsp.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_xcx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "E044F2E26F40364BCCEBD59199688157", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "46054129F1863D78B9AC11CA774EC0B2", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_xcx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城新蔡县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "xcx"
+        self.host = "zmdxc.zmddzsc.cn"
         self.url = "https://zmdxc.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdxc.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_xcx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_xcx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_xcx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城新蔡县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdxc.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_xpx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "96984EB729EBCE7711080B245445E9FD", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "F362D82FD6EBB3F39852A33A2009932A", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_xpx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城西平县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "xpx"
+        self.host = "zmdxp.zmddzsc.cn"
         self.url = "https://zmdxp.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdxp.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_xpx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_xpx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_xpx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城西平县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdxp.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_ycq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "3D8366D872461E9C1D9E5CAA12EDB052", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "C98889053201443564DE2A68DB71022A", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_ycq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城驿城区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "ycq"
+        self.host = "zmdyc.zmddzsc.cn"
         self.url = "https://zmdyc.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdyc.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_ycq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_ycq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_ycq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城驿城区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdyc.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_zszq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "52911FBE175B4A684F922B0756034116", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "0DDE47DDDAC18DAE4837E249B4C3584B", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_zszq_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城直属专区
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "zszq"
+        self.host = "zmdzszq.zmddzsc.cn"
         self.url = "https://zmdzszq.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdzszq.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_zszq", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_zszq", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_zszq_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-20
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城直属专区
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdzszq.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_zyx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "34829C33B7CC5A4B02020963356F37C4", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "F5ACFDB8B259FE2F50DB9BFB6161A797", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 6 - 8
lzz_theme/zmdszfcgdzsc/zmd_zyx_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城正阳县
 ---------
@@ -27,6 +27,7 @@ class Details:
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
         self.spath = "zyx"
+        self.host = "zmdzy.zmddzsc.cn"
         self.url = "https://zmdzy.zmddzsc.cn/loginCustomer/loginValidate.action"
         self.headers = {
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -38,7 +39,6 @@ class Details:
         }
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url, self.spath)
 
@@ -83,9 +83,8 @@ class Details:
                    '//div[contains(@class,"quote-records")]','//a[text()="补充实物明细报价"]']
         distributorid = root.xpath('//*[@id="distributorId"]/@value').extract_first("")
         hid = item['competehref'].split('cpId=')[-1].replace("%27", "").replace("'", "")
-        bjjl_html = get_bj_html(hid, distributorid, "zmdzy.zmddzsc.cn")
-
-        gys_html = get_gys_html(hid, root, "zmdby.zmddzsc.cn")
+        bjjl_html = get_bj_html(hid, distributorid, self.host)
+        gys_html = get_gys_html(hid, root, self.host)
         title = item['title']
         new_title_suf = root.xpath(f'//a[@title="{title}"]/../text()').extract_first("")
         item['title'] = title + "".join(new_title_suf.split()).strip()
@@ -143,9 +142,8 @@ class Details:
 
     def start(self, limit=1):
         logger.debug("********** 详情页采集开始 **********")
-        time.sleep(30)
-        with self.db_name.find({"parser_name": "ztpc_zmd_zyx", "is_crawl": False, "failed": False},
-                               sort=[('publishtime', -1)]).limit(limit) as cursor:
+        time.sleep(60)
+        with self.db_name.find({"parser_name": "ztpc_zmd_zyx", "is_crawl": False, "failed": False}).limit(limit) as cursor:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)

+ 3 - 4
lzz_theme/zmdszfcgdzsc/zmd_zyx_list.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2025-01-21
+Created on 2025-04-03
 ---------
 @summary: 驻马店市政府采购电子商城正阳县
 ---------
@@ -13,7 +13,7 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.RedisDB import RedisFilter
 from collections import namedtuple
 from utils.tools import *
-from zmd_login import create_cookie,get_bj_html
+from zmd_login import create_cookie
 from parsel import Selector
 import requests
 import json
@@ -34,7 +34,6 @@ class Crawl_Zmd:
         self.base_url = "https://zmdzy.zmddzsc.cn"
 
     def get_cookies(self):
-
         if not os.path.isfile(f'./zmd_{self.spath}_ck.json'):
             create_cookie(self.url,self.spath)
 
@@ -140,7 +139,7 @@ class Crawl_Zmd:
 
     def start_list(self, menus):
         logger.debug("********** 列表页开始 **********")
-        time.sleep(random.randint(3, 15))
+        time.sleep(random.randint(3, 10))
         for menu in menus:
             logger.debug(f"++++++ {menu.channel} 开始采集 ++++++")
             page = menu.crawl_page

+ 1 - 1
lzz_theme/ztlbsww/ztlbsww_ck.json

@@ -1 +1 @@
-{"SESSION": "ZGZmODg2ZTMtMzZkZi00ODFjLTkyYTEtODdhMTczM2JmOGE4", "JSESSIONID": "1E7313EB64D92F66F7CF399869A978A8"}
+{"SESSION": "Yjk3ODU3OTUtMDQwNy00M2IyLTlkYzktN2E4MTI0MDVhZWJl", "JSESSIONID": "C1CF7A4DAAF803644D9CD3D7656D7144"}