Explorar o código

4月代码提交

dzr hai 3 meses
pai
achega
8acb910736
Modificáronse 100 ficheiros con 11972 adicións e 0 borrados
  1. 109 0
      a_albbdqycgpt_dqysj_gyszms/阿里巴巴大企业采购平台-大企业商机.py
  2. 109 0
      a_albbdqycgpt_dqysj_zb/阿里巴巴大企业采购平台-大企业商机.py
  3. 94 0
      a_albbdqycgpt_ggzx_jjgs/阿里巴巴公告中心.py
  4. 54 0
      a_albbdqycgpt_ggzx_jjgs/阿里巴巴公告中心Friefox.py
  5. 94 0
      a_albbdqycgpt_ggzx_xjgs/阿里巴巴公告中心.py
  6. 54 0
      a_albbdqycgpt_ggzx_xjgs/阿里巴巴公告中心Friefox.py
  7. 94 0
      a_albbdqycgpt_ggzx_zbgs/阿里巴巴公告中心.py
  8. 54 0
      a_albbdqycgpt_ggzx_zbgs/阿里巴巴公告中心Friefox.py
  9. 125 0
      a_bjgc_bggs/变更公示-列表页.py
  10. 146 0
      a_bjgc_bggs/变更公示-详情页.py
  11. 48 0
      a_bjgc_cggg_tpcg/bjgc_cggg_details.py
  12. 123 0
      a_bjgc_cggg_tpcg/采购公告-谈判采购-列表页.py
  13. 48 0
      a_bjgc_cggg_xbcg/bjgc_cggg_details.py
  14. 123 0
      a_bjgc_cggg_xbcg/采购公告-询比采购-列表页.py
  15. 48 0
      a_bjgc_cggg_zjsjcg/bjgc_cggg_details.py
  16. 123 0
      a_bjgc_cggg_zjsjcg/采购公告-中介随机采购-列表页.py
  17. 125 0
      a_bjgc_jggs/结果公示-列表页.py
  18. 181 0
      a_bjgc_jggs/结果公示-详情页.py
  19. 125 0
      a_bjgc_lbgs/流标公示-列表页.py
  20. 167 0
      a_bjgc_lbgs/流标公示-详情页.py
  21. 125 0
      a_bjgc_qxgs/取消公示-列表页.py
  22. 90 0
      a_bjgc_qxgs/取消公示-详情页.py
  23. 94 0
      a_dfxwszhcgpt_xjgg/Dfxwszhcgpt.py
  24. 57 0
      a_dfxwszhcgpt_xjgg/dfxwszhcgpt_details.py
  25. 94 0
      a_dfxwszhcgpt_zbgg/Dfxwszhcgpt.py
  26. 57 0
      a_dfxwszhcgpt_zbgg/dfxwszhcgpt_details.py
  27. 100 0
      a_gdsggzyjypt_bcgdzb_jggg/gd_utils.py
  28. 111 0
      a_gdsggzyjypt_bcgdzb_jggg/gdsggzyjypt_encrypt.js
  29. 140 0
      a_gdsggzyjypt_bcgdzb_jggg/补充耕地指标-结果公告-列表页.py
  30. 138 0
      a_gdsggzyjypt_bcgdzb_jggg/补充耕地指标-结果公告-详情页.py
  31. 100 0
      a_gdsggzyjypt_bcgdzb_jygg/gd_utils.py
  32. 111 0
      a_gdsggzyjypt_bcgdzb_jygg/gdsggzyjypt_encrypt.js
  33. 140 0
      a_gdsggzyjypt_bcgdzb_jygg/补充耕地指标-交易公告-列表页.py
  34. 162 0
      a_gdsggzyjypt_bcgdzb_jygg/补充耕地指标-交易公告-详情页.py
  35. 100 0
      a_gdsggzyjypt_fkzbjggg/gd_utils.py
  36. 111 0
      a_gdsggzyjypt_fkzbjggg/gdsggzyjypt_encrypt.js
  37. 163 0
      a_gdsggzyjypt_fkzbjggg/其他交易-复垦指标交易公告-详情页.py
  38. 138 0
      a_gdsggzyjypt_fkzbjggg/其他交易-复垦指标结果公告-列表页.py
  39. 100 0
      a_gdsggzyjypt_fkzbjygg/gd_utils.py
  40. 111 0
      a_gdsggzyjypt_fkzbjygg/gdsggzyjypt_encrypt.js
  41. 139 0
      a_gdsggzyjypt_fkzbjygg/其他交易-复垦指标交易公告-列表页.py
  42. 163 0
      a_gdsggzyjypt_fkzbjygg/其他交易-复垦指标交易公告-详情页.py
  43. 100 0
      a_gdsggzyjypt_gycq_jggg/gd_utils.py
  44. 111 0
      a_gdsggzyjypt_gycq_jggg/gdsggzyjypt_encrypt.js
  45. 137 0
      a_gdsggzyjypt_gycq_jggg/国有产权-结果公告-列表页.py
  46. 162 0
      a_gdsggzyjypt_gycq_jggg/国有产权-详情页.py
  47. 100 0
      a_gdsggzyjypt_hlzy_jggg/gd_utils.py
  48. 111 0
      a_gdsggzyjypt_hlzy_jggg/gdsggzyjypt_encrypt.js
  49. 140 0
      a_gdsggzyjypt_hlzy_jggg/河流资源-结果公告-列表页.py
  50. 162 0
      a_gdsggzyjypt_hlzy_jggg/河流资源-详情页.py
  51. 100 0
      a_gdsggzyjypt_hlzy_jygg/gd_utils.py
  52. 111 0
      a_gdsggzyjypt_hlzy_jygg/gdsggzyjypt_encrypt.js
  53. 140 0
      a_gdsggzyjypt_hlzy_jygg/河流资源-交易公告-列表页.py
  54. 162 0
      a_gdsggzyjypt_hlzy_jygg/河流资源-详情页.py
  55. 100 0
      a_gdsggzyjypt_hyzy_jggg/gd_utils.py
  56. 111 0
      a_gdsggzyjypt_hyzy_jggg/gdsggzyjypt_encrypt.js
  57. 140 0
      a_gdsggzyjypt_hyzy_jggg/海洋资源-结果公告-列表页.py
  58. 162 0
      a_gdsggzyjypt_hyzy_jggg/海洋资源-详情页.py
  59. 100 0
      a_gdsggzyjypt_hyzy_jygg/gd_utils.py
  60. 111 0
      a_gdsggzyjypt_hyzy_jygg/gdsggzyjypt_encrypt.js
  61. 140 0
      a_gdsggzyjypt_hyzy_jygg/海洋资源-交易公告-列表页.py
  62. 162 0
      a_gdsggzyjypt_hyzy_jygg/海洋资源-详情页.py
  63. 100 0
      a_gdsggzyjypt_kysyq_jggg/gd_utils.py
  64. 111 0
      a_gdsggzyjypt_kysyq_jggg/gdsggzyjypt_encrypt.js
  65. 140 0
      a_gdsggzyjypt_kysyq_jggg/空域使用权-结果公告-列表页.py
  66. 162 0
      a_gdsggzyjypt_kysyq_jggg/空域使用权-详情页.py
  67. 100 0
      a_gdsggzyjypt_kysyq_jygg/gd_utils.py
  68. 111 0
      a_gdsggzyjypt_kysyq_jygg/gdsggzyjypt_encrypt.js
  69. 140 0
      a_gdsggzyjypt_kysyq_jygg/空域使用权-交易公告-列表页.py
  70. 162 0
      a_gdsggzyjypt_kysyq_jygg/空域使用权-详情页.py
  71. 100 0
      a_gdsggzyjypt_lqjy_xxpl/gd_utils.py
  72. 111 0
      a_gdsggzyjypt_lqjy_xxpl/gdsggzyjypt_encrypt.js
  73. 139 0
      a_gdsggzyjypt_lqjy_xxpl/林权交易-信息披露-列表页.py
  74. 162 0
      a_gdsggzyjypt_lqjy_xxpl/林权交易-信息披露-详情页.py
  75. 100 0
      a_gdsggzyjypt_pwq_jggg/gd_utils.py
  76. 111 0
      a_gdsggzyjypt_pwq_jggg/gdsggzyjypt_encrypt.js
  77. 140 0
      a_gdsggzyjypt_pwq_jggg/排污权-结果公告-列表页.py
  78. 159 0
      a_gdsggzyjypt_pwq_jggg/排污权-详情页.py
  79. 100 0
      a_gdsggzyjypt_pwq_jygg/gd_utils.py
  80. 111 0
      a_gdsggzyjypt_pwq_jygg/gdsggzyjypt_encrypt.js
  81. 140 0
      a_gdsggzyjypt_pwq_jygg/排污权-交易公告-列表页.py
  82. 159 0
      a_gdsggzyjypt_pwq_jygg/排污权-详情页.py
  83. 100 0
      a_gdsggzyjypt_sfsszc_jggg/gd_utils.py
  84. 111 0
      a_gdsggzyjypt_sfsszc_jggg/gdsggzyjypt_encrypt.js
  85. 139 0
      a_gdsggzyjypt_sfsszc_jggg/涉法涉诉资产-结果公告-列表页.py
  86. 162 0
      a_gdsggzyjypt_sfsszc_jggg/涉法涉诉资产-详情页.py
  87. 100 0
      a_gdsggzyjypt_sfsszc_jygg/gd_utils.py
  88. 111 0
      a_gdsggzyjypt_sfsszc_jygg/gdsggzyjypt_encrypt.js
  89. 140 0
      a_gdsggzyjypt_sfsszc_jygg/涉法涉诉资产-交易公告-列表页.py
  90. 162 0
      a_gdsggzyjypt_sfsszc_jygg/涉法涉诉资产-详情页.py
  91. 100 0
      a_gdsggzyjypt_tdky_kyqjggg/gd_utils.py
  92. 111 0
      a_gdsggzyjypt_tdky_kyqjggg/gdsggzyjypt_encrypt.js
  93. 162 0
      a_gdsggzyjypt_tdky_kyqjggg/土地矿业-矿业权-详情页.py
  94. 142 0
      a_gdsggzyjypt_tdky_kyqjggg/土地矿业-矿业权交易结果公告-列表页.py
  95. 100 0
      a_gdsggzyjypt_tdky_kyqjygg/gd_utils.py
  96. 111 0
      a_gdsggzyjypt_tdky_kyqjygg/gdsggzyjypt_encrypt.js
  97. 162 0
      a_gdsggzyjypt_tdky_kyqjygg/土地矿业-矿业权-详情页.py
  98. 140 0
      a_gdsggzyjypt_tdky_kyqjygg/土地矿业-矿业权交易公告-列表页.py
  99. 100 0
      a_gdsggzyjypt_tdky_tdsyqjggg/gd_utils.py
  100. 111 0
      a_gdsggzyjypt_tdky_tdsyqjggg/gdsggzyjypt_encrypt.js

+ 109 - 0
a_albbdqycgpt_dqysj_gyszms/阿里巴巴大企业采购平台-大企业商机.py

@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022-11-01
+---------
+@summary: 阿里巴巴大企业采购平台
+---------
+@author: lzz
+"""
+
+import json
+import re
+from collections import namedtuple
+
+import feapder
+from items.spider_item import MgpListItem
+
+
class Spider(feapder.BiddingListSpider):
    """List-page spider for 阿里巴巴大企业采购平台 (1688 buy-offer search)."""

    def start_callback(self):
        # One Menu per crawl target: display channel, spider code, the
        # site's "dataType" query value, and how many pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'dataType', 'crawl_page'])
        self.site = "阿里巴巴大企业采购平台"

        self.menus = [
            Menu('大企业商机-供应商招募书', 'a_albbdqycgpt_dqysj_gyszms', 'recruitment', 1),  # site shows at most 51 pages
        ]

        self.headers = {
            "authority": "s.1688.com",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "referer": "https://s.1688.com/newbuyoffer/buyoffer_search.htm?spm=b26110225.8744419.kfgk0ypb.1.44c420a44MhOd8&n=y&filt=y&dataType=bidding,formalbidding&famousCompany=true",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
        }

    def start_requests(self):
        """Seed one search request per configured menu."""
        url = "https://s.1688.com/newbuyoffer/buyoffer_search.htm"
        for menu in self.menus:
            yield feapder.Request(url, item=menu._asdict(), page=1, proxies=False)

    def download_midware(self, request):
        """Attach the paging query string and headers just before sending."""
        page = request.page
        menu = request.item
        params = {
            "n": "y",
            "filt": "y",
            "dataType": f"{menu.get('dataType')}",
            "famousCompany": "true",
            "buyType": "",
            "beginPage": f"{page}"
        }
        request.params = params
        request.headers = self.headers

    def parse(self, request, response):
        """Extract the JSON payload embedded in the page and yield one list item per offer."""
        menu = request.item

        # The result set is embedded in the page as a JS assignment:
        #   window.data.offerresultData = successDataCheck({...});
        # Use a raw string so '\(' is a real regex escape (a non-raw '\(' is
        # an invalid escape sequence: SyntaxWarning on Python 3.12+), and
        # escape the literal dots so they cannot match arbitrary characters.
        data = re.compile(r'window\.data\.offerresultData = successDataCheck\((.*?)\);')
        text = response.content.decode()

        list_data = "".join(data.findall(text))
        if not list_data:
            # Layout changed or an anti-bot page was served; the previous
            # code crashed in json.loads('') here — stop gracefully instead.
            return

        info_list = json.loads(list_data).get('data').get('buyOffer')
        for info in info_list:
            href = info.get('detailUrl')
            title = info.get('subject').strip()
            create_time = info.get('gmtCreate')

            area = "全国"  # province: nationwide source
            city = ""  # city unknown for this source

            list_item = MgpListItem()  # storage pipeline item
            list_item.href = href  # detail-page link
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.title = title
            list_item.publishtime = create_time
            list_item.site = self.site
            list_item.area = area
            list_item.city = city

            list_item.unique_key = ('href',)
            list_item.parse = "self.detail_get"
            list_item.deal_detail = ['//div[@class="bidding-detail__main-content"]','//div[@class="main-offer-info"]']
            list_item.request_params = {"headers": self.headers}
            list_item.proxies = False
            list_item.parse_url = href
            list_item.files = {
                "list_xpath": '//ul[@class="bidding-detail-announcement__attach detail-property__list"]/li/span[@class="detail-property__content"]/ul/li',
                "url_xpath": './a/@href',
                "name_xpath": './span/text()',
                "files_type": (
                    'zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'jpg',
                    'png', 'zbid', 'xls', 'xlsx', 'swp'
                ),  # attachment extensions worth downloading
                # "file_type":'pdf', # fallback type for URLs that carry no extension
                "url_key": 'file',  # keyword that marks a URL as a real attachment link
                "host": '',  # host to prepend for relative attachment URLs
            }
            yield list_item

        # paginate until menu.crawl_page is reached
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Spider(redis_key="detail:normal_details").start()

+ 109 - 0
a_albbdqycgpt_dqysj_zb/阿里巴巴大企业采购平台-大企业商机.py

@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022-11-01
+---------
+@summary: 阿里巴巴大企业采购平台
+---------
+@author: lzz
+"""
+
+import json
+import re
+from collections import namedtuple
+
+import feapder
+from items.spider_item import MgpListItem
+
+
class Spider(feapder.BiddingListSpider):
    """List-page spider for 阿里巴巴大企业采购平台 (1688 buy-offer search, bidding)."""

    def start_callback(self):
        # One Menu per crawl target: display channel, spider code, the
        # site's "dataType" query value, and how many pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'dataType', 'crawl_page'])
        self.site = "阿里巴巴大企业采购平台"

        self.menus = [
            Menu('大企业商机-招标', 'a_albbdqycgpt_dqysj_zb', 'bidding,formalbidding', 1),  # site shows at most 2 pages
        ]

        self.headers = {
            "authority": "s.1688.com",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "referer": "https://s.1688.com/newbuyoffer/buyoffer_search.htm?spm=b26110225.8744419.kfgk0ypb.1.44c420a44MhOd8&n=y&filt=y&dataType=bidding,formalbidding&famousCompany=true",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
        }

    def start_requests(self):
        """Seed one search request per configured menu."""
        url = "https://s.1688.com/newbuyoffer/buyoffer_search.htm"
        for menu in self.menus:
            yield feapder.Request(url, item=menu._asdict(), page=1, proxies=False)

    def download_midware(self, request):
        """Attach the paging query string and headers just before sending."""
        page = request.page
        menu = request.item
        params = {
            "n": "y",
            "filt": "y",
            "dataType": f"{menu.get('dataType')}",
            "famousCompany": "true",
            "buyType": "",
            "beginPage": f"{page}"
        }
        request.params = params
        request.headers = self.headers

    def parse(self, request, response):
        """Extract the JSON payload embedded in the page and yield one list item per offer."""
        menu = request.item

        # The result set is embedded in the page as a JS assignment:
        #   window.data.offerresultData = successDataCheck({...});
        # Use a raw string so '\(' is a real regex escape (a non-raw '\(' is
        # an invalid escape sequence: SyntaxWarning on Python 3.12+), and
        # escape the literal dots so they cannot match arbitrary characters.
        data = re.compile(r'window\.data\.offerresultData = successDataCheck\((.*?)\);')
        text = response.content.decode()

        list_data = "".join(data.findall(text))
        if not list_data:
            # Layout changed or an anti-bot page was served; the previous
            # code crashed in json.loads('') here — stop gracefully instead.
            return

        info_list = json.loads(list_data).get('data').get('buyOffer')
        for info in info_list:
            href = info.get('detailUrl')
            title = info.get('subject').strip()
            create_time = info.get('gmtCreate')

            area = "全国"  # province: nationwide source
            city = ""  # city unknown for this source

            list_item = MgpListItem()  # storage pipeline item
            list_item.href = href  # detail-page link
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.title = title
            list_item.publishtime = create_time
            list_item.site = self.site
            list_item.area = area
            list_item.city = city

            list_item.unique_key = ('href',)
            list_item.parse = "self.detail_get"
            list_item.deal_detail = ['//div[@class="bidding-detail__main-content"]','//div[@class="main-offer-info"]']
            list_item.request_params = {"headers": self.headers}
            list_item.proxies = False
            list_item.parse_url = href
            list_item.files = {
                "list_xpath": '//ul[@class="bidding-detail-announcement__attach detail-property__list"]/li/span[@class="detail-property__content"]/ul/li',
                "url_xpath": './a/@href',
                "name_xpath": './span/text()',
                "files_type": (
                    'zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'jpg',
                    'png', 'zbid', 'xls', 'xlsx', 'swp'
                ),  # attachment extensions worth downloading
                # "file_type":'pdf', # fallback type for URLs that carry no extension
                "url_key": 'file',  # keyword that marks a URL as a real attachment link
                "host": '',  # host to prepend for relative attachment URLs
            }
            yield list_item

        # paginate until menu.crawl_page is reached
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Spider(redis_key="detail:normal_details").start()

+ 94 - 0
a_albbdqycgpt_ggzx_jjgs/阿里巴巴公告中心.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary: 阿里巴巴大企业采购平台
+---------
+@author: lzz
+"""
+import time
+from collections import namedtuple
+
+import feapder
+from items.spider_item import BidingListItem
+
+
class Spider(feapder.BiddingListSpider):
    """List-page spider for the 1688 notice center (auction publicity notices)."""

    def start_callback(self):
        # channel label, spider code, API noticeType value, pages to crawl
        Menu = namedtuple('Menu', ['channel', 'code', 'noticeType', 'crawl_page'])
        self.site = "阿里巴巴大企业采购平台"

        self.menus = [
            Menu('公告中心-竞价公示', 'a_albbdqycgpt_ggzx_jjgs', 'auctionNotice', 1),  # 1 page in total
        ]

        self.headers = {
            "authority": "quotation.1688.com",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://page.1688.com",
            "referer": "https://page.1688.com/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36"
        }

    def start_requests(self):
        """Seed the notice-list API, one request per menu entry."""
        api = 'https://quotation.1688.com/ajax/notice/get_notice_list.json'
        for menu in self.menus:
            yield feapder.Request(api, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Fill in the paging query string and headers just before sending."""
        menu = request.item
        request.params = {
            "relatedSelf": "false",
            "noticeType": f"{menu.get('noticeType')}",
            "subject": "",
            "orderBy": "",
            "orderByDirection": "",
            "pageSize": "10",
            "pageIndex": f"{request.page}",
            "filterExpired": "false"
        }
        request.headers = self.headers

    def parse(self, request, response):
        """Turn each API notice record into a BidingListItem for the detail stage."""
        menu = request.item
        for notice in response.json.get('data').get('noticeList'):
            notice_id = notice.get('id')
            detail_url = f'https://page.1688.com/html/notice_detail.html?noticeId={notice_id}'
            # createDate is a millisecond epoch; drop the last 3 digits for seconds
            seconds = int(str(notice.get('createDate'))[:-3])
            publish_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

            list_item = BidingListItem()  # storage pipeline item
            list_item.site = self.site
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.href = detail_url  # notice link
            list_item.title = notice.get('subject').strip()
            list_item.publishtime = publish_time
            list_item.area = "全国"  # nationwide source, no province info
            list_item.city = ""  # city unknown

            list_item.unique_key = ("href",)
            list_item.parse = "self.detail_get"
            list_item.render_time = 1
            list_item.deal_detail = ['//div[@class="main__notice"]']
            list_item.proxies = False
            list_item.parse_url = detail_url
            yield list_item

        time.sleep(3)  # throttle between pages
        # paginate until menu.crawl_page is reached
        yield self.infinite_pages(request, response)


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 54 - 0
a_albbdqycgpt_ggzx_jjgs/阿里巴巴公告中心Friefox.py

@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary:  阿里巴巴快照页1-公告中心
+---------
+@author: lzz
+"""
+import time
+
+import feapder
+from feapder.network.response import Response
+from items.spider_item import DataBakItem
+
+
class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider: renders a 1688 notice page and stores its HTML body."""

    def start_requests(self):
        """Pull queued list items and issue one rendered request per task."""
        data_list = self.get_tasks_by_rabbitmq(limit=50)
        for item in data_list:
            # Resolve the callback by attribute name instead of eval():
            # task payloads come from an external queue, and eval() on queue
            # data would execute arbitrary code.  "self.detail_get" -> detail_get.
            callback = getattr(self, item.get("parse").split(".")[-1])
            yield feapder.Request(url=item.get("parse_url"),
                                  render=True,
                                  callback=callback,
                                  item=item,
                                  deal_detail=item.get("deal_detail"))

    def detail_get(self, request, response):
        """Refresh the rendered page, extract the notice body, yield the data item."""
        items = request.item
        data_item = DataBakItem(**items)

        # The first render can miss the async-loaded body; refresh once and
        # rebuild a Response object from the live browser state.
        browser = response.browser
        browser.refresh()
        time.sleep(2)
        response = Response.from_dict(
            {
                "url": browser.current_url,
                "cookies": browser.cookies,
                "_content": browser.page_source.encode(),
                "status_code": 200,
                "elapsed": 666,
            }
        )

        html = response.xpath('//div[@class="main__notice"]').extract_first()  # notice body
        exhtml = response.xpath('//div[@class="main__notice"]/header[@class="header"]/h2').extract_first()
        # Strip the duplicated <h2> title from the body.  Guard both values:
        # the previous code raised AttributeError when the body xpath missed
        # (html is None) but the header xpath matched.
        if html and exhtml:
            html = html.replace(exhtml, '')

        data_item.contenthtml = html
        yield data_item


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 94 - 0
a_albbdqycgpt_ggzx_xjgs/阿里巴巴公告中心.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary: 阿里巴巴大企业采购平台
+---------
+@author: lzz
+"""
+import time
+from collections import namedtuple
+
+import feapder
+from items.spider_item import BidingListItem
+
+
class Spider(feapder.BiddingListSpider):
    """List-page spider for the 1688 notice center (buy-offer publicity notices)."""

    def start_callback(self):
        # channel label, spider code, API noticeType value, pages to crawl
        Menu = namedtuple('Menu', ['channel', 'code', 'noticeType', 'crawl_page'])
        self.site = "阿里巴巴大企业采购平台"

        self.menus = [
            Menu('公告中心-询价公示', 'a_albbdqycgpt_ggzx_xjgs', 'buyofferNotice', 30),  # 500 pages in total
        ]

        self.headers = {
            "authority": "quotation.1688.com",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://page.1688.com",
            "referer": "https://page.1688.com/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36"
        }

    def start_requests(self):
        """Seed the notice-list API, one request per menu entry."""
        api = 'https://quotation.1688.com/ajax/notice/get_notice_list.json'
        for menu in self.menus:
            yield feapder.Request(api, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Fill in the paging query string and headers just before sending."""
        menu = request.item
        request.params = {
            "relatedSelf": "false",
            "noticeType": f"{menu.get('noticeType')}",
            "subject": "",
            "orderBy": "",
            "orderByDirection": "",
            "pageSize": "10",
            "pageIndex": f"{request.page}",
            "filterExpired": "false"
        }
        request.headers = self.headers

    def parse(self, request, response):
        """Turn each API notice record into a BidingListItem for the detail stage."""
        menu = request.item
        for notice in response.json.get('data').get('noticeList'):
            notice_id = notice.get('id')
            detail_url = f'https://page.1688.com/html/notice_detail.html?noticeId={notice_id}'
            # createDate is a millisecond epoch; drop the last 3 digits for seconds
            seconds = int(str(notice.get('createDate'))[:-3])
            publish_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

            list_item = BidingListItem()  # storage pipeline item
            list_item.site = self.site
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.href = detail_url  # notice link
            list_item.title = notice.get('subject').strip()
            list_item.publishtime = publish_time
            list_item.area = "全国"  # nationwide source, no province info
            list_item.city = ""  # city unknown

            list_item.unique_key = ("href",)
            list_item.parse = "self.detail_get"
            list_item.render_time = 1
            list_item.deal_detail = ['//div[@class="main__notice"]']
            list_item.proxies = False
            list_item.parse_url = detail_url
            yield list_item

        time.sleep(3)  # throttle between pages
        # paginate until menu.crawl_page is reached
        yield self.infinite_pages(request, response)


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 54 - 0
a_albbdqycgpt_ggzx_xjgs/阿里巴巴公告中心Friefox.py

@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary:  阿里巴巴快照页1-公告中心
+---------
+@author: lzz
+"""
+import time
+
+import feapder
+from feapder.network.response import Response
+from items.spider_item import DataBakItem
+
+
class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider: renders a 1688 notice page and stores its HTML body."""

    def start_requests(self):
        """Pull queued list items and issue one rendered request per task."""
        data_list = self.get_tasks_by_rabbitmq(limit=50)
        for item in data_list:
            # Resolve the callback by attribute name instead of eval():
            # task payloads come from an external queue, and eval() on queue
            # data would execute arbitrary code.  "self.detail_get" -> detail_get.
            callback = getattr(self, item.get("parse").split(".")[-1])
            yield feapder.Request(url=item.get("parse_url"),
                                  render=True,
                                  callback=callback,
                                  item=item,
                                  deal_detail=item.get("deal_detail"))

    def detail_get(self, request, response):
        """Refresh the rendered page, extract the notice body, yield the data item."""
        items = request.item
        data_item = DataBakItem(**items)

        # The first render can miss the async-loaded body; refresh once and
        # rebuild a Response object from the live browser state.
        browser = response.browser
        browser.refresh()
        time.sleep(2)
        response = Response.from_dict(
            {
                "url": browser.current_url,
                "cookies": browser.cookies,
                "_content": browser.page_source.encode(),
                "status_code": 200,
                "elapsed": 666,
            }
        )

        html = response.xpath('//div[@class="main__notice"]').extract_first()  # notice body
        exhtml = response.xpath('//div[@class="main__notice"]/header[@class="header"]/h2').extract_first()
        # Strip the duplicated <h2> title from the body.  Guard both values:
        # the previous code raised AttributeError when the body xpath missed
        # (html is None) but the header xpath matched.
        if html and exhtml:
            html = html.replace(exhtml, '')

        data_item.contenthtml = html
        yield data_item


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 94 - 0
a_albbdqycgpt_ggzx_zbgs/阿里巴巴公告中心.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary: 阿里巴巴大企业采购平台
+---------
+@author: lzz
+"""
+import time
+from collections import namedtuple
+
+import feapder
+from items.spider_item import BidingListItem
+
+
class Spider(feapder.BiddingListSpider):
    """List-page spider for the 1688 notice center (bidding publicity notices)."""

    def start_callback(self):
        # channel label, spider code, API noticeType value, pages to crawl
        Menu = namedtuple('Menu', ['channel', 'code', 'noticeType', 'crawl_page'])
        self.site = "阿里巴巴大企业采购平台"

        self.menus = [
            Menu('公告中心-招标公示', 'a_albbdqycgpt_ggzx_zbgs', 'biddingNotice', 1),  # 500 pages in total
        ]

        self.headers = {
            "authority": "quotation.1688.com",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://page.1688.com",
            "referer": "https://page.1688.com/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36"
        }

    def start_requests(self):
        """Seed the notice-list API, one request per menu entry."""
        api = 'https://quotation.1688.com/ajax/notice/get_notice_list.json'
        for menu in self.menus:
            yield feapder.Request(api, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Fill in the paging query string and headers just before sending."""
        menu = request.item
        request.params = {
            "relatedSelf": "false",
            "noticeType": f"{menu.get('noticeType')}",
            "subject": "",
            "orderBy": "",
            "orderByDirection": "",
            "pageSize": "10",
            "pageIndex": f"{request.page}",
            "filterExpired": "false"
        }
        request.headers = self.headers

    def parse(self, request, response):
        """Turn each API notice record into a BidingListItem for the detail stage."""
        menu = request.item
        for notice in response.json.get('data').get('noticeList'):
            notice_id = notice.get('id')
            detail_url = f'https://page.1688.com/html/notice_detail.html?noticeId={notice_id}'
            # createDate is a millisecond epoch; drop the last 3 digits for seconds
            seconds = int(str(notice.get('createDate'))[:-3])
            publish_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

            list_item = BidingListItem()  # storage pipeline item
            list_item.site = self.site
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.href = detail_url  # notice link
            list_item.title = notice.get('subject').strip()
            list_item.publishtime = publish_time
            list_item.area = "全国"  # nationwide source, no province info
            list_item.city = ""  # city unknown

            list_item.unique_key = ("href",)
            list_item.parse = "self.detail_get"
            list_item.render_time = 1
            list_item.deal_detail = ['//div[@class="main__notice"]']
            list_item.proxies = False
            list_item.parse_url = detail_url
            yield list_item

        time.sleep(3)  # throttle between pages
        # paginate until menu.crawl_page is reached
        yield self.infinite_pages(request, response)


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 54 - 0
a_albbdqycgpt_ggzx_zbgs/阿里巴巴公告中心Friefox.py

@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-10-13
+---------
+@summary:  阿里巴巴快照页1-公告中心
+---------
+@author: lzz
+"""
+import time
+
+import feapder
+from feapder.network.response import Response
+from items.spider_item import DataBakItem
+
+
class Spider(feapder.BiddingDetailSpider):
    """Detail-page spider: renders a 1688 notice page and stores its HTML body."""

    def start_requests(self):
        """Pull queued list items and issue one rendered request per task."""
        data_list = self.get_tasks_by_rabbitmq(limit=50)
        for item in data_list:
            # Resolve the callback by attribute name instead of eval():
            # task payloads come from an external queue, and eval() on queue
            # data would execute arbitrary code.  "self.detail_get" -> detail_get.
            callback = getattr(self, item.get("parse").split(".")[-1])
            yield feapder.Request(url=item.get("parse_url"),
                                  render=True,
                                  callback=callback,
                                  item=item,
                                  deal_detail=item.get("deal_detail"))

    def detail_get(self, request, response):
        """Refresh the rendered page, extract the notice body, yield the data item."""
        items = request.item
        data_item = DataBakItem(**items)

        # The first render can miss the async-loaded body; refresh once and
        # rebuild a Response object from the live browser state.
        browser = response.browser
        browser.refresh()
        time.sleep(2)
        response = Response.from_dict(
            {
                "url": browser.current_url,
                "cookies": browser.cookies,
                "_content": browser.page_source.encode(),
                "status_code": 200,
                "elapsed": 666,
            }
        )

        html = response.xpath('//div[@class="main__notice"]').extract_first()  # notice body
        exhtml = response.xpath('//div[@class="main__notice"]/header[@class="header"]/h2').extract_first()
        # Strip the duplicated <h2> title from the body.  Guard both values:
        # the previous code raised AttributeError when the body xpath missed
        # (html is None) but the header xpath matched.
        if html and exhtml:
            html = html.replace(exhtml, '')

        data_item.contenthtml = html
        yield data_item


if __name__ == "__main__":
    Spider(redis_key="lzz:Albbdqycgpt").start()

+ 125 - 0
a_bjgc_bggs/变更公示-列表页.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import get_today_of_day,timestamp_to_date
+import json
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 八戒公采 change-publicity (变更公示) notices."""

    def start_callback(self):
        # channel label, spider code, pages to crawl
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "八戒公采"

        self.menus = [
            Menu('变更公示', 'a_bjgc_bggs', 5),
        ]

        self.headers = {
            "authority": "bridgezhyc.zbj.com",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "accesstoken": "undefined",
            "cache-control": "no-cache",
            "content-type": "application/json",
            "logintoken": "undefined",
            "origin": "https://cg.zbj.com",
            "pragma": "no-cache",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
            "x-auth-token": "undefined",
            "x-requested-with": "XMLHttpRequest"
        }

    def start_requests(self):
        """Seed the notice-list API, one request per menu entry."""
        for menu in self.menus:
            yield feapder.Request(url="https://bridgezhyc.zbj.com/api/notice/queryNoticeList",
                                  item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Build the POST body (type=2 selects change notices; 3-day window) and headers."""
        payload = {
            "data": {
                "businessId": "",
                "biddingType": "0",
                "purchasingInformation": "",
                "transactionSupplierName": "",
                "regionVal": [],
                "requestId": "1531362372728",
                "requirementName": "",
                "type": "2",
                "page": request.page,
                "pageSize": 10,
                "province": "",
                "city": "",
                "region": "",
                "startTime": f"{get_today_of_day(-3)}",
                "endTime": f"{get_today_of_day()}"
            }
        }
        request.data = json.dumps(payload, separators=(',', ':'))
        request.headers = self.headers

    def parse(self, request, response):
        """Turn each API record into an MgpListItem pointing at the JSON detail API."""
        menu = request.item
        for row in response.json.get('data').get('data'):
            notice_id = row.get('id')
            notice_type = row.get('type')
            detail_href = f"https://cg.zbj.com/publicityDetails?id={notice_id}&type={notice_type}"
            # HLJGCY records carry a bare name; rebuild the full announcement title
            if row.get('appId') == "HLJGCY":
                title = row.get('purchasingInformation', '').strip() + row.get('name').strip() + "更正公告"
            else:
                title = row.get('name').strip()
            # publishDate is a millisecond epoch; keep the first 10 digits (seconds)
            publish_time = timestamp_to_date(int(str(row.get('publishDate'))[:10]))

            list_item = MgpListItem()  # storage pipeline item
            list_item.href = detail_href  # notice link
            list_item.unique_key = ('href', publish_time)
            list_item.channel = menu.get("channel")  # crawl channel defined above
            list_item.spidercode = menu.get("code")  # spider code defined above
            list_item.title = title
            list_item.site = self.site
            list_item.publishtime = publish_time
            list_item.area = "全国"  # nationwide source
            list_item.city = ""  # city unknown

            list_item.parse = "self.detail_get"  # detail-stage callback
            list_item.deal_detail = []  # detail text comes from the JSON API, not xpath
            list_item.proxies = False

            detail_payload = json.dumps({"data": {"id": f"{notice_id}"}}, separators=(',', ':'))
            list_item.request_params = {"data": detail_payload}
            list_item.parse_url = "https://bridgezhyc.zbj.com/api/buyer/queryPurchasingDemandHistoryById"

            yield list_item

        # paginate until menu.crawl_page is reached
        yield self.infinite_pages(request, response)


if __name__ == "__main__":
    Feapder(redis_key="lzz:bjgc_bggs").start()

+ 146 - 0
a_bjgc_bggs/变更公示-详情页.py

@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+from feapder.utils.tools import timestamp_to_date
+from untils.tools import extract_file_type
+from untils.attachment import AttachmentDownloader
+from feapder.network.selector import Selector
+
+
+# Static anonymous-access request headers shared by every detail request;
+# the various *token fields are literally the string "undefined".
+headers = {
+    "authority": "bridgezhyc.zbj.com",
+    "accept": "application/json, text/plain, */*",
+    "accept-language": "zh-CN,zh;q=0.9",
+    "accesstoken": "undefined",
+    "cache-control": "no-cache",
+    "content-type": "application/json",
+    "logintoken": "undefined",
+    "origin": "https://cg.zbj.com",
+    "pragma": "no-cache",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+    "x-auth-token": "undefined",
+    "x-requested-with": "XMLHttpRequest"
+}
+
+
+def formatdata(date):
+    """Convert a millisecond timestamp to 'YYYY-MM-DD'; pass falsy values through unchanged."""
+    if date:
+        date = timestamp_to_date(int(str(date)[:10]), "%Y-%m-%d")
+    return date
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail spider for 变更公示 (change notices): rebuilds an old/new
+    comparison table from the history API and downloads any file links."""
+
+    def start_requests(self):
+        # Pull pending detail tasks (produced by the list spider) from RabbitMQ.
+        data_list = self.get_tasks_by_rabbitmq(limit=30)
+        for item in data_list:
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"), item=item,
+                                  deal_detail=item.get("deal_detail"), **request_params,
+                                  callback='parse')
+
+    def download_midware(self, request):
+        # All requests use the shared anonymous headers defined above.
+        request.headers = headers
+
+    def parse(self, request, response):
+        """Render the change history as an HTML table and yield the item."""
+        items = request.item
+        list_item = DataBakItem(**items)
+        # Keep the public page URL as competehref; href is neutralized.
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+
+
+        detail_info = response.json.get('data')
+
+        xqmc = detail_info.get('purchaseDemandName')
+        if detail_info.get('appId') == "HLJGCY":
+            xqmc = detail_info.get('purchasingInformation') + xqmc + "询比采购公告"
+
+        # Build one table row per changed field, with file values rendered
+        # as download links.
+        shtml = ""
+        if detail_info.get('list'):
+            for key in detail_info.get('list'):
+                old_html = ""
+                new_html = ""
+
+                # Values containing a storage path are comma-separated file
+                # keys; turn each into an <a> link on the CDN host.
+                # NOTE(review): raises TypeError if oldValue/newValue is None
+                # — TODO confirm the API always returns strings here.
+                if "zhengcai/base4pub" in key.get('oldValue'):
+                    for s1 in key.get('oldValue').split(','):
+                        f_url = "https://base4pubzhyc.zbjimg.com/" + s1
+                        # assumes the 6th path segment is the display name —
+                        # TODO confirm for all storage-key formats
+                        f_name = s1.split('/')[5]
+                        t1 = f'''
+                        <a href="{f_url}">{f_name}</a>
+                        <br>
+                        '''
+                        old_html += t1
+                else:
+                    old_html += key.get('oldValue')
+
+                if "zhengcai/base4pub" in key.get('newValue'):
+                    for s2 in key.get('newValue').split(','):
+                        f_url = "https://base4pubzhyc.zbjimg.com/" + s2
+                        f_name = s2.split('/')[5]
+                        t2 = f'''
+                        <a href="{f_url}">{f_name}</a>
+                        <br>
+                        '''
+                        new_html += t2
+                else:
+                    new_html += key.get('newValue')
+
+                temp = f'''
+                <tr>
+                    <td>{key.get('keyName')}: {old_html}</td>
+                    <td>{key.get('keyName')}: {new_html}</td>
+                </tr>
+                '''
+                shtml += temp
+
+        html = f'''
+            <table width="96%">
+                <tr>
+                    <td colspan="2">公告编号:{detail_info.get('id')}</td>
+                </tr>
+                <tr>
+                    <td colspan="2">创建时间:{detail_info.get('createTimeName')[:10]}</td>
+                </tr>
+                <tr>
+                    <td colspan="2">需求名称:<a href="" class="">{xqmc}</a>
+                    </td>
+                </tr>
+                <tr>
+                    <td width="48%">原内容</td>
+                    <td>修改后内容</td>
+                </tr>
+                {shtml}
+            </table>
+            '''
+
+        list_item.contenthtml = html
+
+        # Download every linked file from the rebuilt HTML; extract_file_type
+        # filters out non-file anchors (e.g. the empty-href demand-name link).
+        file_list = Selector(html).xpath('//a')
+        attachments = {}
+        if file_list:
+            for info in file_list:
+                file_url = info.xpath('./@href').extract_first()
+                file_name = info.xpath('./text()').extract_first("").strip()
+                file_type = extract_file_type(file_name, file_url)
+                if file_type:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=file_name, file_type=file_type, download_url=file_url,
+                        proxies=request.proxies(), headers=headers)
+                    attachments[str(len(attachments) + 1)] = attachment
+
+        if attachments:
+            list_item.projectinfo = {"attachments": attachments}
+
+        yield list_item
+
+
+if __name__ == '__main__':
+    Details(redis_key="lzz:bjgc_bggs").start()

+ 48 - 0
a_bjgc_cggg_tpcg/bjgc_cggg_details.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail spider: renders each announcement page in a browser and
+    extracts the body HTML from one of several known page layouts."""
+
+    def start_requests(self):
+        # Pull pending detail tasks (produced by the list spider) from RabbitMQ.
+        data_list = self.get_tasks_by_rabbitmq(limit=30)
+        for item in data_list:
+            request_params = item.get("request_params")
+            # render=True: the detail page is a JS-rendered SPA.
+            # HACK: eval() resolves the callback name stored in the task
+            # ("self.detail_get"); acceptable only because tasks come from
+            # our own list spider, never from untrusted input.
+            yield feapder.Request(url=item.get("parse_url"), item=item, files_info=item.get("files"),
+                                  deal_detail=item.get("deal_detail"), render_time=5, render=True,
+                                  callback=eval(item.get("parse")),  **request_params)
+
+
+    def detail_get(self, request, response):
+        """Extract the announcement body, trying each known layout in order."""
+        items = request.item
+        list_item = DataBakItem(**items)
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+        # Candidate containers for the known page layouts; first match wins.
+        xpath_list = ['//div[@class="project"]','//div[@class="thirdIndex"]',
+                      '//div[@class="detail-body content-grid relative bg-white mt-[-160px] pb-[160px] px-[50px] pt-[60px]"]/div[@class="text-14"]']
+        html = ""
+        for xpath in xpath_list:
+            html = response.xpath(xpath).extract_first()  # announcement body HTML
+            if html:
+                break
+        # Strip login-gated placeholder texts.
+        # NOTE(review): if no xpath matched, html is None and .replace raises
+        # AttributeError — TODO confirm one layout always matches.
+        html = html.replace('文件下载1', '')
+        html = html.replace('预览1', '')
+        html = html.replace('需求文件:', '')
+        html = html.replace('登录后查看', '')
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == "__main__":
+    Details(redis_key="lzz:bjgc_cggg").start()

+ 123 - 0
a_bjgc_cggg_tpcg/采购公告-谈判采购-列表页.py

@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import timestamp_to_date
+import json
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List spider for 八戒公采 purchase announcements (谈判采购,
+    biddingType=2); pushes detail tasks rendered by bjgc_cggg_details."""
+
+    def start_callback(self):
+        # typeid maps to the API's biddingType; crawl_page bounds paging.
+        Menu = namedtuple('Menu', ['channel', 'code', 'typeid', 'crawl_page'])
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('采购公告-谈判采购', 'a_bjgc_cggg_tpcg', 2, 2),
+        ]
+
+        # Anonymous-access headers; the *token fields are literally "undefined".
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "referer": "https://cg.zbj.com/buyerHall",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/platform/queryPurchase"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def parse(self, request, response):
+        """Turn each API row into a rendered detail task."""
+        menu = request.item
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            title = info.get('purchaseDemandName').strip()
+            hid = info.get('originOrderNo')
+            # Order-number length distinguishes new-platform vs legacy
+            # detail pages — presumably long ids are new-platform only;
+            # verify against the site if rows start landing on 404s.
+            if len(hid) > 16:
+                href = f"https://cg.zbj.com/newdetailsDemand?id={hid}"
+            else:
+                href = f"https://chinazhyc.zbj.com/detailsDemand?id={hid}"
+            # publishDate is in milliseconds; drop the last 3 digits.
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:-3]))
+
+            area = "全国"  # province
+            city = ""  # city
+
+            list_item = MgpListItem()  # pipeline item that stores the row
+            list_item.href = href  # announcement link
+            list_item.channel = menu.get("channel")  # channel defined in the menu above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menu above
+            list_item.title = title  # title
+            list_item.publishtime = create_time  # publish time
+            list_item.site = self.site
+            list_item.area = area  # province; defaults to 全国
+            list_item.city = city  # city; empty by default
+
+            list_item.unique_key = ('href', create_time)
+            list_item.parse = "self.detail_get"
+            list_item.request_params = {"headers": self.headers}
+            list_item.proxies = False
+            list_item.deal_detail = ['//div[@class="project"]']
+            list_item.parse_url = href
+
+            yield list_item
+
+        # Infinite paging; bounded by crawl_page in the menu.
+        request = self.infinite_pages(request, response)
+        yield request
+
+    def download_midware(self, request):
+        """Attach the JSON POST body (search filters + paging) and headers."""
+        page = request.page
+        menu = request.item
+        data = {
+            "data": {
+                "purchasetypeLevel1Id": None,
+                "purchasetypeLevel2Id": None,
+                "purchasetypeLevel3Id": None,
+                "purchasetypeLevel1IdName": "",
+                "purchasetypeLevel2IdName": "",
+                "purchasetypeLevel3IdName": "",
+                "town": "",
+                "expirytTimeSort": 0,
+                "priceSort": 0,
+                "releaseTimeSort": 0,
+                "highPrice": None,
+                "lowPrice": None,
+                "page": page,
+                "biddingType": menu.get('typeid'),
+                "purchaseState": 0,
+                "baseAppId": "99",
+                "appId": "99",
+                "pageSize": 40,
+                "cityCode": "",
+                "province": "",
+                "city": "",
+                "otherTown": "",
+                "defaultRegion": "",
+                "townType": 1,
+                "title": ""
+            }
+        }
+        data = json.dumps(data)
+        request.data = data
+        request.headers = self.headers
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_cggg").start()

+ 48 - 0
a_bjgc_cggg_xbcg/bjgc_cggg_details.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail spider: renders each announcement page in a browser and
+    extracts the body HTML from one of several known page layouts."""
+
+    def start_requests(self):
+        # Pull pending detail tasks (produced by the list spider) from RabbitMQ.
+        data_list = self.get_tasks_by_rabbitmq(limit=30)
+        for item in data_list:
+            request_params = item.get("request_params")
+            # render=True: the detail page is a JS-rendered SPA.
+            # HACK: eval() resolves the callback name stored in the task
+            # ("self.detail_get"); acceptable only because tasks come from
+            # our own list spider, never from untrusted input.
+            yield feapder.Request(url=item.get("parse_url"), item=item, files_info=item.get("files"),
+                                  deal_detail=item.get("deal_detail"), render_time=5, render=True,
+                                  callback=eval(item.get("parse")),  **request_params)
+
+
+    def detail_get(self, request, response):
+        """Extract the announcement body, trying each known layout in order."""
+        items = request.item
+        list_item = DataBakItem(**items)
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+        # Candidate containers for the known page layouts; first match wins.
+        xpath_list = ['//div[@class="project"]','//div[@class="thirdIndex"]',
+                      '//div[@class="detail-body content-grid relative bg-white mt-[-160px] pb-[160px] px-[50px] pt-[60px]"]/div[@class="text-14"]']
+        html = ""
+        for xpath in xpath_list:
+            html = response.xpath(xpath).extract_first()  # announcement body HTML
+            if html:
+                break
+        # Strip login-gated placeholder texts.
+        # NOTE(review): if no xpath matched, html is None and .replace raises
+        # AttributeError — TODO confirm one layout always matches.
+        html = html.replace('文件下载1', '')
+        html = html.replace('预览1', '')
+        html = html.replace('需求文件:', '')
+        html = html.replace('登录后查看', '')
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == "__main__":
+    Details(redis_key="lzz:bjgc_cggg").start()

+ 123 - 0
a_bjgc_cggg_xbcg/采购公告-询比采购-列表页.py

@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import timestamp_to_date
+import json
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List spider for 八戒公采 purchase announcements (询比采购,
+    biddingType=1); pushes detail tasks rendered by bjgc_cggg_details."""
+
+    def start_callback(self):
+        # typeid maps to the API's biddingType; crawl_page bounds paging.
+        Menu = namedtuple('Menu', ['channel', 'code', 'typeid', 'crawl_page'])
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('采购公告-询比采购', 'a_bjgc_cggg_xbcg', 1, 2),
+        ]
+
+        # Anonymous-access headers; the *token fields are literally "undefined".
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "referer": "https://cg.zbj.com/buyerHall",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/platform/queryPurchase"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def parse(self, request, response):
+        """Turn each API row into a rendered detail task."""
+        menu = request.item
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            title = info.get('purchaseDemandName').strip()
+            hid = info.get('originOrderNo')
+            # Order-number length distinguishes new-platform vs legacy
+            # detail pages — presumably long ids are new-platform only;
+            # verify against the site if rows start landing on 404s.
+            if len(hid) > 16:
+                href = f"https://cg.zbj.com/newdetailsDemand?id={hid}"
+            else:
+                href = f"https://chinazhyc.zbj.com/detailsDemand?id={hid}"
+            # publishDate is in milliseconds; drop the last 3 digits.
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:-3]))
+
+            area = "全国"  # province
+            city = ""  # city
+
+            list_item = MgpListItem()  # pipeline item that stores the row
+            list_item.href = href  # announcement link
+            list_item.channel = menu.get("channel")  # channel defined in the menu above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menu above
+            list_item.title = title  # title
+            list_item.publishtime = create_time  # publish time
+            list_item.site = self.site
+            list_item.area = area  # province; defaults to 全国
+            list_item.city = city  # city; empty by default
+
+            list_item.unique_key = ('href', create_time)
+            list_item.parse = "self.detail_get"
+            list_item.request_params = {"headers": self.headers}
+            list_item.proxies = False
+            list_item.deal_detail = ['//div[@class="project"]']
+            list_item.parse_url = href
+
+            yield list_item
+
+        # Infinite paging; bounded by crawl_page in the menu.
+        request = self.infinite_pages(request, response)
+        yield request
+
+    def download_midware(self, request):
+        """Attach the JSON POST body (search filters + paging) and headers."""
+        page = request.page
+        menu = request.item
+        data = {
+            "data": {
+                "purchasetypeLevel1Id": None,
+                "purchasetypeLevel2Id": None,
+                "purchasetypeLevel3Id": None,
+                "purchasetypeLevel1IdName": "",
+                "purchasetypeLevel2IdName": "",
+                "purchasetypeLevel3IdName": "",
+                "town": "",
+                "expirytTimeSort": 0,
+                "priceSort": 0,
+                "releaseTimeSort": 0,
+                "highPrice": None,
+                "lowPrice": None,
+                "page": page,
+                "biddingType": menu.get('typeid'),
+                "purchaseState": 0,
+                "baseAppId": "99",
+                "appId": "99",
+                "pageSize": 40,
+                "cityCode": "",
+                "province": "",
+                "city": "",
+                "otherTown": "",
+                "defaultRegion": "",
+                "townType": 1,
+                "title": ""
+            }
+        }
+        data = json.dumps(data)
+        request.data = data
+        request.headers = self.headers
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_cggg").start()

+ 48 - 0
a_bjgc_cggg_zjsjcg/bjgc_cggg_details.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-25
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail spider: renders each announcement page in a browser and
+    extracts the body HTML from one of several known page layouts."""
+
+    def start_requests(self):
+        # Pull pending detail tasks (produced by the list spider) from RabbitMQ.
+        data_list = self.get_tasks_by_rabbitmq(limit=30)
+        for item in data_list:
+            request_params = item.get("request_params")
+            # render=True: the detail page is a JS-rendered SPA.
+            # HACK: eval() resolves the callback name stored in the task
+            # ("self.detail_get"); acceptable only because tasks come from
+            # our own list spider, never from untrusted input.
+            yield feapder.Request(url=item.get("parse_url"), item=item, files_info=item.get("files"),
+                                  deal_detail=item.get("deal_detail"), render_time=5, render=True,
+                                  callback=eval(item.get("parse")),  **request_params)
+
+
+    def detail_get(self, request, response):
+        """Extract the announcement body, trying each known layout in order."""
+        items = request.item
+        list_item = DataBakItem(**items)
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+        # Candidate containers for the known page layouts; first match wins.
+        xpath_list = ['//div[@class="project"]','//div[@class="thirdIndex"]',
+                      '//div[@class="detail-body content-grid relative bg-white mt-[-160px] pb-[160px] px-[50px] pt-[60px]"]/div[@class="text-14"]']
+        html = ""
+        for xpath in xpath_list:
+            html = response.xpath(xpath).extract_first()  # announcement body HTML
+            if html:
+                break
+        # Strip login-gated placeholder texts.
+        # NOTE(review): if no xpath matched, html is None and .replace raises
+        # AttributeError — TODO confirm one layout always matches.
+        html = html.replace('文件下载1', '')
+        html = html.replace('预览1', '')
+        html = html.replace('需求文件:', '')
+        html = html.replace('登录后查看', '')
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == "__main__":
+    Details(redis_key="lzz:bjgc_cggg").start()

+ 123 - 0
a_bjgc_cggg_zjsjcg/采购公告-中介随机采购-列表页.py

@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-25
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import timestamp_to_date
+import json
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List spider for 八戒公采 purchase announcements (中介随机采购,
+    biddingType=7); pushes detail tasks rendered by bjgc_cggg_details."""
+
+    def start_callback(self):
+        # typeid maps to the API's biddingType; crawl_page bounds paging.
+        Menu = namedtuple('Menu', ['channel', 'code', 'typeid', 'crawl_page'])
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('采购公告-中介随机采购', 'a_bjgc_cggg_zjsjcg', 7, 2),
+        ]
+
+        # Anonymous-access headers; the *token fields are literally "undefined".
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "referer": "https://cg.zbj.com/buyerHall",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/platform/queryPurchase"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def parse(self, request, response):
+        """Turn each API row into a rendered detail task."""
+        menu = request.item
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            title = info.get('purchaseDemandName').strip()
+            hid = info.get('originOrderNo')
+            # Order-number length distinguishes new-platform vs legacy
+            # detail pages — presumably long ids are new-platform only;
+            # verify against the site if rows start landing on 404s.
+            if len(hid) > 16:
+                href = f"https://cg.zbj.com/newdetailsDemand?id={hid}"
+            else:
+                href = f"https://chinazhyc.zbj.com/detailsDemand?id={hid}"
+            # publishDate is in milliseconds; drop the last 3 digits.
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:-3]))
+
+            area = "全国"  # province
+            city = ""  # city
+
+            list_item = MgpListItem()  # pipeline item that stores the row
+            list_item.href = href  # announcement link
+            list_item.channel = menu.get("channel")  # channel defined in the menu above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menu above
+            list_item.title = title  # title
+            list_item.publishtime = create_time  # publish time
+            list_item.site = self.site
+            list_item.area = area  # province; defaults to 全国
+            list_item.city = city  # city; empty by default
+
+            list_item.unique_key = ('href', create_time)
+            list_item.parse = "self.detail_get"
+            list_item.request_params = {"headers": self.headers}
+            list_item.proxies = False
+            list_item.deal_detail = ['//div[@class="project"]']
+            list_item.parse_url = href
+
+            yield list_item
+
+        # Infinite paging; bounded by crawl_page in the menu.
+        request = self.infinite_pages(request, response)
+        yield request
+
+    def download_midware(self, request):
+        """Attach the JSON POST body (search filters + paging) and headers."""
+        page = request.page
+        menu = request.item
+        data = {
+            "data": {
+                "purchasetypeLevel1Id": None,
+                "purchasetypeLevel2Id": None,
+                "purchasetypeLevel3Id": None,
+                "purchasetypeLevel1IdName": "",
+                "purchasetypeLevel2IdName": "",
+                "purchasetypeLevel3IdName": "",
+                "town": "",
+                "expirytTimeSort": 0,
+                "priceSort": 0,
+                "releaseTimeSort": 0,
+                "highPrice": None,
+                "lowPrice": None,
+                "page": page,
+                "biddingType": menu.get('typeid'),
+                "purchaseState": 0,
+                "baseAppId": "99",
+                "appId": "99",
+                "pageSize": 40,
+                "cityCode": "",
+                "province": "",
+                "city": "",
+                "otherTown": "",
+                "defaultRegion": "",
+                "townType": 1,
+                "title": ""
+            }
+        }
+        data = json.dumps(data)
+        request.data = data
+        request.headers = self.headers
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_cggg").start()

+ 125 - 0
a_bjgc_jggs/结果公示-列表页.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import get_today_of_day,timestamp_to_date
+import json
+
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List spider for 八戒公采 结果公示 (result notices, list type "3"),
+    querying a rolling 3-day window of the notice-list API."""
+
+    def start_callback(self):
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('结果公示', 'a_bjgc_jggs', 30),
+        ]
+
+        # Anonymous-access headers; the *token fields are literally "undefined".
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/notice/queryNoticeList"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        """Attach the JSON POST body: type "3" = result notices, published
+        within the last 3 days, compact separators to match the frontend."""
+        page = request.page
+        data = {
+            "data": {
+                "businessId": "",
+                "biddingType": "0",
+                "purchasingInformation": "",
+                "transactionSupplierName": "",
+                "regionVal": [],
+                "requestId": "1531362372728",
+                "requirementName": "",
+                "type": "3",
+                "page": page,
+                "pageSize": 10,
+                "province": "",
+                "city": "",
+                "region": "",
+                "startTime": f"{get_today_of_day(-3)}",
+                "endTime": f"{get_today_of_day()}"
+            }
+        }
+        data = json.dumps(data, separators=(',', ':'))
+        request.data = data
+        request.headers = self.headers
+
+
+    def parse(self, request, response):
+        """Parse one page of result notices and yield detail tasks."""
+        menu = request.item
+        # NOTE(review): assumes response.json['data']['data'] is a list of
+        # notice dicts — raises AttributeError if the payload shape changes.
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            hid = info.get('id')
+            htp = info.get('type')
+            appId = info.get('appId')
+            href = f"https://cg.zbj.com/publicityDetails?id={hid}&type={htp}"
+            if appId == "HLJGCY":
+                # HLJGCY rows get the purchasing info prefixed to the title.
+                title = info.get('purchasingInformation','').strip() + info.get('name').strip() + "结果公告"
+            else:
+                title = info.get('name').strip()
+            # publishDate is in milliseconds; keep the first 10 digits.
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:10]))
+
+            area = "全国"
+            city = ""
+
+            list_item = MgpListItem()  # pipeline item that stores the row
+            list_item.href = href  # announcement link
+            list_item.unique_key = ('href',create_time)
+            list_item.channel = menu.get("channel")  # channel defined in the menu above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menu above
+            list_item.title = title  # title
+            list_item.site = self.site
+            list_item.publishtime = create_time
+            list_item.area = area or "全国"  # province; defaults to 全国
+            list_item.city = city  # city; empty by default
+
+            list_item.parse = "self.detail_get"  # detail-page callback name
+            list_item.deal_detail = []  # body-extraction xpaths (unused here)
+            list_item.proxies = False
+
+            # POST body for the announcement-detail request.
+            ddata = {
+                "data": {
+                    "id": f"{hid}"
+                }
+            }
+            ddata = json.dumps(ddata, separators=(',', ':'))
+            list_item.request_params = {"data":ddata}
+            list_item.parse_url = "https://bridgezhyc.zbj.com/api/buyer/queryAnnouncementById"
+
+
+            yield list_item
+
+        # Infinite paging; bounded by crawl_page in the menu.
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_jggs").start()

+ 181 - 0
a_bjgc_jggs/结果公示-详情页.py

@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+from feapder.utils.tools import timestamp_to_date
+from untils.tools import extract_file_type
+from untils.attachment import AttachmentDownloader
+
+
+
+
+# Static anonymous-access request headers shared by every detail request;
+# the various *token fields are literally the string "undefined".
+headers = {
+    "authority": "bridgezhyc.zbj.com",
+    "accept": "application/json, text/plain, */*",
+    "accept-language": "zh-CN,zh;q=0.9",
+    "accesstoken": "undefined",
+    "cache-control": "no-cache",
+    "content-type": "application/json",
+    "logintoken": "undefined",
+    "origin": "https://cg.zbj.com",
+    "pragma": "no-cache",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+    "x-auth-token": "undefined",
+    "x-requested-with": "XMLHttpRequest"
+}
+
+def formatdata(date):
+    """Convert a millisecond timestamp to 'YYYY-MM-DD'; pass falsy values through unchanged."""
+    if date:
+        date = timestamp_to_date(int(str(date)[:10]),"%Y-%m-%d")
+    return date
+
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail spider for 结果公示 (result notices): uses the API's ready-made
+    content when present, otherwise rebuilds the result page as HTML."""
+
+    def start_requests(self):
+        # Pull pending detail tasks (produced by the list spider) from RabbitMQ.
+        data_list = self.get_tasks_by_rabbitmq(limit=80)
+        for item in data_list:
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"), item=item,
+                                  deal_detail=item.get("deal_detail"), **request_params,
+                                  callback='parse')
+
+
+    def download_midware(self, request):
+        # All requests use the shared anonymous headers defined above.
+        request.headers = headers
+
+    def parse(self, request, response):
+        """Build contenthtml from the announcement API payload."""
+        items = request.item
+        list_item = DataBakItem(**items)
+        # Keep the public page URL as competehref; href is neutralized.
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+
+        # NOTE(review): assumes response.json['data'] is a non-empty list —
+        # IndexError otherwise.
+        detail_info = response.json.get('data')[0]
+
+        if detail_info.get('content'):
+            # Server supplied ready-made HTML; append the certificate file
+            # (if any) of the first bidder as an attachment link.
+            html = detail_info.get('content')
+
+            # NOTE(review): assumes biddingList is a non-empty list here —
+            # TODO confirm content-bearing notices always carry bidders.
+            file_url_org = detail_info.get('biddingList')[0].get('enterpriseCertificateFile')
+            attachments = {}
+            if file_url_org:
+                file_url = "https://base4pubzhyc.zbjimg.com/" + file_url_org
+                # assumes the 6th path segment is the display name —
+                # TODO confirm for all storage-key formats
+                file_name = file_url_org.split('/')[5]
+                file_type = extract_file_type(file_name,file_url)
+                attachment = AttachmentDownloader().fetch_attachment(
+                    file_name=file_name, file_type=file_type, download_url=file_url,
+                    proxies=request.proxies(),headers=headers)
+                attachments[str(len(attachments) + 1)] = attachment
+
+                fhtml = f'''
+                <div style="font-size: 10pt; margin-bottom: 8pt; text-indent: 2em; line-height: 30px;">
+                    相关附件:
+                    <span style="text-indent: initial;">
+                        <a href="{file_url}">{file_name}</a>
+                    </span>
+                </div>
+                '''
+                html += fhtml
+
+            if attachments:
+                list_item.projectinfo = {"attachments": attachments}
+
+        else:
+            # No ready-made content: reconstruct the result page from fields.
+            # biddingType -> human-readable procurement method.
+            cgInfoJs = {"1-1": "询比采购", "1-2": "谈判采购", "1-3": "竞价采购",
+                        "1-4": "小额采购", "1-5": "直接采购", "1-6": "中介询比采购",
+                        "1-7": "中介随机采购", "1-8": "中介谈判采购"}
+
+            cgfs = cgInfoJs.get(f"1-{detail_info.get('biddingType')}")
+
+            # Procurement outcome label derived from type/cancel flags.
+            cgjg = ""
+            if detail_info.get('biddingType') == 5 and detail_info.get('isCancel') == 1:
+                cgjg = "已取消"
+            elif detail_info.get('type') == 3:
+                cgjg = "成功评选"
+            elif detail_info.get('type') == 4:
+                cgjg = "流标"
+
+            # One table row per winning supplier.
+            zxgys = ""
+            if detail_info.get('biddingList'):
+                for gys in detail_info.get('biddingList'):
+
+                    if detail_info.get('biddingType') != 5:
+                        psjg = "中选"
+                    elif detail_info.get('state') != 13 and detail_info.get('state') != 14:
+                        psjg = "中选"
+                    else:
+                        psjg = "已取消"
+
+                    # Discount rate = (budget - deal amount) / budget.
+                    # NOTE(review): ZeroDivisionError if purchaserBudget is 0
+                    # — TODO confirm the API never returns a zero budget.
+                    yhl = (detail_info.get('purchaserBudget') - detail_info.get('transactionAmount')) / detail_info.get('purchaserBudget')
+
+
+                    if detail_info.get('numOfSelected') == 1:
+                        zt = "现成交"
+                    elif detail_info.get('numOfSelected') > 1:
+                        zt = f"第{gys.get('indexs')}中选人"
+                    else:
+                        zt = ""
+
+                    # NOTE(review): round(yhl,4) * 100 can leave float
+                    # artifacts (e.g. 12.339999...%) in the rendered cell.
+                    temp = f'''
+                    <tr>
+                        <td>{zt}</td>
+                        <td><span>{gys.get('enterpriseName')}</span></td>
+                        <td><span>{psjg}</span></td>
+                        <td><span>{detail_info.get('quotationTimeName')[:10]}</span></td>
+                        <td><span>{detail_info.get('transactionAmount')}</span></td>
+                        <td><span>{round(yhl,4) * 100}%</span></td>
+                    </tr>
+                    '''
+                    zxgys += temp
+
+            html = f'''
+                <div class="page-content">
+                    <ul class="section">
+                        <li><label>项目名称</label>
+                            <a href="">{detail_info.get('bulletinName')}</a>
+                        </li>
+                        <li><label>采购编号</label>{detail_info.get('id')}</li>
+                        <li><label>预算金额</label><span class="price">¥{detail_info.get('purchaserBudget')}</span>
+                        </li>
+                        <li><label>采购方式</label><span>{cgfs}</span></li>
+                    </ul>
+                    <ul class="section">
+                        <li><label>采购人:</label><span>{detail_info.get('purchaser')}</span></li>
+                        <li><label>联系人:</label><span>{detail_info.get('contacts')}</span></li>
+                    </ul>
+                    <ul class="section">
+                        <li><label>采购结果</label><span>{cgjg}</span></li>
+                        <li><label>报价供应商数</label><span>{detail_info.get('quotationSuppliersNumber')}</span></li>
+                        <li><label>公告日期</label><span>{formatdata(detail_info.get('createTime'))}</span></li>
+                
+                        <li class="clearfix"><label>中选供应商</label>
+                            <table class="result">
+                                <thead>
+                                <th>状态</th>
+                                <th>供应商名称</th>
+                                <th>评审结果</th>
+                                <th>公告日期</th>
+                                <th>成交金额</th>
+                                <th>优惠率</th>
+                                </thead>
+                                {zxgys}
+                            </table>
+                        </li>
+                    </ul>
+                </div>
+                '''
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == '__main__':
+    Details(redis_key="lzz:bjgc_jggs").start()

+ 125 - 0
a_bjgc_lbgs/流标公示-列表页.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import get_today_of_day,timestamp_to_date
+import json
+
+
+
+class Feapder(feapder.BiddingListSpider):
+
+    def start_callback(self):
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('结果公示', 'a_bjgc_lbgs', 5),
+        ]
+
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/notice/queryNoticeList"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        page = request.page
+        data = {
+            "data": {
+                "businessId": "",
+                "biddingType": "0",
+                "purchasingInformation": "",
+                "transactionSupplierName": "",
+                "regionVal": [],
+                "requestId": "1531362372728",
+                "requirementName": "",
+                "type": "4",
+                "page": page,
+                "pageSize": 10,
+                "province": "",
+                "city": "",
+                "region": "",
+                "startTime": f"{get_today_of_day(-3)}",
+                "endTime": f"{get_today_of_day()}"
+            }
+        }
+        data = json.dumps(data, separators=(',', ':'))
+        request.data = data
+        request.headers = self.headers
+
+
+    def parse(self, request, response):
+
+        menu = request.item
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            hid = info.get('id')
+            htp = info.get('type')
+            appId = info.get('appId')
+            href = f"https://cg.zbj.com/publicityDetails?id={hid}&type={htp}"
+            if appId == "HLJGCY":
+                title = info.get('purchasingInformation','').strip() + info.get('name').strip() + "终止公告"
+            else:
+                title = info.get('name').strip()
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:10]))
+
+            area = "全国"
+            city = ""
+
+            list_item = MgpListItem()  # 存储数据的管道
+            list_item.href = href  # 标书链接
+            list_item.unique_key = ('href',create_time)
+            list_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
+            list_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
+            list_item.title = title  # 标题
+            list_item.site = self.site
+            list_item.publishtime = create_time
+            list_item.area = area or "全国"  # 城市默认:全国
+            list_item.city = city  # 城市 默认为空
+
+            list_item.parse = "self.detail_get"  # 详情页回调方法
+            list_item.deal_detail = []  # 抽取正文xpath
+            list_item.proxies = False
+
+            ddata = {
+                "data": {
+                    "id": f"{hid}"
+                }
+            }
+            ddata = json.dumps(ddata, separators=(',', ':'))
+            list_item.request_params = {"data":ddata}
+            list_item.parse_url = "https://bridgezhyc.zbj.com/api/buyer/queryAnnouncementById"
+
+
+            yield list_item
+
+        # 无限翻页
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_lbgs").start()

+ 167 - 0
a_bjgc_lbgs/流标公示-详情页.py

@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+from feapder.utils.tools import timestamp_to_date
+
+
+
+headers = {
+    "authority": "bridgezhyc.zbj.com",
+    "accept": "application/json, text/plain, */*",
+    "accept-language": "zh-CN,zh;q=0.9",
+    "accesstoken": "undefined",
+    "cache-control": "no-cache",
+    "content-type": "application/json",
+    "logintoken": "undefined",
+    "origin": "https://cg.zbj.com",
+    "pragma": "no-cache",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+    "x-auth-token": "undefined",
+    "x-requested-with": "XMLHttpRequest"
+}
+
+def formatdata(date):
+    if date:
+        date = timestamp_to_date(int(str(date)[:10]),"%Y-%m-%d")
+    return date
+
+
+
+class Details(feapder.BiddingDetailSpider):
+
+    def start_requests(self):
+        data_list = self.get_tasks_by_rabbitmq(limit=50)
+        for item in data_list:
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"), item=item,
+                                  deal_detail=item.get("deal_detail"), **request_params,
+                                  callback='parse')
+
+
+    def download_midware(self, request):
+        request.headers = headers
+
+    def parse(self, request, response):
+
+        items = request.item
+        list_item = DataBakItem(**items)
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+
+        detail_info = response.json.get('data')[0]
+
+
+        cgInfoJs = {"1-1": "询比采购", "1-2": "谈判采购", "1-3": "竞价采购",
+                    "1-4": "小额采购", "1-5": "直接采购", "1-6": "中介询比采购",
+                    "1-7": "中介随机采购", "1-8": "中介谈判采购"}
+
+        cgfs = cgInfoJs.get(f"1-{detail_info.get('biddingType')}")
+
+        cgjg = ""
+        if detail_info.get('biddingType') == 5 and detail_info.get('isCancel') == 1:
+            cgjg = "已取消"
+        elif detail_info.get('type') == 3:
+            cgjg = "成功评选"
+        elif detail_info.get('type') == 4:
+            cgjg = "流标"
+
+        if detail_info.get('winningWay') == 1:
+            pxfs = "价格最低"
+        else:
+            pxfs = "综合评分"
+
+        if detail_info.get('rateType') == 0:
+            bjfs = "价格"
+            lx = ""
+        else:
+            bjfs = "百分比"
+            if detail_info.get('rateType') == 1:
+                lx = "优惠下浮率/折扣率报价"
+            else:
+                lx = "费率/折扣报价"
+
+        sdb = detail_info.get('selectedBudget')
+        if sdb:
+            sdb_list = sdb.split(',')
+            extra_html = f'''
+            <div style="margin-top: 10px; margin-bottom: 10px;">1、供应商需要按总价{detail_info.get('purchaserBudget')}进行报价;</div>
+            '''
+            index = 2
+            for s in sdb_list:
+                st = f'''
+                <div style="margin-top: 10px; margin-bottom: 10px;">{index}、第{index-1}中选供应商按平台报价*({s}%)计算成交金额;</div>
+                '''
+                extra_html += st
+                index += 1
+        else:
+            extra_html = ""
+
+        sxjg_info = detail_info.get('filtrateRecord')
+        if sxjg_info:
+            if "," in sxjg_info:
+                si = sxjg_info.split(',')
+                sxjg = f"本次采购采购人已使用筛选工具,合理报价区间为:所有报价平均价的{float(si[0])*100}%至{float(si[-1])*100}%,剩余家供应商进入最终评审。"
+            else:
+                sxjg = "本次采购采购人已放弃使用筛选工具,所有报价供应商进入最终评审。"
+        else:
+            sxjg = ""
+
+        html = f'''
+            <div class="page-content">
+                <ul class="section">
+                    <li><label>项目名称</label>
+                        <a href="">{detail_info.get('bulletinName')}</a>
+                    </li>
+                    <li><label>采购编号</label>{detail_info.get('id')}</li>
+                    <li><label>预算金额</label><span class="price">¥{detail_info.get('purchaserBudget')}</span>
+                    </li>
+                    <li><label>采购方式</label><span>{cgfs}</span></li>
+                    <li>
+                        <label>报价方式</label>
+                        <span>{bjfs}</span></li>
+                    <li>
+                    <li><label>类型</label>
+                        <span>{lx}</span></li>
+                    <label>评选方式</label>
+                    <span>{pxfs}</span></li>
+                    <li><label>筛选结果</label>
+                        <div class="result2">
+                            {sxjg}
+                        </div>
+                    </li>
+                </ul>
+                <ul class="section">
+                    <li><label>采购人:</label><span>{detail_info.get('purchaser')}</span></li>
+                    <li><label>联系人:</label><span>{detail_info.get('contacts')}</span></li>
+                </ul>
+                <ul class="section">
+                    <li><label>采购结果</label><span>{cgjg}</span></li>
+                    <li><label>备注</label><span>{detail_info.get('cancelCause')}</span></li>
+                    <li><label>报价供应商数</label><span>{detail_info.get('quotationSuppliersNumber')}</span></li>
+                    <li><label>公告日期</label><span>{formatdata(detail_info.get('createTime'))}</span></li>
+
+                    <li style="display: flex; margin-bottom: 10px;">
+                        <label>允许中选家数</label>
+                        <div>
+                            <div>{detail_info.get('numOfSelected')}家中选</div>
+                            {extra_html}
+                        </div>
+                    </li>
+                </ul>
+            </div>
+            '''
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == '__main__':
+    Details(redis_key="lzz:bjgc_lbgs").start()

+ 125 - 0
a_bjgc_qxgs/取消公示-列表页.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from feapder.utils.tools import get_today_of_day,timestamp_to_date
+import json
+
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List-page spider for the 取消公示 (cancellation publicity) channel of 八戒公采."""
+
+    def start_callback(self):
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.site = "八戒公采"
+
+        self.menus = [
+            Menu('取消公示', 'a_bjgc_qxgs', 3),
+        ]
+
+        self.headers = {
+            "authority": "bridgezhyc.zbj.com",
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "accesstoken": "undefined",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "logintoken": "undefined",
+            "origin": "https://cg.zbj.com",
+            "pragma": "no-cache",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+            "x-auth-token": "undefined",
+            "x-requested-with": "XMLHttpRequest"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://bridgezhyc.zbj.com/api/notice/queryNoticeList"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        # Build the JSON POST body; "type": "1" selects cancellation notices
+        # and the time window covers the last 3 days.
+        page = request.page
+        data = {
+            "data": {
+                "businessId": "",
+                "biddingType": "0",
+                "purchasingInformation": "",
+                "transactionSupplierName": "",
+                "regionVal": [],
+                "requestId": "1531362372728",
+                "requirementName": "",
+                "type": "1",
+                "page": page,
+                "pageSize": 10,
+                "province": "",
+                "city": "",
+                "region": "",
+                "startTime": f"{get_today_of_day(-3)}",
+                "endTime": f"{get_today_of_day()}"
+            }
+        }
+        data = json.dumps(data, separators=(',', ':'))
+        request.data = data
+        request.headers = self.headers
+
+
+    def parse(self, request, response):
+
+        menu = request.item
+        info_list = response.json.get('data').get('data')
+        for info in info_list:
+            hid = info.get('id')
+            htp = info.get('type')
+            appId = info.get('appId')
+            href = f"https://cg.zbj.com/publicityDetails?id={hid}&type={htp}"
+            if appId == "HLJGCY":
+                # NOTE(review): suffix "更正公告" (correction notice) on a
+                # cancellation channel — confirm the intended wording.
+                title = info.get('purchasingInformation','').strip() + info.get('name').strip() + "更正公告"
+            else:
+                title = info.get('name').strip()
+            create_time = timestamp_to_date(int(str(info.get('publishDate'))[:10]))
+
+            area = "全国"
+            city = ""
+
+            list_item = MgpListItem()  # data-storage pipeline item
+            list_item.href = href  # detail-page url
+            list_item.unique_key = ('href',create_time)
+            list_item.channel = menu.get("channel")  # channel name defined in menus above
+            list_item.spidercode = menu.get("code")  # spider code defined in menus above
+            list_item.title = title  # notice title
+            list_item.site = self.site
+            list_item.publishtime = create_time
+            list_item.area = area or "全国"  # province; defaults to 全国 (nationwide)
+            list_item.city = city  # city; empty by default
+
+            list_item.parse = "self.detail_get"  # detail-page callback method name
+            list_item.deal_detail = []  # xpath list for extracting the body
+            list_item.proxies = False
+
+            ddata = {
+                "data": {
+                    "id": f"{hid}"
+                }
+            }
+            ddata = json.dumps(ddata, separators=(',', ':'))
+            list_item.request_params = {"data":ddata}
+            list_item.parse_url = "https://bridgezhyc.zbj.com/api/buyer/queryPurchasingDemandHistoryById"
+
+
+            yield list_item
+
+        # infinite paging
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:bjgc_qxgs").start()

+ 90 - 0
a_bjgc_qxgs/取消公示-详情页.py

@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-17
+---------
+@summary: 八戒公采
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+from feapder.utils.tools import timestamp_to_date
+
+
+headers = {
+    "authority": "bridgezhyc.zbj.com",
+    "accept": "application/json, text/plain, */*",
+    "accept-language": "zh-CN,zh;q=0.9",
+    "accesstoken": "undefined",
+    "cache-control": "no-cache",
+    "content-type": "application/json",
+    "logintoken": "undefined",
+    "origin": "https://cg.zbj.com",
+    "pragma": "no-cache",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+    "x-auth-token": "undefined",
+    "x-requested-with": "XMLHttpRequest"
+}
+
+
+def formatdata(date):
+    # Normalize a millisecond/second epoch timestamp to "YYYY-MM-DD";
+    # falsy input is returned unchanged.
+    if date:
+        date = timestamp_to_date(int(str(date)[:10]), "%Y-%m-%d")
+    return date
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail-page spider for the 取消公示 (cancellation publicity) channel of 八戒公采."""
+
+    def start_requests(self):
+        data_list = self.get_tasks_by_rabbitmq(limit=30)
+        for item in data_list:
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"), item=item,
+                                  deal_detail=item.get("deal_detail"), **request_params,
+                                  callback='parse')
+
+    def download_midware(self, request):
+        request.headers = headers
+
+    def parse(self, request, response):
+
+        items = request.item
+        list_item = DataBakItem(**items)
+        list_item.competehref = list_item.href
+        list_item.href = "#"
+
+        detail_info = response.json.get('data')
+
+        xqmc = detail_info.get('purchaseDemandName')
+        if detail_info.get('appId') == "HLJGCY":
+            # NOTE(review): raises TypeError if purchasingInformation is
+            # missing/None for an HLJGCY record — confirm the field is always set.
+            xqmc = detail_info.get('purchasingInformation') + xqmc + "询比采购公告"
+
+
+        html = f'''
+            <table width="96%">
+                <tr>
+                    <td colspan="2">公告编号:{detail_info.get('id')}</td>
+                </tr>
+                <tr>
+                    <td colspan="2">创建时间:{detail_info.get('createTimeName')[:10]}</td>
+                </tr>
+                <tr>
+                    <td colspan="2">需求名称:<a href="" class="">{xqmc}</a>
+                    </td>
+                </tr>
+                <tr>
+                    <td>取消或变更公告</td>
+                </tr>
+                <tr>
+                    <td>{detail_info.get('cancelCause')}</td>
+                </tr>
+            </table>
+            '''
+
+        list_item.contenthtml = html
+
+        yield list_item
+
+
+if __name__ == '__main__':
+    Details(redis_key="lzz:bjgc_qxgs").start()

+ 94 - 0
a_dfxwszhcgpt_xjgg/Dfxwszhcgpt.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-01-06
+---------
+@summary: 东方希望数字化采购平台
+---------
+@author: lzz
+"""
+from collections import namedtuple
+
+import feapder
+from items.spider_item import BidingListItem
+
+
+class Spider(feapder.BiddingListSpider):
+    """List-page spider for 东方希望数字化采购平台 inquiry notices (询价公告)."""
+
+    def start_callback(self):
+        Menu = namedtuple('Menu', ['channel', 'code', 'sourceType', 'purchaseType', 'crawl_page'])
+        self.site = "东方希望数字化采购平台"
+
+        self.menus = [
+            Menu('询价公告', 'a_dfxwszhcgpt_xjgg', 'BR_ACCEPTED', '2', 10),
+        ]
+        self.headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Origin": "https://srm.easthope.cn",
+            "Referer": "https://srm.easthope.cn/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
+        }
+
+    def start_requests(self):
+        url = "https://srm-gw.easthope.cn/ssrc/v1/53/source-notices/br-list/advance/public"
+        for menu in self.menus:
+            yield feapder.Request(url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        # The API pages from 0, hence "page - 1".
+        page = request.page
+        sourceType = request.item.get('sourceType')
+        purchaseType = request.item.get('purchaseType')
+        params = {
+            "sourceType": f"{sourceType}",
+            "page": f"{page - 1}",
+            "size": "10",
+            "bidTitle": "",
+            "projectType": "",
+            "startDate": "",
+            "endDate": "",
+            "industry": "",
+            "purchaseType": f"{purchaseType}"
+        }
+        request.params = params
+        request.headers = self.headers
+
+    def parse(self, request, response):
+        menu = request.item
+        info_list = response.json.get('content')
+        for info in info_list:
+            sourceType = info.get('sourceType')
+            noticeId = info.get('noticeId')
+            sourceHeaderId = info.get('sourceHeaderId')
+            advanceNoticeId = info.get('advanceNoticeId')
+            sourceFrom = info.get('sourceFrom')
+            href = f"https://srm.easthope.cn/oauth/public/default/source_notice_advance_detail.html?type={sourceType}&from=ZBXX_HOME&noticeId={noticeId}&sourceHeaderId={sourceHeaderId}&advanceNoticeId={advanceNoticeId}&sourceFrom={sourceFrom}"
+            title = info.get('bidTitle').strip()
+            create_time = info.get('approvedDate')
+
+            area = "全国"  # province
+            city = ""  # city
+
+            list_item = BidingListItem()  # data-storage pipeline item
+            list_item.href = href  # detail-page url
+            list_item.channel = menu.get("channel")  # channel name defined in menus above
+            list_item.spidercode = menu.get("code")  # spider code defined in menus above
+            list_item.title = title  # notice title
+            list_item.publishtime = create_time  # publish time
+            list_item.site = self.site
+            list_item.area = area  # province; defaults to 全国 (nationwide)
+            list_item.city = city  # city; empty by default
+
+            list_item.unique_key = ('href', 'title')
+            list_item.parse = "self.detail_get"
+            list_item.deal_detail = ['//div[@class="notice-detail-body"]']
+            list_item.proxies = False
+            list_item.parse_url = href
+            yield list_item
+
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    Spider(redis_key="lzz:DfxwszhcgptNew").start()

+ 57 - 0
a_dfxwszhcgpt_xjgg/dfxwszhcgpt_details.py

@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-01-06
+---------
+@summary: 东方希望数字化采购平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+
+
+class Spider(feapder.BiddingDetailSpider):
+
+    def start_requests(self):
+        data_list = self.get_tasks_by_rabbitmq(limit=50)
+        for item in data_list:
+            # log.debug(item)
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"),
+                                  render=True,
+                                  render_time=5,
+                                  callback=eval(item.get("parse")),
+                                  item=item,
+                                  deal_detail=item.get("deal_detail"),
+                                  **request_params)
+
+    def download_midware(self, request):
+        request.headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Cache-Control": "max-age=0",
+            "Upgrade-Insecure-Requests": "1",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+        }
+
+    def detail_get(self, request, response):
+        items = request.item
+        data_item = DataBakItem(**items)
+
+        html = ''
+        for xpath in request.deal_detail:
+            html = response.xpath(xpath).extract_first()  # 标书详细内容
+            if html is not None:
+                break
+
+        html = html.replace('我要参与</button>', '').replace('我要提问</button>', '')
+        html = html.replace('<div>招标技术附件</div>', '').replace('附件列表</a>', '')
+        html = html.replace('<div>招标商务附件</div>', '').replace('附件列表 </a>', '')
+        html = html.replace('<div>附件</div>', '').replace('查看</a>', '')
+        html = html.replace('浏览详情</div>', '')
+        data_item.contenthtml = html
+        yield data_item
+
+
+if __name__ == "__main__":
+    Spider(redis_key="lzz:DfxwszhcgptNew").start()

+ 94 - 0
a_dfxwszhcgpt_zbgg/Dfxwszhcgpt.py

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-01-06
+---------
+@summary: 东方希望数字化采购平台
+---------
+@author: lzz
+"""
+from collections import namedtuple
+
+import feapder
+from items.spider_item import BidingListItem
+
+
+class Spider(feapder.BiddingListSpider):
+    """List-page spider for 东方希望数字化采购平台 tender notices (招标公告)."""
+
+    def start_callback(self):
+        Menu = namedtuple('Menu', ['channel', 'code', 'sourceType', 'purchaseType', 'crawl_page'])
+        self.site = "东方希望数字化采购平台"
+
+        self.menus = [
+            Menu('招标公告', 'a_dfxwszhcgpt_zbgg', 'BR', '1', 3),
+        ]
+        self.headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Origin": "https://srm.easthope.cn",
+            "Referer": "https://srm.easthope.cn/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
+        }
+
+    def start_requests(self):
+        url = "https://srm-gw.easthope.cn/ssrc/v1/53/source-notices/br-list/advance/public"
+        for menu in self.menus:
+            yield feapder.Request(url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        # The API pages from 0, hence "page - 1".
+        page = request.page
+        sourceType = request.item.get('sourceType')
+        purchaseType = request.item.get('purchaseType')
+        params = {
+            "sourceType": f"{sourceType}",
+            "page": f"{page - 1}",
+            "size": "10",
+            "bidTitle": "",
+            "projectType": "",
+            "startDate": "",
+            "endDate": "",
+            "industry": "",
+            "purchaseType": f"{purchaseType}"
+        }
+        request.params = params
+        request.headers = self.headers
+
+    def parse(self, request, response):
+        menu = request.item
+        info_list = response.json.get('content')
+        for info in info_list:
+            sourceType = info.get('sourceType')
+            noticeId = info.get('noticeId')
+            sourceHeaderId = info.get('sourceHeaderId')
+            advanceNoticeId = info.get('advanceNoticeId')
+            sourceFrom = info.get('sourceFrom')
+            href = f"https://srm.easthope.cn/oauth/public/default/source_notice_advance_detail.html?type={sourceType}&from=ZBXX_HOME&noticeId={noticeId}&sourceHeaderId={sourceHeaderId}&advanceNoticeId={advanceNoticeId}&sourceFrom={sourceFrom}"
+            title = info.get('bidTitle').strip()
+            create_time = info.get('approvedDate')
+
+            area = "全国"  # province
+            city = ""  # city
+
+            list_item = BidingListItem()  # data-storage pipeline item
+            list_item.href = href  # detail-page url
+            list_item.channel = menu.get("channel")  # channel name defined in menus above
+            list_item.spidercode = menu.get("code")  # spider code defined in menus above
+            list_item.title = title  # notice title
+            list_item.publishtime = create_time  # publish time
+            list_item.site = self.site
+            list_item.area = area  # province; defaults to 全国 (nationwide)
+            list_item.city = city  # city; empty by default
+
+            list_item.unique_key = ('href', 'title')
+            list_item.parse = "self.detail_get"
+            list_item.deal_detail = ['//div[@class="notice-detail-body"]']
+            list_item.proxies = False
+            list_item.parse_url = href
+            yield list_item
+
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    Spider(redis_key="lzz:DfxwszhcgptNew").start()

+ 57 - 0
a_dfxwszhcgpt_zbgg/dfxwszhcgpt_details.py

@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-01-06
+---------
+@summary: 东方希望数字化采购平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import DataBakItem
+
+
+class Spider(feapder.BiddingDetailSpider):
+
+    def start_requests(self):
+        data_list = self.get_tasks_by_rabbitmq(limit=50)
+        for item in data_list:
+            # log.debug(item)
+            request_params = item.get("request_params")
+            yield feapder.Request(url=item.get("parse_url"),
+                                  render=True,
+                                  render_time=5,
+                                  callback=eval(item.get("parse")),
+                                  item=item,
+                                  deal_detail=item.get("deal_detail"),
+                                  **request_params)
+
+    def download_midware(self, request):
+        request.headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Cache-Control": "max-age=0",
+            "Upgrade-Insecure-Requests": "1",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+        }
+
+    def detail_get(self, request, response):
+        items = request.item
+        data_item = DataBakItem(**items)
+
+        html = ''
+        for xpath in request.deal_detail:
+            html = response.xpath(xpath).extract_first()  # 标书详细内容
+            if html is not None:
+                break
+
+        html = html.replace('我要参与</button>', '').replace('我要提问</button>', '')
+        html = html.replace('<div>招标技术附件</div>', '').replace('附件列表</a>', '')
+        html = html.replace('<div>招标商务附件</div>', '').replace('附件列表 </a>', '')
+        html = html.replace('<div>附件</div>', '').replace('查看</a>', '')
+        html = html.replace('浏览详情</div>', '')
+        data_item.contenthtml = html
+        yield data_item
+
+
+if __name__ == "__main__":
+    Spider(redis_key="lzz:DfxwszhcgptNew").start()

+ 100 - 0
a_gdsggzyjypt_bcgdzb_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
+def get_nodeId(params,proxies=False):
+    proxy = proxies
+    en_str = get_enstr(params)
+    headers = {
+        "Accept": "application/json, text/plain, */*",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "Pragma": "no-cache",
+        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+    }
+
+    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
+    retry = 0
+    while retry < 3:
+        try:
+            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
+            nodeId_info = res.json().get('data')
+            nodeId_dict = {}
+            for nd in nodeId_info:
+                dsList = nd.get('dsList')
+                for i in dsList:
+                    for k, v in i.items():
+                        for child in v:
+                            nodeId_dict[child] = nd.get('nodeId')
+                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
+            if nodeId_dict:
+                break
+            proxy = get_proxy()
+            retry += 1
+        except:
+            retry += 1
+
+    return nodeId_dict
+
+
+
+
+def get_enstr(data):
+    """Build the signed X-Dgi-Req-* header dict for the given query params.
+
+    Serializes the params as a percent-encoded query string (booleans
+    lowered to "false") and hands it to the JS signer's get_pm().
+    """
+
+    p_list = []
+    for key, value in data.items():
+        if str(value) == "False":
+            value = "false"
+        p_list.append(f"{key}={value}")
+    p_str = parse.quote("&".join(p_list), safe="&=")
+    # NOTE(review): path is relative to the CWD and the file is opened
+    # without an explicit encoding — confirm spiders always run from this dir.
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('get_pm',p_str)
+
+    return pm
+
+
+def create_href(data):
+    """Delegate to the JS helper create_href() and return its result."""
+
+    # NOTE(review): same relative-path/encoding caveat as get_enstr above.
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('create_href',data)
+
+    return pm
+
+def deal_time(tm):
+    """Format a compact timestamp string into a dashed one.
+
+    "YYYYMMDD" -> "YYYY-MM-DD"; "YYYYMMDDHHMMSS" -> "YYYY-MM-DD HH:MM:SS";
+    anything else (including None/empty) is returned unchanged.
+    """
+    if tm and len(tm) == 8:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8]
+    elif tm and len(tm) == 14:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8] + " " + tm[8:10] + ":" + tm[10:12] + ":" + tm[12:]
+    else:
+        pbtime = tm
+
+    return pbtime
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_bcgdzb_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Emulate a minimal browser environment with jsdom so the site's
// browser-targeted signing code can run under Node.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Expose the browser globals the signing routines expect.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Deobfuscated copy of the portal's request-signing routine.
// Given a url-encoded parameter string, returns the X-Dgi-Req-* header map
// (names confirmed by the Python callers that read X-Dgi-Req-App etc.).
function get_pm(req_pm){

    // Obfuscation alphabet; header names/constants are decoded from it by index.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string via the `ine` alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp, l: 16-char random nonce, c: app key,
    // d: header map (keys decode to X-Dgi-Req-App / -Nonce / -Timestamp).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: one arg -> [1, e]; two args -> [e, t].
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from the base alphabet sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs alphabetically, join with "&".
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + appKey + decodedSortedParams + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Computed key decodes to the signature header name (X-Dgi-Req-Signature).
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Rebuild the portal SPA's detail-page URL for a notice record.
function create_href(e) {
    const plat = e.pubServicePlat;
    const typeDesc = e.noticeSecondTypeDesc;

    if (e.edition === "v0") {
        // Legacy (v0) notices use a shorter route keyed only by noticeId.
        const legacyQuery = new URLSearchParams({
            source: plat,
            titleDetails: typeDesc
        });
        return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}?${legacyQuery.toString()}`;
    }

    // Current editions carry the full routing context in the query string.
    const query = new URLSearchParams({
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: plat,
        titleDetails: typeDesc,
        classify: e.projectType
    });
    return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}?${query.toString()}`;
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_bcgdzb_jggg/补充耕地指标-结果公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台 (Guangdong public resource
    trading platform): supplementary-farmland-quota result notices."""

    def start_callback(self):
        # One Menu per crawl target: display channel, spider code, pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('补充耕地指标-结果公告', 'a_gdsggzyjypt_bcgdzb_jggg', 1),
        ]


    def start_requests(self):
        # Seed one paged search request per menu entry; paging starts at 1.
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Attach the JSON search payload plus the signed X-Dgi-Req-* headers."""
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "P",
            "tradingProcess": "2F16,2F17,3F16",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # Sign over the raw params dict before it is serialized to JSON.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Turn each notice in the search result page into an MgpListItem."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # nodeId is required by the detail API; resolved via a second request.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline record for one notice
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province
            list_item.city = city         # city (may be empty)

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Parameters forwarded to the detail spider's API request.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Keep paging until the menu's crawl_page limit is reached.
        request = self.infinite_pages(request, response)
        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_bcgdzb_jggg",user="gdsggzyjypt_bcgdzb_jggg").start()
+

+ 138 - 0
a_gdsggzyjypt_bcgdzb_jggg/补充耕地指标-结果公告-详情页.py

@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider: fetches the notice detail JSON and renders it to
    HTML (info table + attachment list), downloading attachments as it goes."""

    def start_requests(self):
        # Pull one batch of list-page tasks from RabbitMQ and hit the detail
        # API once per task; the loop exits after a single batch.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item, proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        """Sign the detail request (X-Dgi-Req-* headers) and set a socks5h proxy."""
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }

    def parse(self, request, response):
        """Assemble contenthtml from the detail JSON and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column block holds the key/value "notice info" table.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Last column block carries the attachment list, when present.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Download route embeds the edition (v1/v2/...) taken from the href.
                version = "".join(re.findall('new/jygg/(.*?)/', list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download files whose extension can be recognised.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + file_html

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
# Entry point: run the detail spider against the shared redis task queue.
if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_bcgdzb_jggg").start()

+ 100 - 0
a_gdsggzyjypt_bcgdzb_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve the portal ``nodeId`` for every notice matched by *params*.

    Calls the trading-notice ``nodeList`` API and maps each noticeId (plus
    every child id in each node's ``dsList``) to that node's ``nodeId``.
    Retries up to 3 times; after a successful-but-empty response it rotates
    to a fresh proxy before retrying.

    :param params: query params (siteCode / tradingType / bizCode / projectCode).
    :param proxies: requests-style proxy mapping, or False for a direct request.
    :return: dict mapping id -> nodeId; empty dict when every attempt fails.
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Initialized before the loop: previously a request failure on the very
    # first attempt left the name unbound, raising NameError at the return.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Valid but empty response: rotate the proxy and try again.
            proxy = get_proxy()
            retry += 1
        except Exception:  # narrowed from bare except (keep KeyboardInterrupt alive)
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the signed X-Dgi-Req-* request headers for ygp.gdzwfw.gov.cn.

    Serializes *data* into a url-encoded query string (mirroring JavaScript's
    boolean serialization, where Python ``False`` must become ``"false"``) and
    feeds it to the site's JS signing routine (``get_pm``).

    :param data: dict of request parameters (query params or JSON body fields).
    :return: dict with X-Dgi-Req-App / -Nonce / -Signature / -Timestamp keys.
    """
    p_list = []
    for key, value in data.items():
        # JS serializes booleans lowercase; keep the signature input identical.
        if str(value) == "False":
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding so reading the JS file does not depend on the
    # platform's default locale encoding.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Reproduce the portal front-end's detail-page URL for a notice record."""
    with open('./gdsggzyjypt_encrypt.js', 'r') as fr:
        js_source = fr.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Format a compact timestamp as a readable date string.

    ``YYYYMMDD`` -> ``YYYY-MM-DD``; ``YYYYMMDDHHMMSS`` ->
    ``YYYY-MM-DD HH:MM:SS``; anything else (including empty/None) is
    returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_bcgdzb_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Emulate a minimal browser environment with jsdom so the site's
// browser-targeted signing code can run under Node.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Expose the browser globals the signing routines expect.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Deobfuscated copy of the portal's request-signing routine.
// Produces the X-Dgi-Req-* header map for a url-encoded parameter string
// (names confirmed by the Python callers that read X-Dgi-Req-App etc.).
function get_pm(req_pm){

    // Obfuscation alphabet; header names/constants are decoded from it by index.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string via the `ine` alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp, l: 16-char random nonce, c: app key,
    // d: header map (keys decode to X-Dgi-Req-App / -Nonce / -Timestamp).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: one arg -> [1, e]; two args -> [e, t].
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from the base alphabet sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs alphabetically, join with "&".
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + appKey + decodedSortedParams + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Computed key decodes to the signature header name (X-Dgi-Req-Signature).
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Rebuild the portal SPA's detail-page URL for a notice record.
function create_href(e) {
    const plat = e.pubServicePlat;
    const typeDesc = e.noticeSecondTypeDesc;

    if (e.edition === "v0") {
        // Legacy (v0) notices use a shorter route keyed only by noticeId.
        const legacyQuery = new URLSearchParams({
            source: plat,
            titleDetails: typeDesc
        });
        return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}?${legacyQuery.toString()}`;
    }

    // Current editions carry the full routing context in the query string.
    const query = new URLSearchParams({
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: plat,
        titleDetails: typeDesc,
        classify: e.projectType
    });
    return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}?${query.toString()}`;
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_bcgdzb_jygg/补充耕地指标-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台 (Guangdong public resource
    trading platform): supplementary-farmland-quota trading notices."""

    def start_callback(self):
        # One Menu per crawl target: display channel, spider code, pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('补充耕地指标-交易公告', 'a_gdsggzyjypt_bcgdzb_jygg', 1),
        ]


    def start_requests(self):
        # Seed one paged search request per menu entry; paging starts at 1.
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Attach the JSON search payload plus the signed X-Dgi-Req-* headers."""
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "P",
            "tradingProcess": "2F11,2F19,3F11,3F19",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # Sign over the raw params dict before it is serialized to JSON.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Turn each notice in the search result page into an MgpListItem."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # nodeId is required by the detail API; resolved via a second request.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline record for one notice
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province
            list_item.city = city         # city (may be empty)

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Parameters forwarded to the detail spider's API request.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Keep paging until the menu's crawl_page limit is reached.
        request = self.infinite_pages(request, response)
        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_bcgdzb_jygg",user="gdsggzyjypt_bcgdzb_jygg").start()
+

+ 162 - 0
a_gdsggzyjypt_bcgdzb_jygg/补充耕地指标-交易公告-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider: fetches the notice detail JSON and renders it to
    HTML (info table + rich text + attachment list), downloading both the
    listed attachments and any files linked from the rich-text body."""

    def start_requests(self):
        # Pull one batch of list-page tasks from RabbitMQ and hit the detail
        # API once per task; the loop exits after a single batch.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')
                
            break

    def download_midware(self, request):
        """Sign the detail request (X-Dgi-Req-* headers) and set a socks5h proxy."""
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Assemble contenthtml from the detail JSON and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column block holds the key/value "notice info" table.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Second column block is the rich-text notice body, when present.
        ggnr_html = detail_info[1].get('richtext') or ""
        # Last column block carries the attachment list, when present.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Download route embeds the edition (v1/v2/...) taken from the href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download files whose extension can be recognised.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Rich-text bodies may embed the whole notice as an iframe document.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
# Entry point: run the detail spider against the shared redis task queue.
if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_bcgdzb_jygg").start()

+ 100 - 0
a_gdsggzyjypt_fkzbjggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve the portal ``nodeId`` for every notice matched by *params*.

    Calls the trading-notice ``nodeList`` API and maps each noticeId (plus
    every child id in each node's ``dsList``) to that node's ``nodeId``.
    Retries up to 3 times; after a successful-but-empty response it rotates
    to a fresh proxy before retrying.

    :param params: query params (siteCode / tradingType / bizCode / projectCode).
    :param proxies: requests-style proxy mapping, or False for a direct request.
    :return: dict mapping id -> nodeId; empty dict when every attempt fails.
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Initialized before the loop: previously a request failure on the very
    # first attempt left the name unbound, raising NameError at the return.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Valid but empty response: rotate the proxy and try again.
            proxy = get_proxy()
            retry += 1
        except Exception:  # narrowed from bare except (keep KeyboardInterrupt alive)
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the signed X-Dgi-Req-* request headers for ygp.gdzwfw.gov.cn.

    Serializes *data* into a url-encoded query string (mirroring JavaScript's
    boolean serialization, where Python ``False`` must become ``"false"``) and
    feeds it to the site's JS signing routine (``get_pm``).

    :param data: dict of request parameters (query params or JSON body fields).
    :return: dict with X-Dgi-Req-App / -Nonce / -Signature / -Timestamp keys.
    """
    p_list = []
    for key, value in data.items():
        # JS serializes booleans lowercase; keep the signature input identical.
        if str(value) == "False":
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding so reading the JS file does not depend on the
    # platform's default locale encoding.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Reproduce the portal front-end's detail-page URL for a notice record."""
    with open('./gdsggzyjypt_encrypt.js', 'r') as fr:
        js_source = fr.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Format a compact timestamp as a readable date string.

    ``YYYYMMDD`` -> ``YYYY-MM-DD``; ``YYYYMMDDHHMMSS`` ->
    ``YYYY-MM-DD HH:MM:SS``; anything else (including empty/None) is
    returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_fkzbjggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment under Node so the portal's
// signing code (which expects window/document globals) can execute.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Assigned without `const`/`let` on purpose: they must become globals
// visible to the obfuscated site code below.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')  // SHA256 used for the request signature
+
// Build the X-Dgi-Req-* request headers the portal's API expects.
// Header names and the app id are de-obfuscated at runtime via qu(); the
// signature is SHA256(nonce + appKey + sortedParamString + timestamp).
// The Python caller reads X-Dgi-Req-App / Nonce / Timestamp / Signature
// from the returned dict.
function get_pm(req_pm){

    // Obfuscation alphabet; qu() maps index arrays into `ine`.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string over `ine`.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms), l: 16-char random nonce, c: decoded app key,
    // d: header dict keyed by the decoded "X-Dgi-Req-*" names.
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int: 1 arg -> [1, e]; 2 args -> [e, t]; otherwise 0.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e over the base alphabet sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs (accepts object or query string).
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature: SHA256(nonce + appKey + decodeURIComponent(sortedParams) + ts).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    // NOTE(review): `p` leaks as an implicit global — left as-is to mirror
    // the site's original code exactly.
    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach signature under the decoded "X-Dgi-Req-Signature" key (the
    // array key coerces to a string via toString()).
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// Legacy "v0" notices use the flat /jygg/v0/<noticeId> route; everything
// else uses the /new/jygg/<edition>/<secondType> route with full params.
function create_href(e) {
    const source = e.pubServicePlat;
    const titleDetails = e.noticeSecondTypeDesc;

    if (e.edition === "v0") {
        const legacyQuery = new URLSearchParams({
            source: source,
            titleDetails: titleDetails
        });
        return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}?${legacyQuery.toString()}`;
    }

    const detailQuery = new URLSearchParams({
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: source,
        titleDetails: titleDetails,
        classify: e.projectType
    });
    const route = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
    return `${route}?${detailQuery.toString()}`;
}
+
+
+

+ 163 - 0
a_gdsggzyjypt_fkzbjggg/其他交易-复垦指标交易公告-详情页.py

@@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for the Guangdong public resource trading portal.

    Consumes queued list items from RabbitMQ, calls the JSON detail API with
    signed ``X-Dgi-Req-*`` headers, rebuilds the notice HTML from the JSON
    payload, and downloads any referenced attachments.
    """

    def start_requests(self):
        # Drain one batch of up to 10 queued tasks; each task carries the
        # detail API url plus the pre-built query params from the list spider.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=10)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break  # single batch per run

    def download_midware(self, request):
        # Sign the query params via the portal's JS (gd_utils.get_enstr) and
        # route the request through a socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Rebuild notice HTML from the JSON columns and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)


        # Column 0: key/value notice-info table; column 1: rich-text body;
        # last column (when present): attachment list.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the key/value pairs as table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            # Build the attachment list markup and download each file.
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Edition segment taken from the detail href ("new/jygg/<ed>/");
                # empty for legacy v0 links — TODO confirm v0 download route.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document — fetch it as an
        # attachment when its URL looks like a downloadable file.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links embedded in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_fkzbjygg").start()

+ 138 - 0
a_gdsggzyjypt_fkzbjggg/其他交易-复垦指标结果公告-列表页.py

@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for the Guangdong public resource trading portal.

    POSTs the signed search API for the configured menu(s) and queues one
    detail-page task per notice.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('复垦指标结果公告', 'a_gdsggzyjypt_fkzbjggg', 1),
        ]

    def start_requests(self):
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload, derive the signed headers from it, then
        # serialize the body in the same compact JSON form the signature was
        # computed over.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "Z",
            "tradingProcess": "3L12",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here but
            # assigned as a plain attribute in the detail spider — confirm
            # against feapder's Request API.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # data pipeline item
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel label defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be nationwide
            list_item.city = city         # city; default empty

            list_item.parse = "self.detail_get"  # detail-page callback name
            # Query params the detail spider will sign and send.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging up to crawl_page.
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_fkzbjygg", user="1131").start()
+

+ 100 - 0
a_gdsggzyjypt_fkzbjygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Fetch the noticeId -> nodeId mapping from the portal's nodeList API.

    Signs the request with the X-Dgi-Req-* headers, maps every noticeId
    (including children listed under ``dsList``) to its ``nodeId``, and
    retries up to 3 times, rotating the proxy after an empty result.

    :param params: query params identifying the project/notice
    :param proxies: requests-style proxies dict, or False for no proxy
    :return: dict mapping notice ids to node ids; empty if all attempts fail
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Initialized up front: previously the name was only bound inside the
    # try-block, so three consecutive request failures made the final
    # `return nodeId_dict` raise NameError.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params,
                               proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:  # network / JSON / structure errors: just retry
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Build the signed ``X-Dgi-Req-*`` headers for a portal request.

    Serializes ``data`` as ``k=v`` pairs joined by ``&`` (URL-quoted but
    keeping ``&`` and ``=``), then delegates to the ``get_pm`` routine in
    ``gdsggzyjypt_encrypt.js``, which returns the header dict
    (App / Nonce / Timestamp / Signature).

    :param data: request params (dict) to sign
    :return: dict of X-Dgi-Req-* header values produced by the JS
    """
    p_list = []
    for key, value in data.items():
        # The request body is JSON-serialized, where Python booleans become
        # lowercase "true"/"false"; the signed string must match, so lower
        # both booleans (the original only handled False, leaving a True
        # value to be signed as "True" and break the signature).
        if value is False or str(value) == "False":
            value = "false"
        elif value is True:
            value = "true"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding: the JS file may contain non-ASCII text and must not
    # depend on the platform default codec.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for one notice record.

    Delegates to ``create_href`` in ``gdsggzyjypt_encrypt.js``, which picks
    the legacy ``v0`` route or the new route based on ``data['edition']``.

    :param data: notice record dict as returned by the search API
    :return: absolute detail-page URL (str)
    """
    # Explicit encoding instead of the platform default codec.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href', data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``YYYYMMDD`` -> ``YYYY-MM-DD``; ``YYYYMMDDHHMMSS`` ->
    ``YYYY-MM-DD HH:MM:SS``; anything else (including empty/None) is
    returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
                f" {tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_fkzbjygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment under Node so the portal's
// signing code (which expects window/document globals) can execute.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Assigned without `const`/`let` on purpose: they must become globals
// visible to the obfuscated site code below.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')  // SHA256 used for the request signature
+
// Build the X-Dgi-Req-* request headers the portal's API expects.
// Header names and the app id are de-obfuscated at runtime via qu(); the
// signature is SHA256(nonce + appKey + sortedParamString + timestamp).
// The Python caller reads X-Dgi-Req-App / Nonce / Timestamp / Signature
// from the returned dict.
function get_pm(req_pm){

    // Obfuscation alphabet; qu() maps index arrays into `ine`.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string over `ine`.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms), l: 16-char random nonce, c: decoded app key,
    // d: header dict keyed by the decoded "X-Dgi-Req-*" names.
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int: 1 arg -> [1, e]; 2 args -> [e, t]; otherwise 0.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e over the base alphabet sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs (accepts object or query string).
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature: SHA256(nonce + appKey + decodeURIComponent(sortedParams) + ts).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    // NOTE(review): `p` leaks as an implicit global — left as-is to mirror
    // the site's original code exactly.
    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach signature under the decoded "X-Dgi-Req-Signature" key (the
    // array key coerces to a string via toString()).
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// Legacy "v0" notices use the flat /jygg/v0/<noticeId> route; everything
// else uses the /new/jygg/<edition>/<secondType> route with full params.
function create_href(e) {
    const source = e.pubServicePlat;
    const titleDetails = e.noticeSecondTypeDesc;

    if (e.edition === "v0") {
        const legacyQuery = new URLSearchParams({
            source: source,
            titleDetails: titleDetails
        });
        return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}?${legacyQuery.toString()}`;
    }

    const detailQuery = new URLSearchParams({
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: source,
        titleDetails: titleDetails,
        classify: e.projectType
    });
    const route = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
    return `${route}?${detailQuery.toString()}`;
}
+
+
+

+ 139 - 0
a_gdsggzyjypt_fkzbjygg/其他交易-复垦指标交易公告-列表页.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for the Guangdong public resource trading portal.

    POSTs the signed search API for the configured menu(s) and queues one
    detail-page task per notice.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('复垦指标交易公告', 'a_gdsggzyjypt_fkzbjygg', 1),
        ]

    def start_requests(self):
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload, derive the signed headers from it, then
        # serialize the body in the same compact JSON form the signature was
        # computed over.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "Z",
            "tradingProcess": "3L11",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here but
            # assigned as a plain attribute in the detail spider — confirm
            # against feapder's Request API.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # data pipeline item
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel label defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be nationwide
            list_item.city = city         # city; default empty

            list_item.parse = "self.detail_get"  # detail-page callback name

            # Query params the detail spider will sign and send.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging up to crawl_page.
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_fkzbjygg", user="1134").start()
+

+ 163 - 0
a_gdsggzyjypt_fkzbjygg/其他交易-复垦指标交易公告-详情页.py

@@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for the Guangdong public resource trading portal.

    Consumes queued list items from RabbitMQ, calls the JSON detail API with
    signed ``X-Dgi-Req-*`` headers, rebuilds the notice HTML from the JSON
    payload, and downloads any referenced attachments.
    """

    def start_requests(self):
        # Drain one batch of up to 10 queued tasks; each task carries the
        # detail API url plus the pre-built query params from the list spider.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=10)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break  # single batch per run

    def download_midware(self, request):
        # Sign the query params via the portal's JS (gd_utils.get_enstr) and
        # route the request through a socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Rebuild notice HTML from the JSON columns and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)


        # Column 0: key/value notice-info table; column 1: rich-text body;
        # last column (when present): attachment list.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the key/value pairs as table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            # Build the attachment list markup and download each file.
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Edition segment taken from the detail href ("new/jygg/<ed>/");
                # empty for legacy v0 links — TODO confirm v0 download route.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document — fetch it as an
        # attachment when its URL looks like a downloadable file.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links embedded in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_fkzbjygg").start()

+ 100 - 0
a_gdsggzyjypt_gycq_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve node ids for a notice from the portal's ``nodeList`` API.

    Sends a signed GET request (``X-Dgi-Req-*`` headers produced by
    :func:`get_enstr`) and flattens the JSON response into a mapping of
    notice/child id -> nodeId.  Retries up to 3 times, rotating the proxy
    after an attempt that yields no data.

    :param params: query params (siteCode / tradingType / bizCode / projectCode)
    :param proxies: requests-style proxies mapping, or ``False`` for direct access
    :return: dict mapping id -> nodeId; empty dict when every attempt fails
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize BEFORE the retry loop.  The original bound nodeId_dict
    # inside ``try``, so a failure on the first attempt made the final
    # ``return nodeId_dict`` raise NameError instead of returning {}.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params,
                               proxies=proxy, timeout=60, verify=False)
            # ``or []`` guards against a missing/None 'data' or 'dsList'
            # payload, which previously raised and burned a retry silently.
            for nd in res.json().get('data') or []:
                for ds in nd.get('dsList') or []:
                    for children in ds.values():
                        for child in children:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            proxy = get_proxy()  # empty answer: rotate proxy before retrying
            retry += 1
        except Exception:  # narrowed from bare except: keeps SystemExit/KeyboardInterrupt alive
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the signed ``X-Dgi-Req-*`` header values for *data*.

    Serializes the params as a ``k=v&k=v`` payload (booleans lowercased to
    match the web client) and delegates signing to the JS ``get_pm`` routine
    via execjs.
    """
    pairs = []
    for key, value in data.items():
        if str(value) == "False":
            value = "false"  # the portal front-end serializes booleans lowercase
        pairs.append(f"{key}={value}")
    payload = parse.quote("&".join(pairs), safe="&=")
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    return execjs.compile(js_source).call('get_pm', payload)
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to the JS ``create_href`` helper so the URL matches the
    front-end routing exactly.
    """
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    return execjs.compile(js_source).call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` -> ``"YYYY-MM-DD"``;
    ``"YYYYMMDDHHMMSS"`` -> ``"YYYY-MM-DD HH:MM:SS"``;
    anything else (including empty/None) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_gycq_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a browser-like environment under Node so the signing code below
// can reference window/document/location/navigator as it does in the page.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Deliberately assigned without const/let: these must be *globals*, to
// emulate the browser environment the obfuscated code expects.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the signed request headers (X-Dgi-Req-App / -Nonce / -Timestamp /
// -Signature) for a querystring-encoded payload.  Mirrors the portal's
// obfuscated front-end: header names and the app key are reconstructed from
// index lookups into an extended alphabet (see qu below).
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a string from a list of indexes into the extended alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: current timestamp (ms); l: 16-char random nonce; c: decoded app key;
    // d: header map keyed by the decoded X-Dgi-Req-* header names.
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: [1, e] with one argument, [e, t] with two.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e (used for the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs alphabetically and join with "&".
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + appKey + decodeURIComponent(canonicalParams) + timestamp)
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under the decoded header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Rebuild the portal detail-page URL for a notice record, matching the
// front-end routing: legacy "v0" notices use /44/jygg/v0/<noticeId>, all
// other editions use /44/new/jygg/<edition>/<noticeSecondType> with the
// full parameter set in the query string.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source/title travel in the query string.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // Current route: full parameter set required by the detail page.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 137 - 0
a_gdsggzyjypt_gycq_jggg/国有产权-结果公告-列表页.py

@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import json
+from collections import namedtuple
+
+import feapder
+from items.spider_item import MgpListItem
+
+from gd_utils import *
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台, channel 国有产权-结果公告.

    Posts signed JSON searches to the portal's search API and yields one
    MgpListItem per notice; detail fetching is handed off to the companion
    detail spider via ``parse_url``/``request_params``.
    """

    def start_callback(self):
        # Menu: (channel display name, spider code, number of pages to crawl)
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('国有产权-结果公告', 'a_gdsggzyjypt_gycq_jggg', 5),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Attach the JSON search payload and the signed X-Dgi-Req-* headers."""
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "C",
            "tradingProcess": "3B42,701,753,2B1D,2B1B,2B2A,2B2C,2B37,2B39",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # The signature is computed over the dict form, then the body is
        # serialized compactly so it matches what was signed.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Yield one MgpListItem per notice on the current result page."""

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is *called* here — presumably a
            # feapder method returning a proxies mapping; confirm.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # data storage pipeline item
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined above (set by editor)
            list_item.spidercode = menu.get("code")  # spider code defined above (set by editor)
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # area; default: nationwide
            list_item.city = city         # city; default: empty

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Query params the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for extracting body content
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"
            yield list_item

        # infinite paging
        request = self.infinite_pages(request, response)
        yield request
+
+
if __name__ == "__main__":
    # Launch the list-page spider with its redis task key and user id.
    spider = Feapder(redis_key="lzz:gdsggzyjypt_gycq_jygg", user="1126")
    spider.start()
+

+ 162 - 0
a_gdsggzyjypt_gycq_jggg/国有产权-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for 广东省公共资源交易平台 (国有产权).

    Pulls queued list items from RabbitMQ, fetches the signed detail API,
    assembles the announcement HTML (info table + rich text + attachment
    list), downloads any attachments, and yields a DataBakItem.
    """

    def start_requests(self):
        # Drain one batch of queued tasks, then stop (the scheduler re-runs us).
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=30)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        """Sign the detail request and attach a fresh (socks5h) proxy."""
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Build contenthtml and collect attachments from the detail payload."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # Column 0: key/value "announcement info" table; column 1: rich text;
        # last column (when present): attachment list.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Download URL is versioned; the edition is embedded in href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when the name/URL yields a recognized file type.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole announcement as an iframe document.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any file links embedded directly in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # Launch the detail-page spider against its redis-backed task queue.
    spider = Details(redis_key="lzz:gdsggzyjypt_gycq_jygg")
    spider.start()

+ 100 - 0
a_gdsggzyjypt_hlzy_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve node ids for a notice from the portal's ``nodeList`` API.

    Sends a signed GET request (``X-Dgi-Req-*`` headers produced by
    :func:`get_enstr`) and flattens the JSON response into a mapping of
    notice/child id -> nodeId.  Retries up to 3 times, rotating the proxy
    after an attempt that yields no data.

    :param params: query params (siteCode / tradingType / bizCode / projectCode)
    :param proxies: requests-style proxies mapping, or ``False`` for direct access
    :return: dict mapping id -> nodeId; empty dict when every attempt fails
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize BEFORE the retry loop.  The original bound nodeId_dict
    # inside ``try``, so a failure on the first attempt made the final
    # ``return nodeId_dict`` raise NameError instead of returning {}.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params,
                               proxies=proxy, timeout=60, verify=False)
            # ``or []`` guards against a missing/None 'data' or 'dsList'
            # payload, which previously raised and burned a retry silently.
            for nd in res.json().get('data') or []:
                for ds in nd.get('dsList') or []:
                    for children in ds.values():
                        for child in children:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            proxy = get_proxy()  # empty answer: rotate proxy before retrying
            retry += 1
        except Exception:  # narrowed from bare except: keeps SystemExit/KeyboardInterrupt alive
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the signed ``X-Dgi-Req-*`` header values for *data*.

    Serializes the params as a ``k=v&k=v`` payload (booleans lowercased to
    match the web client) and delegates signing to the JS ``get_pm`` routine
    via execjs.
    """
    pairs = []
    for key, value in data.items():
        if str(value) == "False":
            value = "false"  # the portal front-end serializes booleans lowercase
        pairs.append(f"{key}={value}")
    payload = parse.quote("&".join(pairs), safe="&=")
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    return execjs.compile(js_source).call('get_pm', payload)
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to the JS ``create_href`` helper so the URL matches the
    front-end routing exactly.
    """
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    return execjs.compile(js_source).call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` -> ``"YYYY-MM-DD"``;
    ``"YYYYMMDDHHMMSS"`` -> ``"YYYY-MM-DD HH:MM:SS"``;
    anything else (including empty/None) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_hlzy_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a browser-like environment under Node so the signing code below
// can reference window/document/location/navigator as it does in the page.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Deliberately assigned without const/let: these must be *globals*, to
// emulate the browser environment the obfuscated code expects.
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the signed request headers (X-Dgi-Req-App / -Nonce / -Timestamp /
// -Signature) for a querystring-encoded payload.  Mirrors the portal's
// obfuscated front-end: header names and the app key are reconstructed from
// index lookups into an extended alphabet (see qu below).
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a string from a list of indexes into the extended alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: current timestamp (ms); l: 16-char random nonce; c: decoded app key;
    // d: header map keyed by the decoded X-Dgi-Req-* header names.
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: [1, e] with one argument, [e, t] with two.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e (used for the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs alphabetically and join with "&".
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + appKey + decodeURIComponent(canonicalParams) + timestamp)
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under the decoded header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Rebuild the portal detail-page URL for a notice record, matching the
// front-end routing: legacy "v0" notices use /44/jygg/v0/<noticeId>, all
// other editions use /44/new/jygg/<edition>/<noticeSecondType> with the
// full parameter set in the query string.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source/title travel in the query string.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // Current route: full parameter set required by the detail page.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_hlzy_jggg/河流资源-结果公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台, channel 河流资源-结果公告.

    Posts signed JSON searches to the portal's search API and yields one
    MgpListItem per notice; detail fetching is handed off to the companion
    detail spider via ``parse_url``/``request_params``.
    """

    def start_callback(self):
        # Menu: (channel display name, spider code, number of pages to crawl)
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('河流资源-结果公告', 'a_gdsggzyjypt_hlzy_jggg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        """Attach the JSON search payload and the signed X-Dgi-Req-* headers."""
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "N",
            "tradingProcess": "1801,3I14,2I13,2I14",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # The signature is computed over the dict form, then the body is
        # serialized compactly so it matches what was signed.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Yield one MgpListItem per notice on the current result page."""

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is *called* here — presumably a
            # feapder method returning a proxies mapping; confirm.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # data storage pipeline item
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined above (set by editor)
            list_item.spidercode = menu.get("code")  # spider code defined above (set by editor)
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # area; default: nationwide
            list_item.city = city         # city; default: empty

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Query params the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for extracting body content
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # infinite paging
        request = self.infinite_pages(request, response)
        yield request
+
+
if __name__ == "__main__":
    # Launch the list-page spider with its redis task key and user id.
    spider = Feapder(redis_key="lzz:gdsggzyjypt_hlzy_jygg", user="gdsggzyjypt_hlzy_jggg")
    spider.start()
+

+ 162 - 0
a_gdsggzyjypt_hlzy_jggg/河流资源-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for 广东省公共资源交易平台 (河流资源).

    Pulls queued list items from RabbitMQ, fetches the signed detail API,
    assembles the announcement HTML (info table + rich text + attachment
    list), downloads any attachments, and yields a DataBakItem.
    """

    def start_requests(self):
        # Drain one batch of queued tasks, then stop (the scheduler re-runs us).
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        """Sign the detail request and attach a fresh (socks5h) proxy."""
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Build contenthtml and collect attachments from the detail payload."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # Column 0: key/value "announcement info" table; column 1: rich text;
        # last column (when present): attachment list.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Download URL is versioned; the edition is embedded in href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when the name/URL yields a recognized file type.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole announcement as an iframe document.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any file links embedded directly in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # Launch the detail-page spider against its redis-backed task queue.
    spider = Details(redis_key="lzz:gdsggzyjypt_hlzy_jygg")
    spider.start()

+ 100 - 0
a_gdsggzyjypt_hlzy_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
+def get_nodeId(params,proxies=False):
+    proxy = proxies
+    en_str = get_enstr(params)
+    headers = {
+        "Accept": "application/json, text/plain, */*",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "Pragma": "no-cache",
+        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+    }
+
+    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
+    retry = 0
+    while retry < 3:
+        try:
+            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
+            nodeId_info = res.json().get('data')
+            nodeId_dict = {}
+            for nd in nodeId_info:
+                dsList = nd.get('dsList')
+                for i in dsList:
+                    for k, v in i.items():
+                        for child in v:
+                            nodeId_dict[child] = nd.get('nodeId')
+                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
+            if nodeId_dict:
+                break
+            proxy = get_proxy()
+            retry += 1
+        except:
+            retry += 1
+
+    return nodeId_dict
+
+
+
+
+def get_enstr(data):
+    """Compute the portal's anti-crawler signature headers for *data*.
+
+    Serializes *data* as "k=v&k=v" (booleans lowercased to match the JS
+    side), URL-quotes it, and delegates to ``get_pm`` in the bundled
+    gdsggzyjypt_encrypt.js to obtain the X-Dgi-Req-* header dict.
+
+    :param data: request params/body as a dict
+    :return: dict with X-Dgi-Req-App / -Nonce / -Signature / -Timestamp
+    """
+
+    p_list = []
+    for key, value in data.items():
+        if str(value) == "False":
+            value = "false"  # JS signs booleans as lowercase "false"
+        p_list.append(f"{key}={value}")
+    p_str = parse.quote("&".join(p_list), safe="&=")
+    # NOTE(review): path is CWD-relative and the file is opened without an
+    # explicit encoding — works only when run from the spider's directory.
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('get_pm',p_str)
+
+    return pm
+
+
+def create_href(data):
+    """Build the public detail-page URL for a notice dict.
+
+    Delegates to ``create_href`` in gdsggzyjypt_encrypt.js, which picks the
+    legacy (v0) or new URL layout based on the notice's ``edition``.
+
+    :param data: notice record as returned by the search API
+    :return: absolute detail-page URL string
+    """
+
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('create_href',data)
+
+    return pm
+
+def deal_time(tm):
+    if tm and len(tm) == 8:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8]
+    elif tm and len(tm) == 14:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8] + " " + tm[8:10] + ":" + tm[10:12] + ":" + tm[12:]
+    else:
+        pbtime = tm
+
+    return pbtime
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_hlzy_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
+// Minimal browser-environment shim: the signing code may reference the
+// window / document / location / navigator globals, so expose them from a
+// throwaway jsdom document before anything else runs.
+const jsdom = require("jsdom");
+const {JSDOM} = jsdom;
+const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
+    url: "https://example.org/",
+    referrer: "https://example.com/",
+    contentType: "text/html",
+});
+window = dom.window;
+document = window.document;
+location = window.location;
+navigator = window.navigator
+
+// CryptoJS provides the SHA256 used to sign requests.
+CryptoJS = require('crypto-js')
+
+// Build the signed X-Dgi-* request headers for the portal API.
+// Header names and the app id are obfuscated: qu() decodes them from index
+// lists over the `ine` charset.  The Python caller (get_enstr) reads
+// X-Dgi-Req-App / -Nonce / -Timestamp / -Signature from the returned dict.
+function get_pm(req_pm){
+
+    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
+      , ine = sF + "-@#$%^&*+!";
+
+    // Decode an index list into a string via the `ine` charset.
+    function qu(e=[]) {
+        return e.map(t=>ine[t]).join("")
+    }
+    // a: ms timestamp, l: 16-char random nonce, c: obfuscated signing key,
+    // d: header dict (the signature entry is appended below).
+    const  a = Date.now()
+      , l = ane(16)
+      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
+      , d = {
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
+    }
+
+    // Random integer helper used to draw nonce characters from sF.
+    function sne(e, t) {
+        switch (arguments.length) {
+        case 1:
+            return parseInt(Math.random() * e + 1, 10);
+        case 2:
+            return parseInt(Math.random() * (t - e + 1) + e, 10);
+        default:
+            return 0
+        }
+    }
+
+    // Random alphanumeric string of length e (the nonce).
+    function ane(e) {
+        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
+    }
+
+
+    // Canonicalize params: sort the "k=v" pairs and re-join with "&".
+    function lne(e) {
+        let t = "";
+        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
+        t
+    }
+
+    // Signature = SHA256(nonce + key + decodedSortedParams + timestamp).
+    function $g(e={}) {
+        const {p: t, t: n, n: u, k: o} = e
+          , r = lne(t);
+        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
+    }
+
+
+    p = $g({
+        p: req_pm,
+        t: a,
+        n: l,
+        k: c
+    })
+
+    // Attach the signature under its obfuscated header name.
+    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p
+
+    return d
+
+}
+
+
+
+// Build the public detail-page URL for a notice record.
+// Legacy (edition "v0") notices use the short /44/jygg/v0/<id> route;
+// everything else gets the /44/new/jygg/<edition>/<type> route with the
+// full set of identifying query parameters.
+function create_href(e) {
+    const t = e.pubServicePlat
+        , n = e.noticeSecondTypeDesc;
+    if (e.edition === "v0") {
+        const pm = {
+                source: t,
+                titleDetails: n
+            },
+            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
+
+        const sParams = new URLSearchParams(pm);
+        const mUrl = `${b_url}?${sParams.toString()}`;
+
+        return mUrl
+    }
+
+    const c = {
+        noticeId: e.noticeId,
+        projectCode: e.projectCode,
+        bizCode: e.tradingProcess,
+        siteCode: e.regionCode,
+        publishDate: e.publishDate,
+        source: t,
+        titleDetails: n,
+        classify: e.projectType
+    }
+        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
+
+    const searchParams = new URLSearchParams(c);
+    const mergedUrl = `${base_url}?${searchParams.toString()}`;
+
+    return mergedUrl
+}
+
+
+

+ 140 - 0
a_gdsggzyjypt_hlzy_jygg/河流资源-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List-page spider for 广东省公共资源交易平台 (河流资源-交易公告)."""
+
+    def start_callback(self):
+        # Menu: (channel name, spider code, number of pages to crawl)
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.site = "广东省公共资源交易平台"
+
+        self.menus = [
+            Menu('河流资源-交易公告', 'a_gdsggzyjypt_hlzy_jygg', 1),
+        ]
+
+
+    def start_requests(self):
+        # One seed request per menu; paging is driven by infinite_pages().
+        for menu in self.menus:
+            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        # Build the JSON POST body and the signed X-Dgi-* headers for it.
+        page = request.page
+        data = {
+            "type": "trading-type",
+            "openConvert": False,
+            "keyword": "",
+            "siteCode": "44",
+            "secondType": "N",
+            "tradingProcess": "2I11,1800,3I11",
+            "thirdType": "[]",
+            "projectType": "",
+            "publishStartTime": "",
+            "publishEndTime": "",
+            "pageNo": page,
+            "pageSize": 10
+        }
+        en_str = get_enstr(data)
+        # Compact separators so the serialized body matches what was signed.
+        data = json.dumps(data, separators=(',', ':'))
+        request.data = data
+        request.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Content-Type": "application/json",
+            "Origin": "https://ygp.gdzwfw.gov.cn",
+            "Pragma": "no-cache",
+            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+        }
+
+
+    def parse(self, request, response):
+
+        menu = request.item
+        info_list = response.json.get('data').get('pageData')
+        for info in info_list:
+            noticeSecondType = info.get('noticeSecondType')
+            edition = info.get('edition')
+            noticeId = info.get('noticeId')
+            projectCode = info.get('projectCode')
+            tradingProcess = info.get('tradingProcess')
+            siteCode = info.get('regionCode')
+            publishDate = info.get('publishDate')
+
+            params = {
+                "siteCode": f"{siteCode}",
+                "tradingType": f"{noticeSecondType}",
+                "bizCode": f"{tradingProcess}",
+                "projectCode": f"{projectCode}"
+            }
+
+            # NOTE(review): request.proxies is *called* here but *assigned* as a
+            # plain attribute in the detail spider — confirm feapder exposes it
+            # as a callable on list requests.
+            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
+            info['nodeId'] = nodeId
+
+            href = create_href(info)
+            title = info.get('noticeTitle').strip()
+            create_time = deal_time(publishDate)
+            regionName = info.get('regionName','').strip()
+
+            area = "广东"
+            city = regionName
+
+            list_item = MgpListItem()         # pipeline item that stores the row
+            list_item.href = href             # detail-page link
+            list_item.unique_key = ('href',)
+            list_item.channel = menu.get("channel")  # crawl channel defined in the menus above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menus above
+            list_item.title = title           # notice title
+            list_item.site = self.site
+            list_item.publishtime = create_time
+            list_item.area = area         # province; defaults to nationwide
+            list_item.city = city         # city; empty by default
+
+            list_item.parse = "self.detail_get"  # detail-page callback method
+
+            # Parameters the detail spider needs to call the detail API.
+            dparams = {
+                "nodeId": f"{nodeId}",
+                "version": f"{edition}",
+                "tradingType": f"{noticeSecondType}",
+                "noticeId": f"{noticeId}",
+                "bizCode": f"{tradingProcess}",
+                "projectCode": f"{projectCode}",
+                "siteCode": f"{siteCode}"
+            }
+            list_item.request_params = {"params":dparams}
+            list_item.deal_detail = []  # xpath list for extracting the body text
+            list_item.proxies = False
+            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"
+
+
+            yield list_item
+
+        # infinite paging
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+# Script entry point: run the list spider under its redis task key.
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_hlzy_jygg",user="gdsggzyjypt_hlzy_jygg").start()
+

+ 162 - 0
a_gdsggzyjypt_hlzy_jygg/河流资源-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail-page spider: fetches each queued notice via the signed detail
+    API, rebuilds the announcement HTML and downloads its attachments."""
+
+    def start_requests(self):
+        # Drain one batch of queued tasks from RabbitMQ, then stop.
+        while True:
+            data_list = self.get_tasks_by_rabbitmq(limit=20)
+            for item in data_list:
+                request_params = item.get("request_params")
+                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
+                                      deal_detail=item.get("deal_detail"), **request_params,
+                                      callback='parse')
+
+            break
+
+    def download_midware(self, request):
+        # Sign the query params and force a socks5h proxy for the fetch.
+        en_str = get_enstr(request.params)
+        request.proxies = get_proxy(socks5h=True)
+
+        request.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Pragma": "no-cache",
+            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+        }
+
+
+    def parse(self, request, response):
+        # Attachments are keyed "1", "2", ... in insertion order.
+        attachments = {}
+        items = request.item
+        list_item = DataBakItem(**items)
+
+        # Column list: [0] key/value table, [1] rich-text body, [-1] files.
+        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
+        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]
+
+        # Render the announcement-info key/value pairs as table rows.
+        tphtml = ""
+        if ggxx_info:
+            for gd in ggxx_info:
+                temps = f'''
+                    <tr>
+                        <th colspan="1"><span>{gd.get('key')}</span></th>
+                        <td colspan="3"><span>{gd.get('value', '无')}</span>
+                        </td>
+                    </tr>
+                    '''
+                tphtml += temps
+
+        ggxx_html = f'''
+            <section>
+                <h2 id="公告信息" class="subtitle">公告信息</h2>
+                <div class="mt-2">
+                    <div>
+                        <div>
+                            <table>
+                                <tbody>
+                                {tphtml}
+                                </tbody>
+                            </table>
+                        </div>
+                    </div>
+                </div>
+            </section>
+            '''
+
+        ggnr_html = detail_info[1].get('richtext') or ""
+        # NOTE(review): bare except — consider narrowing to (IndexError, AttributeError).
+        try:
+            f_list = detail_info[-1].get('noticeFileBOList')
+        except:
+            f_list = None
+        if f_list:
+            # Build the attachment list markup and download each file.
+            ff_html = ""
+            index = 1
+            for f in f_list:
+                f_id = f.get('rowGuid')
+                # Download route is versioned by the edition embedded in the href.
+                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
+                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
+                f_name = f.get('fileName').strip()
+                temp = f'''
+                <li>
+                    <span>附件名称 {index}</span>
+                    <div>
+                        <div>
+                            <a href="{f_url}">{f_name}</a>
+                        </div>
+                    </div>
+                </li>
+                '''
+                index += 1
+                ff_html += temp
+                f_type = extract_file_type(f_name, f_url)
+                if f_type:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=f_name, file_type=f_type, download_url=f_url,
+                        proxies=request.proxies)
+                    attachments[str(len(attachments) + 1)] = attachment
+
+            file_html = f'''
+            <div class="fileList">
+                <h2 id="相关附件" class="subtitle">相关附件</h2>
+                <ul>
+                    {ff_html}
+                </ul>
+            </div>
+            '''
+        else:
+            file_html = ""
+
+        list_item.contenthtml = ggxx_html + ggnr_html + file_html
+
+        # Some notices embed the body as an iframe document — fetch it too.
+        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()
+
+        fm_type = extract_file_type('公告内容', iframe_url)
+        if fm_type:
+            attachmentf = AttachmentDownloader().fetch_attachment(
+                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
+                proxies=request.proxies)
+            attachments[str(len(attachments) + 1)] = attachmentf
+
+        # Also harvest any downloadable links inside the rich-text body.
+        file_list = Selector(ggnr_html).xpath('//a[@href]')
+        if file_list:
+            for info in file_list:
+                file_name = "".join(info.xpath('.//text()').extract()).strip()
+                file_url = info.xpath('./@href').extract_first()
+                file_type = extract_file_type(file_name,file_url)
+                if file_type:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=file_name, file_type=file_type, download_url=file_url,
+                        proxies=request.proxies)
+                    attachments[str(len(attachments) + 1)] = attachment
+
+        if attachments:
+            list_item.projectinfo = {"attachments": attachments}
+
+        yield list_item
+
+
+# Script entry point: consume queued detail-page tasks for this spider's key.
+if __name__ == '__main__':
+    Details(redis_key="lzz:gdsggzyjypt_hlzy_jygg").start()

+ 100 - 0
a_gdsggzyjypt_hyzy_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
+def get_nodeId(params,proxies=False):
+    proxy = proxies
+    en_str = get_enstr(params)
+    headers = {
+        "Accept": "application/json, text/plain, */*",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "Cache-Control": "no-cache",
+        "Connection": "keep-alive",
+        "Pragma": "no-cache",
+        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+    }
+
+    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
+    retry = 0
+    while retry < 3:
+        try:
+            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
+            nodeId_info = res.json().get('data')
+            nodeId_dict = {}
+            for nd in nodeId_info:
+                dsList = nd.get('dsList')
+                for i in dsList:
+                    for k, v in i.items():
+                        for child in v:
+                            nodeId_dict[child] = nd.get('nodeId')
+                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
+            if nodeId_dict:
+                break
+            proxy = get_proxy()
+            retry += 1
+        except:
+            retry += 1
+
+    return nodeId_dict
+
+
+
+
+def get_enstr(data):
+    """Compute the portal's anti-crawler signature headers for *data*.
+
+    Serializes *data* as "k=v&k=v" (booleans lowercased to match the JS
+    side), URL-quotes it, and delegates to ``get_pm`` in the bundled
+    gdsggzyjypt_encrypt.js to obtain the X-Dgi-Req-* header dict.
+
+    :param data: request params/body as a dict
+    :return: dict with X-Dgi-Req-App / -Nonce / -Signature / -Timestamp
+    """
+
+    p_list = []
+    for key, value in data.items():
+        if str(value) == "False":
+            value = "false"  # JS signs booleans as lowercase "false"
+        p_list.append(f"{key}={value}")
+    p_str = parse.quote("&".join(p_list), safe="&=")
+    # NOTE(review): path is CWD-relative and the file is opened without an
+    # explicit encoding — works only when run from the spider's directory.
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('get_pm',p_str)
+
+    return pm
+
+
+def create_href(data):
+    """Build the public detail-page URL for a notice dict.
+
+    Delegates to ``create_href`` in gdsggzyjypt_encrypt.js, which picks the
+    legacy (v0) or new URL layout based on the notice's ``edition``.
+
+    :param data: notice record as returned by the search API
+    :return: absolute detail-page URL string
+    """
+
+    with open('./gdsggzyjypt_encrypt.js','r') as fr:
+        ex_js = fr.read()
+    ctx = execjs.compile(ex_js)
+    pm = ctx.call('create_href',data)
+
+    return pm
+
+def deal_time(tm):
+    if tm and len(tm) == 8:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8]
+    elif tm and len(tm) == 14:
+        pbtime = tm[:4] + "-" + tm[4:6] + "-" + tm[6:8] + " " + tm[8:10] + ":" + tm[10:12] + ":" + tm[12:]
+    else:
+        pbtime = tm
+
+    return pbtime
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_hyzy_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
+// Minimal browser-environment shim: the signing code may reference the
+// window / document / location / navigator globals, so expose them from a
+// throwaway jsdom document before anything else runs.
+const jsdom = require("jsdom");
+const {JSDOM} = jsdom;
+const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
+    url: "https://example.org/",
+    referrer: "https://example.com/",
+    contentType: "text/html",
+});
+window = dom.window;
+document = window.document;
+location = window.location;
+navigator = window.navigator
+
+// CryptoJS provides the SHA256 used to sign requests.
+CryptoJS = require('crypto-js')
+
+// Build the signed X-Dgi-* request headers for the portal API.
+// Header names and the app id are obfuscated: qu() decodes them from index
+// lists over the `ine` charset.  The Python caller (get_enstr) reads
+// X-Dgi-Req-App / -Nonce / -Timestamp / -Signature from the returned dict.
+function get_pm(req_pm){
+
+    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
+      , ine = sF + "-@#$%^&*+!";
+
+    // Decode an index list into a string via the `ine` charset.
+    function qu(e=[]) {
+        return e.map(t=>ine[t]).join("")
+    }
+    // a: ms timestamp, l: 16-char random nonce, c: obfuscated signing key,
+    // d: header dict (the signature entry is appended below).
+    const  a = Date.now()
+      , l = ane(16)
+      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
+      , d = {
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
+        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
+    }
+
+    // Random integer helper used to draw nonce characters from sF.
+    function sne(e, t) {
+        switch (arguments.length) {
+        case 1:
+            return parseInt(Math.random() * e + 1, 10);
+        case 2:
+            return parseInt(Math.random() * (t - e + 1) + e, 10);
+        default:
+            return 0
+        }
+    }
+
+    // Random alphanumeric string of length e (the nonce).
+    function ane(e) {
+        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
+    }
+
+
+    // Canonicalize params: sort the "k=v" pairs and re-join with "&".
+    function lne(e) {
+        let t = "";
+        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
+        t
+    }
+
+    // Signature = SHA256(nonce + key + decodedSortedParams + timestamp).
+    function $g(e={}) {
+        const {p: t, t: n, n: u, k: o} = e
+          , r = lne(t);
+        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
+    }
+
+
+    p = $g({
+        p: req_pm,
+        t: a,
+        n: l,
+        k: c
+    })
+
+    // Attach the signature under its obfuscated header name.
+    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p
+
+    return d
+
+}
+
+
+
+// Build the public detail-page URL for a notice record.
+// Legacy (edition "v0") notices use the short /44/jygg/v0/<id> route;
+// everything else gets the /44/new/jygg/<edition>/<type> route with the
+// full set of identifying query parameters.
+function create_href(e) {
+    const t = e.pubServicePlat
+        , n = e.noticeSecondTypeDesc;
+    if (e.edition === "v0") {
+        const pm = {
+                source: t,
+                titleDetails: n
+            },
+            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
+
+        const sParams = new URLSearchParams(pm);
+        const mUrl = `${b_url}?${sParams.toString()}`;
+
+        return mUrl
+    }
+
+    const c = {
+        noticeId: e.noticeId,
+        projectCode: e.projectCode,
+        bizCode: e.tradingProcess,
+        siteCode: e.regionCode,
+        publishDate: e.publishDate,
+        source: t,
+        titleDetails: n,
+        classify: e.projectType
+    }
+        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
+
+    const searchParams = new URLSearchParams(c);
+    const mergedUrl = `${base_url}?${searchParams.toString()}`;
+
+    return mergedUrl
+}
+
+
+

+ 140 - 0
a_gdsggzyjypt_hyzy_jggg/海洋资源-结果公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
+class Feapder(feapder.BiddingListSpider):
+    """List-page spider for 广东省公共资源交易平台 (海洋资源-结果公告)."""
+
+    def start_callback(self):
+        # Menu: (channel name, spider code, number of pages to crawl)
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.site = "广东省公共资源交易平台"
+
+        self.menus = [
+            Menu('海洋资源-结果公告', 'a_gdsggzyjypt_hyzy_jggg', 1),
+        ]
+
+
+    def start_requests(self):
+        # One seed request per menu; paging is driven by infinite_pages().
+        for menu in self.menus:
+            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)
+
+    def download_midware(self, request):
+        # Build the JSON POST body and the signed X-Dgi-* headers for it.
+        page = request.page
+        data = {
+            "type": "trading-type",
+            "openConvert": False,
+            "keyword": "",
+            "siteCode": "44",
+            "secondType": "S",
+            "tradingProcess": "2I13,2I14,3I14,1801",
+            "thirdType": "[]",
+            "projectType": "",
+            "publishStartTime": "",
+            "publishEndTime": "",
+            "pageNo": page,
+            "pageSize": 10
+        }
+        en_str = get_enstr(data)
+        # Compact separators so the serialized body matches what was signed.
+        data = json.dumps(data, separators=(',', ':'))
+        request.data = data
+        request.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Content-Type": "application/json",
+            "Origin": "https://ygp.gdzwfw.gov.cn",
+            "Pragma": "no-cache",
+            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+        }
+
+
+    def parse(self, request, response):
+
+        menu = request.item
+        info_list = response.json.get('data').get('pageData')
+        for info in info_list:
+            noticeSecondType = info.get('noticeSecondType')
+            edition = info.get('edition')
+            noticeId = info.get('noticeId')
+            projectCode = info.get('projectCode')
+            tradingProcess = info.get('tradingProcess')
+            siteCode = info.get('regionCode')
+            publishDate = info.get('publishDate')
+
+            params = {
+                "siteCode": f"{siteCode}",
+                "tradingType": f"{noticeSecondType}",
+                "bizCode": f"{tradingProcess}",
+                "projectCode": f"{projectCode}"
+            }
+
+            # NOTE(review): request.proxies is *called* here but *assigned* as a
+            # plain attribute in the detail spider — confirm feapder exposes it
+            # as a callable on list requests.
+            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
+            info['nodeId'] = nodeId
+
+            href = create_href(info)
+            title = info.get('noticeTitle').strip()
+            create_time = deal_time(publishDate)
+            regionName = info.get('regionName','').strip()
+
+            area = "广东"
+            city = regionName
+
+            list_item = MgpListItem()         # pipeline item that stores the row
+            list_item.href = href             # detail-page link
+            list_item.unique_key = ('href',)
+            list_item.channel = menu.get("channel")  # crawl channel defined in the menus above
+            list_item.spidercode = menu.get("code")  # spider code defined in the menus above
+            list_item.title = title           # notice title
+            list_item.site = self.site
+            list_item.publishtime = create_time
+            list_item.area = area         # province; defaults to nationwide
+            list_item.city = city         # city; empty by default
+
+            list_item.parse = "self.detail_get"  # detail-page callback method
+
+            # Parameters the detail spider needs to call the detail API.
+            dparams = {
+                "nodeId": f"{nodeId}",
+                "version": f"{edition}",
+                "tradingType": f"{noticeSecondType}",
+                "noticeId": f"{noticeId}",
+                "bizCode": f"{tradingProcess}",
+                "projectCode": f"{projectCode}",
+                "siteCode": f"{siteCode}"
+            }
+            list_item.request_params = {"params":dparams}
+            list_item.deal_detail = []  # xpath list for extracting the body text
+            list_item.proxies = False
+            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"
+
+
+            yield list_item
+
+        # infinite paging
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+# NOTE(review): redis_key says "hyzy_jygg" while the folder / user / spidercode
+# all say "hyzy_jggg" — looks like a copy-paste slip from the jygg spider.
+# The paired detail spider uses the same key, so the pair is self-consistent;
+# confirm the intended queue name before renaming either side.
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_hyzy_jygg",user="gdsggzyjypt_hyzy_jggg").start()
+

+ 162 - 0
a_gdsggzyjypt_hyzy_jggg/海洋资源-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
+class Details(feapder.BiddingDetailSpider):
+    """Detail-page spider: fetches each queued notice via the signed detail
+    API, rebuilds the announcement HTML and downloads its attachments."""
+
+    def start_requests(self):
+        # Drain one batch of queued tasks from RabbitMQ, then stop.
+        while True:
+            data_list = self.get_tasks_by_rabbitmq(limit=20)
+            for item in data_list:
+                request_params = item.get("request_params")
+                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
+                                      deal_detail=item.get("deal_detail"), **request_params,
+                                      callback='parse')
+
+            break
+
+    def download_midware(self, request):
+        # Sign the query params and force a socks5h proxy for the fetch.
+        en_str = get_enstr(request.params)
+        request.proxies = get_proxy(socks5h=True)
+
+        request.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Pragma": "no-cache",
+            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
+            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
+            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
+            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
+            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
+        }
+
+
+    def parse(self, request, response):
+        # Attachments are keyed "1", "2", ... in insertion order.
+        attachments = {}
+        items = request.item
+        list_item = DataBakItem(**items)
+
+        # Column list: [0] key/value table, [1] rich-text body, [-1] files.
+        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
+        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]
+
+        # Render the announcement-info key/value pairs as table rows.
+        tphtml = ""
+        if ggxx_info:
+            for gd in ggxx_info:
+                temps = f'''
+                    <tr>
+                        <th colspan="1"><span>{gd.get('key')}</span></th>
+                        <td colspan="3"><span>{gd.get('value', '无')}</span>
+                        </td>
+                    </tr>
+                    '''
+                tphtml += temps
+
+        ggxx_html = f'''
+            <section>
+                <h2 id="公告信息" class="subtitle">公告信息</h2>
+                <div class="mt-2">
+                    <div>
+                        <div>
+                            <table>
+                                <tbody>
+                                {tphtml}
+                                </tbody>
+                            </table>
+                        </div>
+                    </div>
+                </div>
+            </section>
+            '''
+
+        ggnr_html = detail_info[1].get('richtext') or ""
+        # NOTE(review): bare except — consider narrowing to (IndexError, AttributeError).
+        try:
+            f_list = detail_info[-1].get('noticeFileBOList')
+        except:
+            f_list = None
+        if f_list:
+            # Build the attachment list markup and download each file.
+            ff_html = ""
+            index = 1
+            for f in f_list:
+                f_id = f.get('rowGuid')
+                # Download route is versioned by the edition embedded in the href.
+                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
+                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
+                f_name = f.get('fileName').strip()
+                temp = f'''
+                <li>
+                    <span>附件名称 {index}</span>
+                    <div>
+                        <div>
+                            <a href="{f_url}">{f_name}</a>
+                        </div>
+                    </div>
+                </li>
+                '''
+                index += 1
+                ff_html += temp
+                f_type = extract_file_type(f_name, f_url)
+                if f_type:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=f_name, file_type=f_type, download_url=f_url,
+                        proxies=request.proxies)
+                    attachments[str(len(attachments) + 1)] = attachment
+
+            file_html = f'''
+            <div class="fileList">
+                <h2 id="相关附件" class="subtitle">相关附件</h2>
+                <ul>
+                    {ff_html}
+                </ul>
+            </div>
+            '''
+        else:
+            file_html = ""
+
+        list_item.contenthtml = ggxx_html + ggnr_html + file_html
+
+        # Some notices embed the body as an iframe document — fetch it too.
+        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()
+
+        fm_type = extract_file_type('公告内容', iframe_url)
+        if fm_type:
+            attachmentf = AttachmentDownloader().fetch_attachment(
+                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
+                proxies=request.proxies)
+            attachments[str(len(attachments) + 1)] = attachmentf
+
+        # Also harvest any downloadable links inside the rich-text body.
+        file_list = Selector(ggnr_html).xpath('//a[@href]')
+        if file_list:
+            for info in file_list:
+                file_name = "".join(info.xpath('.//text()').extract()).strip()
+                file_url = info.xpath('./@href').extract_first()
+                file_type = extract_file_type(file_name,file_url)
+                if file_type:
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=file_name, file_type=file_type, download_url=file_url,
+                        proxies=request.proxies)
+                    attachments[str(len(attachments) + 1)] = attachment
+
+        if attachments:
+            list_item.projectinfo = {"attachments": attachments}
+
+        yield list_item
+
+
+# NOTE(review): redis_key says "hyzy_jygg" but this is the hyzy_jggg spider —
+# it matches the list spider's key, so the pair is self-consistent; confirm
+# the intended queue name before renaming either side.
+if __name__ == '__main__':
+    Details(redis_key="lzz:gdsggzyjypt_hyzy_jygg").start()

+ 100 - 0
a_gdsggzyjypt_hyzy_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Fetch the notice->nodeId mapping from the portal's nodeList API.

    Args:
        params: query dict (siteCode / tradingType / bizCode / projectCode).
        proxies: requests-style proxies mapping; falsy disables proxying.

    Returns:
        dict mapping each noticeId (and every child id found in ``dsList``)
        to its nodeId. Empty dict when every attempt fails.
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Bug fix: bind nodeId_dict before the retry loop. Previously it was only
    # assigned inside the try-block, so if the very first request raised, the
    # final `return nodeId_dict` raised UnboundLocalError instead of
    # returning an empty mapping.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty result: rotate the proxy and try again.
            proxy = get_proxy()
            retry += 1
        except Exception:  # narrowed from bare except: lets SystemExit/KeyboardInterrupt through
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the X-Dgi-Req-* signature headers for a request payload.

    Serializes ``data`` into a query string (mapping Python ``False`` to the
    JS literal ``"false"``), URL-quotes it, and hands it to the site's own
    ``get_pm`` routine inside ``gdsggzyjypt_encrypt.js``.
    """
    pairs = [
        "{}={}".format(key, "false" if str(value) == "False" else value)
        for key, value in data.items()
    ]
    query = parse.quote("&".join(pairs), safe="&=")
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    runtime = execjs.compile(js_source)
    return runtime.call('get_pm', query)
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to the site's own ``create_href`` in gdsggzyjypt_encrypt.js so
    the generated link matches what the front-end produces.
    """
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` becomes ``"YYYY-MM-DD"``; ``"YYYYMMDDhhmmss"`` becomes
    ``"YYYY-MM-DD hh:mm:ss"``. Any other value (including None/empty) is
    returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_hyzy_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like environment: the site's scripts expect window /
// document / location / navigator as globals, so expose them from a
// throwaway JSDOM page before defining the signing helpers below.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

// CryptoJS provides the SHA-256 used by get_pm's request signature.
CryptoJS = require('crypto-js')
+
// Build the anti-crawl request headers for `req_pm`, an already URL-encoded
// "k=v&k=v" query string. Header names and the app key are assembled from
// character indices into an obfuscated alphabet; the signature is SHA-256
// over nonce + key + decoded sorted params + timestamp. The Python callers
// read X-Dgi-Req-App / -Nonce / -Timestamp / -Signature from the result.
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a list of indices into a string over the extended alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp in ms; l: 16-char random nonce; c: obfuscated app key;
    // d: header map keyed by obfuscated names (app id, nonce, timestamp).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int: [1,e] with one arg, [e,t] with two, 0 otherwise.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs; accepts an object or a string.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sorted params) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the signature under its obfuscated header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
+function create_href(e) {
+    const t = e.pubServicePlat
+        , n = e.noticeSecondTypeDesc;
+    if (e.edition === "v0") {
+        const pm = {
+                source: t,
+                titleDetails: n
+            },
+            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
+
+        const sParams = new URLSearchParams(pm);
+        const mUrl = `${b_url}?${sParams.toString()}`;
+
+        return mUrl
+    }
+
+    const c = {
+        noticeId: e.noticeId,
+        projectCode: e.projectCode,
+        bizCode: e.tradingProcess,
+        siteCode: e.regionCode,
+        publishDate: e.publishDate,
+        source: t,
+        titleDetails: n,
+        classify: e.projectType
+    }
+        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
+
+    const searchParams = new URLSearchParams(c);
+    const mergedUrl = `${base_url}?${searchParams.toString()}`;
+
+    return mergedUrl
+}
+
+
+

+ 140 - 0
a_gdsggzyjypt_hyzy_jygg/海洋资源-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台, channel 海洋资源-交易公告.

    POSTs a signed search query to the portal's /search/v2/items API and
    yields one MgpListItem per notice for the detail spider to consume.
    """

    def start_callback(self):
        # Menu = (display channel, spider code, number of pages to crawl).
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('海洋资源-交易公告', 'a_gdsggzyjypt_hyzy_jygg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload; the dict is signed first (get_enstr), then
        # serialized compactly so the body matches the signed representation.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "S",
            "tradingProcess": "1800,2I11,3I11",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm feapder exposes it as a method on Request.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for one list record
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be 全国 (nationwide)
            list_item.city = city         # city; empty by default

            list_item.parse = "self.detail_get"  # detail-page callback method name

            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath rules for body extraction (detail comes from the API)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-enqueue the next page up to the crawl_page limit.
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_hyzy_jygg",user="gdsggzyjypt_hyzy_jygg").start()
+

+ 162 - 0
a_gdsggzyjypt_hyzy_jygg/海洋资源-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider for 广东省公共资源交易平台 (海洋资源-交易公告).

    Pulls queued list items from RabbitMQ, calls the trading-notice detail
    API with signed headers, rebuilds an HTML body from the JSON payload,
    and downloads every attachment it can identify.
    """

    def start_requests(self):
        # Single-pass batch: fetch up to 20 queued tasks, yield one request
        # per task, then break out of the (one-iteration) loop.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')
                
            break

    def download_midware(self, request):
        # Sign the detail-API query params and rotate to a fresh socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # NOTE(review): assumes the column list is ordered with the key/value
        # table first, rich text second, file list last — confirm vs the API.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the notice-info key/value pairs as HTML table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Rich-text notice body; may be empty.
        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            # Build the attachment list HTML and download each listed file.
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Extract the edition segment (e.g. "v2") from the notice href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when a recognizable file type is detected.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document: download it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_hyzy_jygg").start()

+ 100 - 0
a_gdsggzyjypt_kysyq_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Fetch the notice->nodeId mapping from the portal's nodeList API.

    Args:
        params: query dict (siteCode / tradingType / bizCode / projectCode).
        proxies: requests-style proxies mapping; falsy disables proxying.

    Returns:
        dict mapping each noticeId (and every child id found in ``dsList``)
        to its nodeId. Empty dict when every attempt fails.
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Bug fix: bind nodeId_dict before the retry loop. Previously it was only
    # assigned inside the try-block, so if the very first request raised, the
    # final `return nodeId_dict` raised UnboundLocalError instead of
    # returning an empty mapping.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty result: rotate the proxy and try again.
            proxy = get_proxy()
            retry += 1
        except Exception:  # narrowed from bare except: lets SystemExit/KeyboardInterrupt through
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the X-Dgi-Req-* signature headers for a request payload.

    Serializes ``data`` into a query string (mapping Python ``False`` to the
    JS literal ``"false"``), URL-quotes it, and hands it to the site's own
    ``get_pm`` routine inside ``gdsggzyjypt_encrypt.js``.
    """
    pairs = [
        "{}={}".format(key, "false" if str(value) == "False" else value)
        for key, value in data.items()
    ]
    query = parse.quote("&".join(pairs), safe="&=")
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    runtime = execjs.compile(js_source)
    return runtime.call('get_pm', query)
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to the site's own ``create_href`` in gdsggzyjypt_encrypt.js so
    the generated link matches what the front-end produces.
    """
    with open('./gdsggzyjypt_encrypt.js', 'r') as fp:
        js_source = fp.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` becomes ``"YYYY-MM-DD"``; ``"YYYYMMDDhhmmss"`` becomes
    ``"YYYY-MM-DD hh:mm:ss"``. Any other value (including None/empty) is
    returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_kysyq_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like environment: the site's scripts expect window /
// document / location / navigator as globals, so expose them from a
// throwaway JSDOM page before defining the signing helpers below.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

// CryptoJS provides the SHA-256 used by get_pm's request signature.
CryptoJS = require('crypto-js')
+
// Build the anti-crawl request headers for `req_pm`, an already URL-encoded
// "k=v&k=v" query string. Header names and the app key are assembled from
// character indices into an obfuscated alphabet; the signature is SHA-256
// over nonce + key + decoded sorted params + timestamp. The Python callers
// read X-Dgi-Req-App / -Nonce / -Timestamp / -Signature from the result.
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a list of indices into a string over the extended alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp in ms; l: 16-char random nonce; c: obfuscated app key;
    // d: header map keyed by obfuscated names (app id, nonce, timestamp).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int: [1,e] with one arg, [e,t] with two, 0 otherwise.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs; accepts an object or a string.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sorted params) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the signature under its obfuscated header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
+function create_href(e) {
+    const t = e.pubServicePlat
+        , n = e.noticeSecondTypeDesc;
+    if (e.edition === "v0") {
+        const pm = {
+                source: t,
+                titleDetails: n
+            },
+            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
+
+        const sParams = new URLSearchParams(pm);
+        const mUrl = `${b_url}?${sParams.toString()}`;
+
+        return mUrl
+    }
+
+    const c = {
+        noticeId: e.noticeId,
+        projectCode: e.projectCode,
+        bizCode: e.tradingProcess,
+        siteCode: e.regionCode,
+        publishDate: e.publishDate,
+        source: t,
+        titleDetails: n,
+        classify: e.projectType
+    }
+        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
+
+    const searchParams = new URLSearchParams(c);
+    const mergedUrl = `${base_url}?${searchParams.toString()}`;
+
+    return mergedUrl
+}
+
+
+

+ 140 - 0
a_gdsggzyjypt_kysyq_jggg/空域使用权-结果公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider for 广东省公共资源交易平台, channel 空域使用权-结果公告.

    POSTs a signed search query to the portal's /search/v2/items API and
    yields one MgpListItem per notice for the detail spider to consume.
    """

    def start_callback(self):
        # Menu = (display channel, spider code, number of pages to crawl).
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('空域使用权-结果公告', 'a_gdsggzyjypt_kysyq_jggg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload; the dict is signed first (get_enstr), then
        # serialized compactly so the body matches the signed representation.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "O",
            "tradingProcess": "2I13,2I14,3I14,1801",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm feapder exposes it as a method on Request.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for one list record
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be 全国 (nationwide)
            list_item.city = city         # city; empty by default

            list_item.parse = "self.detail_get"  # detail-page callback method name

            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath rules for body extraction (detail comes from the API)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-enqueue the next page up to the crawl_page limit.
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    # NOTE(review): redis_key says "...kysyq_jygg" while this folder/spidercode
    # is "...kysyq_jggg" — likely a copy-paste remnant; the companion detail
    # spider uses the same key, so confirm before renaming.
    Feapder(redis_key="lzz:gdsggzyjypt_kysyq_jygg",user="gdsggzyjypt_kysyq_jggg").start()
+

+ 162 - 0
a_gdsggzyjypt_kysyq_jggg/空域使用权-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider for 广东省公共资源交易平台 (空域使用权-结果公告).

    Pulls queued list items from RabbitMQ, calls the trading-notice detail
    API with signed headers, rebuilds an HTML body from the JSON payload,
    and downloads every attachment it can identify.
    """

    def start_requests(self):
        # Single-pass batch: fetch up to 20 queued tasks, yield one request
        # per task, then break out of the (one-iteration) loop.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')
                
            break

    def download_midware(self, request):
        # Sign the detail-API query params and rotate to a fresh socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # NOTE(review): assumes the column list is ordered with the key/value
        # table first, rich text second, file list last — confirm vs the API.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the notice-info key/value pairs as HTML table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Rich-text notice body; may be empty.
        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            # Build the attachment list HTML and download each listed file.
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Extract the edition segment (e.g. "v2") from the notice href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when a recognizable file type is detected.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document: download it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_kysyq_jygg").start()

+ 100 - 0
a_gdsggzyjypt_kysyq_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve nodeIds for a notice via the trading-notice ``nodeList`` API.

    Sends a signed GET request and builds a mapping from every notice id
    (the top-level ``noticeId`` plus every child id found under ``dsList``)
    to its ``nodeId``. Retries up to 3 times, rotating the proxy when the
    endpoint answers with an empty result.

    :param params: query parameters (siteCode/tradingType/bizCode/projectCode)
    :param proxies: requests-style proxies mapping, or False for direct access
    :return: dict of id -> nodeId; empty dict when every attempt failed
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize before the retry loop. The original assigned nodeId_dict
    # only inside the try-block, so three consecutive exceptions made the
    # final `return nodeId_dict` raise UnboundLocalError.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt/SystemExit
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Build the signed ``X-Dgi-Req-*`` headers for a request payload.

    Serializes ``data`` as a ``k=v&...`` string (Python ``False`` is lowered
    to ``"false"`` to match the site's JS serialization), percent-encodes it,
    and delegates signing to ``get_pm`` in gdsggzyjypt_encrypt.js.

    :param data: dict of request parameters
    :return: dict with X-Dgi-Req-App/Nonce/Timestamp/Signature entries
    """
    p_list = []
    for key, value in data.items():
        if str(value) == "False":  # mirror JavaScript boolean serialization
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Fix: pass an explicit encoding. The JS file is UTF-8; relying on the
    # locale default encoding breaks on platforms where it is not UTF-8.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to ``create_href`` in gdsggzyjypt_encrypt.js, which picks the
    legacy v0 route or the new-edition route based on ``data['edition']``.

    :param data: the raw notice dict from the list API
    :return: the absolute detail-page URL string
    """
    # Fix: explicit encoding (the JS source is UTF-8; locale default may differ).
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href', data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` becomes ``"YYYY-MM-DD"``; ``"YYYYMMDDhhmmss"`` becomes
    ``"YYYY-MM-DD hh:mm:ss"``. Any other value (including None or an empty
    string) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_kysyq_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment so the site's browser-oriented
// signing code runs under Node: jsdom supplies window/document/location/navigator.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

// crypto-js provides the SHA-256 used for request signing in get_pm below.
CryptoJS = require('crypto-js')
+
// Produce the signed X-Dgi-Req-* header object for a portal request.
// req_pm: the percent-encoded, '&'-joined parameter string to sign.
function get_pm(req_pm){

    // Obfuscation alphabet: header names and the app id below are decoded
    // by indexing into `ine`.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an array of indices into a string via the `ine` alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: millisecond timestamp; l: 16-char random nonce; c: decoded signing salt;
    // d: header object (App / Nonce / Timestamp keys decoded from index arrays).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: sne(n) -> 1..n, sne(a, b) -> a..b, else 0.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from sF — the nonce generator.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize parameters: sort the 'k=v' pairs of an object or a
    // pre-joined '&' string so signing is order-independent.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + salt + decodeURIComponent(sortedParams) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under the decoded "Signature" header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// Legacy "v0" edition notices use the /jygg/v0/<noticeId> route with only
// source/titleDetails params; every other edition uses the /new/jygg route
// carrying the full identifying parameter set.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New-edition route: identify the notice by project/biz/site/date params.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_kysyq_jygg/空域使用权-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider: 空域使用权-交易公告 on 广东省公共资源交易平台.

    Posts signed JSON queries to the portal search API, resolves each
    notice's nodeId, builds its detail-page URL, and emits one MgpListItem
    per notice for the detail spider to consume.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('空域使用权-交易公告', 'a_gdsggzyjypt_kysyq_jygg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; paging starts at 1.
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the JSON body for the current page and sign it: the payload is
        # compact-serialized and the X-Dgi-Req-* headers come from get_enstr.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "O",
            "tradingProcess": "2I11,1800,3I11",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Emit one MgpListItem per notice on this result page, then page on."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is *called* here but passed as an
            # attribute in the detail spider — confirm it is a method in feapder.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # item pipeline that stores the record
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province/area (site default: 全国)
            list_item.city = city         # city, empty by default

            list_item.parse = "self.detail_get"  # detail-page callback name

            # Query params the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpaths for extracting body text (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-queue this request for the next page.
        request = self.infinite_pages(request, response)
        yield request
+
+
# Entry point: run the list spider standalone.
if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_kysyq_jygg",user="gdsggzyjypt_kysyq_jygg").start()
+

+ 162 - 0
a_gdsggzyjypt_kysyq_jygg/空域使用权-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for 广东省公共资源交易平台.

    Pulls queued list items, fetches the JSON detail API with signed
    headers, rebuilds the announcement HTML (info table + rich text +
    file list), and downloads referenced attachments.
    """

    def start_requests(self):
        # Pull a single batch of up to 20 pending tasks and emit one request
        # per task; the while/break wrapper runs exactly once.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query params and attach the X-Dgi-Req-* headers.
        # Note: this overrides the proxies=False set in start_requests.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Assemble contenthtml and collect attachments.

        Attachments are keyed by 1-based string indices ("1", "2", ...).
        """
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the key/value notice-info pairs as table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Rich-text announcement body; last column may carry the file list.
        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:  # payloads without a file section land here
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Edition segment parsed out of the detail URL built by create_href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole announcement as an iframe document.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
# Entry point: consume queued detail tasks for this spider's redis key.
if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_kysyq_jygg").start()

+ 100 - 0
a_gdsggzyjypt_lqjy_xxpl/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Resolve nodeIds for a notice via the trading-notice ``nodeList`` API.

    Sends a signed GET request and builds a mapping from every notice id
    (the top-level ``noticeId`` plus every child id found under ``dsList``)
    to its ``nodeId``. Retries up to 3 times, rotating the proxy when the
    endpoint answers with an empty result.

    :param params: query parameters (siteCode/tradingType/bizCode/projectCode)
    :param proxies: requests-style proxies mapping, or False for direct access
    :return: dict of id -> nodeId; empty dict when every attempt failed
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize before the retry loop. The original assigned nodeId_dict
    # only inside the try-block, so three consecutive exceptions made the
    # final `return nodeId_dict` raise UnboundLocalError.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:  # was a bare except; don't swallow KeyboardInterrupt/SystemExit
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Build the signed ``X-Dgi-Req-*`` headers for a request payload.

    Serializes ``data`` as a ``k=v&...`` string (Python ``False`` is lowered
    to ``"false"`` to match the site's JS serialization), percent-encodes it,
    and delegates signing to ``get_pm`` in gdsggzyjypt_encrypt.js.

    :param data: dict of request parameters
    :return: dict with X-Dgi-Req-App/Nonce/Timestamp/Signature entries
    """
    p_list = []
    for key, value in data.items():
        if str(value) == "False":  # mirror JavaScript boolean serialization
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # NOTE(review): open() uses the locale default encoding — consider
    # passing encoding='utf-8' explicitly (the JS file is UTF-8).
    with open('./gdsggzyjypt_encrypt.js','r') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm',p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record.

    Delegates to ``create_href`` in gdsggzyjypt_encrypt.js, which picks the
    legacy v0 route or the new-edition route based on ``data['edition']``.

    :param data: the raw notice dict from the list API
    :return: the absolute detail-page URL string
    """
    # NOTE(review): open() uses the locale default encoding — consider
    # passing encoding='utf-8' explicitly (the JS file is UTF-8).
    with open('./gdsggzyjypt_encrypt.js','r') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href',data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    ``"YYYYMMDD"`` becomes ``"YYYY-MM-DD"``; ``"YYYYMMDDhhmmss"`` becomes
    ``"YYYY-MM-DD hh:mm:ss"``. Any other value (including None or an empty
    string) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_lqjy_xxpl/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment so the site's browser-oriented
// signing code runs under Node: jsdom supplies window/document/location/navigator.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

// crypto-js provides the SHA-256 used for request signing in get_pm below.
CryptoJS = require('crypto-js')
+
// Produce the signed X-Dgi-Req-* header object for a portal request.
// req_pm: the percent-encoded, '&'-joined parameter string to sign.
function get_pm(req_pm){

    // Obfuscation alphabet: header names and the app id below are decoded
    // by indexing into `ine`.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an array of indices into a string via the `ine` alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: millisecond timestamp; l: 16-char random nonce; c: decoded signing salt;
    // d: header object (App / Nonce / Timestamp keys decoded from index arrays).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: sne(n) -> 1..n, sne(a, b) -> a..b, else 0.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from sF — the nonce generator.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize parameters: sort the 'k=v' pairs of an object or a
    // pre-joined '&' string so signing is order-independent.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + salt + decodeURIComponent(sortedParams) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under the decoded "Signature" header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// Legacy "v0" edition notices use the /jygg/v0/<noticeId> route with only
// source/titleDetails params; every other edition uses the /new/jygg route
// carrying the full identifying parameter set.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New-edition route: identify the notice by project/biz/site/date params.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 139 - 0
a_gdsggzyjypt_lqjy_xxpl/林权交易-信息披露-列表页.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List-page spider: 林权交易-信息披露 on 广东省公共资源交易平台.

    Posts signed JSON queries to the portal search API, resolves each
    notice's nodeId, builds its detail-page URL, and emits one MgpListItem
    per notice for the detail spider to consume.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('林权交易-信息披露', 'a_gdsggzyjypt_lqjy_xxpl', 1),
        ]

    def start_requests(self):
        # One seed request per menu; paging starts at 1.
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the JSON body for the current page and sign it: the payload is
        # compact-serialized and the X-Dgi-Req-* headers come from get_enstr.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "G",
            "tradingProcess": "3D11,3D14,2D11,2D14,2D1D,1200,1202",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Emit one MgpListItem per notice on this result page, then page on."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is *called* here but passed as an
            # attribute in the detail spider — confirm it is a method in feapder.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # item pipeline that stores the record
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province/area (site default: 全国)
            list_item.city = city         # city, empty by default

            list_item.parse = "self.detail_get"  # detail-page callback name

            # Query params the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpaths for extracting body text (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-queue this request for the next page.
        request = self.infinite_pages(request, response)
        yield request
+
+
# Entry point: run the list spider standalone.
if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_lqjy_xxpl",user="gdsggzyjypt_lqjy_xxpl").start()
+

+ 162 - 0
a_gdsggzyjypt_lqjy_xxpl/林权交易-信息披露-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail-page spider for 广东省公共资源交易平台.

    Pulls queued list items, fetches the JSON detail API with signed
    headers, rebuilds the announcement HTML (info table + rich text +
    file list), and downloads referenced attachments.
    """

    def start_requests(self):
        # Pull a single batch of up to 20 pending tasks and emit one request
        # per task; the while/break wrapper runs exactly once.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query params and attach the X-Dgi-Req-* headers.
        # Note: this overrides the proxies=False set in start_requests.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Assemble contenthtml and collect attachments.

        Attachments are keyed by 1-based string indices ("1", "2", ...).
        """
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        # Render the key/value notice-info pairs as table rows.
        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Rich-text announcement body; last column may carry the file list.
        ggnr_html = detail_info[1].get('richtext') or ""
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:  # payloads without a file section land here
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # Edition segment parsed out of the detail URL built by create_href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole announcement as an iframe document.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
# Entry point: consume queued detail tasks for this spider's redis key.
if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_lqjy_xxpl").start()

+ 100 - 0
a_gdsggzyjypt_pwq_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Query the trading-notice nodeList API and build an id -> nodeId map.

    :param params: query params (siteCode / tradingType / bizCode / projectCode)
    :param proxies: requests-style proxies mapping, or False for a direct connection
    :return: dict mapping each noticeId (and every child id found in ``dsList``)
             to its nodeId; empty dict when all retries fail
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed X-Dgi-Req-* headers required by the gateway
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Initialized before the retry loop: the original defined it only inside the
    # try-block, so three consecutive request failures raised NameError on return.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty (but successful) response: rotate the proxy and retry.
            proxy = get_proxy()
            retry += 1
        except Exception:  # network / JSON errors count as a failed attempt
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the X-Dgi-Req-* signature headers for a request payload.

    Serializes *data* as an urlencoded ``k=v&...`` string (lower-casing booleans
    so the signed string matches the site's JSON serialization), then delegates
    to ``get_pm`` in the bundled JS file, which returns the dict of
    App / Nonce / Timestamp / Signature headers.
    """
    p_list = []
    for key, value in data.items():
        # JSON serializes booleans lowercase; mirror that for both values
        # (the original handled only False, leaving a True value as "True").
        if str(value) == "False":
            value = "false"
        elif str(value) == "True":
            value = "true"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding so the read does not depend on the platform default.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record via the JS helper."""
    with open('./gdsggzyjypt_encrypt.js', 'r') as fr:
        js_source = fr.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    'YYYYMMDD'        -> 'YYYY-MM-DD'
    'YYYYMMDDHHMMSS'  -> 'YYYY-MM-DD HH:MM:SS'
    Any other value (including None/empty) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_pwq_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like environment for Node: the signing code below was lifted
// from the portal's front-end bundle, which expects window/document/location/
// navigator globals, so fake them with jsdom.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the anti-crawl request headers for the ygp.gdzwfw.gov.cn gateway.
// req_pm: the pre-urlencoded "k=v&..." parameter string to sign.
// Returns an object carrying the X-Dgi-Req-App / -Nonce / -Timestamp /
// -Signature headers expected by the API.
function get_pm(req_pm){

    // Obfuscation alphabet: header names and the signing key are stored as
    // index arrays into `ine` and decoded with qu().
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into its hidden string.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp in ms; l: 16-char random nonce; c decodes to the signing
    // key "k8tUyS$m"; d decodes to
    //   { "X-Dgi-Req-App": "ggzy-portal",
    //     "X-Dgi-Req-Nonce": l,
    //     "X-Dgi-Req-Timestamp": String(a) }
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: sne(a) -> 1..a, sne(a, b) -> a..b.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e, drawn from sF (nonce generator).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize parameters: split into "k=v" pairs, sort lexicographically,
    // re-join with "&". Accepts either an object or an urlencoded string.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sortedParams) + timestamp),
    // hex-encoded. (Destructuring renames: t=params, n=timestamp, u=nonce, o=key.)
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Key decodes to "X-Dgi-Req-Signature".
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Reconstruct the portal front-end URL for a notice record `e` (fields come
// straight from the search API response). Legacy "v0" notices use a short
// /44/jygg/v0/<noticeId> route; all newer editions use the /44/new/jygg route
// with the full descriptive query string.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source + title details travel in the query string.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New-format route: identify the notice by id/project/biz/site/date.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_pwq_jggg/排污权-结果公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider: 广东省公共资源交易平台 — 排污权 result notices (结果公告).

    Pages through the portal's signed search API and pushes one detail task
    per notice for the companion detail spider to consume.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('排污权-结果公告', 'a_gdsggzyjypt_pwq_jggg', 1),
        ]


    def start_requests(self):
        # One seed request per menu entry; paging is handled by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the JSON payload and the signed X-Dgi-Req-* headers. The
        # signature is computed from the *dict* (get_enstr) before it is
        # serialized with compact separators for the POST body.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "F",
            "tradingProcess": "3E13,3E16,2E16,2E17,1050,1051",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here but
            # assigned as an attribute in the detail spider — confirm against
            # the feapder Request API.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item that stores the record
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; defaults to "全国" (nationwide)
            list_item.city = city         # city; empty by default

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Parameters the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpaths for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-queue the next page until crawl_page is exhausted.
        request = self.infinite_pages(request, response)
        yield request
+
+
if __name__ == "__main__":
    # NOTE(review): redis_key ends in 'jygg' although this spider's code is
    # 'jggg' — confirm the detail spider intentionally consumes a shared queue.
    Feapder(redis_key="lzz:gdsggzyjypt_pwq_jygg",user="gdsggzyjypt_pwq_jggg").start()
+

+ 159 - 0
a_gdsggzyjypt_pwq_jggg/排污权-详情页.py

@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider: 广东省公共资源交易平台 — 排污权 notices.

    Consumes tasks queued by the list spider, fetches the signed detail API,
    rebuilds the announcement HTML and downloads any referenced attachments.
    """

    def start_requests(self):
        # Pull pending detail tasks (pushed by the list spider) from RabbitMQ.
        data_list = self.get_tasks_by_rabbitmq(limit=20)
        for item in data_list:
            request_params = item.get("request_params")
            yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                  deal_detail=item.get("deal_detail"), **request_params,
                                  callback='parse')

    def download_midware(self, request):
        # Sign the query params and attach the X-Dgi-Req-* headers.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        # Rebuild the announcement page: info table + rich text + file list,
        # downloading each attachment along the way.
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column block: key/value table of announcement metadata.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Second column block: the rich-text announcement body.
        ggnr_html = detail_info[1].get('richtext') or ""
        # Best-effort: the last column block may or may not carry a file list.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route embeds the edition parsed from the href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some announcements embed the body as an iframe document — fetch it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also collect any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # Run the detail spider against the shared pwq task queue.
    Details(redis_key="lzz:gdsggzyjypt_pwq_jygg").start()

+ 100 - 0
a_gdsggzyjypt_pwq_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Query the trading-notice nodeList API and build an id -> nodeId map.

    :param params: query params (siteCode / tradingType / bizCode / projectCode)
    :param proxies: requests-style proxies mapping, or False for a direct connection
    :return: dict mapping each noticeId (and every child id found in ``dsList``)
             to its nodeId; empty dict when all retries fail
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed X-Dgi-Req-* headers required by the gateway
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Initialized before the retry loop: the original defined it only inside the
    # try-block, so three consecutive request failures raised NameError on return.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty (but successful) response: rotate the proxy and retry.
            proxy = get_proxy()
            retry += 1
        except Exception:  # network / JSON errors count as a failed attempt
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Compute the X-Dgi-Req-* signature headers for a request payload.

    Serializes *data* as an urlencoded ``k=v&...`` string (lower-casing booleans
    so the signed string matches the site's JSON serialization), then delegates
    to ``get_pm`` in the bundled JS file, which returns the dict of
    App / Nonce / Timestamp / Signature headers.
    """
    p_list = []
    for key, value in data.items():
        # JSON serializes booleans lowercase; mirror that for both values
        # (the original handled only False, leaving a True value as "True").
        if str(value) == "False":
            value = "false"
        elif str(value) == "True":
            value = "true"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding so the read does not depend on the platform default.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice record via the JS helper."""
    with open('./gdsggzyjypt_encrypt.js', 'r') as fr:
        js_source = fr.read()
    runtime = execjs.compile(js_source)
    return runtime.call('create_href', data)
+
def deal_time(tm):
    """Normalize a compact timestamp string.

    'YYYYMMDD'        -> 'YYYY-MM-DD'
    'YYYYMMDDHHMMSS'  -> 'YYYY-MM-DD HH:MM:SS'
    Any other value (including None/empty) is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if len(tm) == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_pwq_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like environment for Node: the signing code below was lifted
// from the portal's front-end bundle, which expects window/document/location/
// navigator globals, so fake them with jsdom.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the anti-crawl request headers for the ygp.gdzwfw.gov.cn gateway.
// req_pm: the pre-urlencoded "k=v&..." parameter string to sign.
// Returns an object carrying the X-Dgi-Req-App / -Nonce / -Timestamp /
// -Signature headers expected by the API.
function get_pm(req_pm){

    // Obfuscation alphabet: header names and the signing key are stored as
    // index arrays into `ine` and decoded with qu().
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into its hidden string.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp in ms; l: 16-char random nonce; c decodes to the signing
    // key "k8tUyS$m"; d decodes to
    //   { "X-Dgi-Req-App": "ggzy-portal",
    //     "X-Dgi-Req-Nonce": l,
    //     "X-Dgi-Req-Timestamp": String(a) }
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer: sne(a) -> 1..a, sne(a, b) -> a..b.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e, drawn from sF (nonce generator).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize parameters: split into "k=v" pairs, sort lexicographically,
    // re-join with "&". Accepts either an object or an urlencoded string.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sortedParams) + timestamp),
    // hex-encoded. (Destructuring renames: t=params, n=timestamp, u=nonce, o=key.)
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Key decodes to "X-Dgi-Req-Signature".
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Reconstruct the portal front-end URL for a notice record `e` (fields come
// straight from the search API response). Legacy "v0" notices use a short
// /44/jygg/v0/<noticeId> route; all newer editions use the /44/new/jygg route
// with the full descriptive query string.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source + title details travel in the query string.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New-format route: identify the notice by id/project/biz/site/date.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_pwq_jygg/排污权-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider: 广东省公共资源交易平台 — 排污权 trade notices (交易公告).

    Pages through the portal's signed search API and pushes one detail task
    per notice for the companion detail spider to consume.
    """

    def start_callback(self):
        # Menu: channel label, spider code, number of pages to crawl.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('排污权-交易公告', 'a_gdsggzyjypt_pwq_jygg', 1),
        ]


    def start_requests(self):
        # One seed request per menu entry; paging is handled by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the JSON payload and the signed X-Dgi-Req-* headers. The
        # signature is computed from the *dict* (get_enstr) before it is
        # serialized with compact separators for the POST body.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "F",
            "tradingProcess": "2E11,2E18,2E19,1050,3E13",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here but
            # assigned as an attribute in the detail spider — confirm against
            # the feapder Request API.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item that stores the record
            list_item.href = href             # notice detail link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in self.menus
            list_item.spidercode = menu.get("code")  # spider code defined in self.menus
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; defaults to "全国" (nationwide)
            list_item.city = city         # city; empty by default

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Parameters the detail spider needs to call the detail API.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpaths for body extraction (unused here)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # Infinite paging: re-queue the next page until crawl_page is exhausted.
        request = self.infinite_pages(request, response)
        yield request
+
+
if __name__ == "__main__":
    # List spider for 交易公告; shares the pwq_jygg task queue with the detail spider.
    Feapder(redis_key="lzz:gdsggzyjypt_pwq_jygg",user="gdsggzyjypt_pwq_jygg").start()
+

+ 159 - 0
a_gdsggzyjypt_pwq_jygg/排污权-详情页.py

@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider: 广东省公共资源交易平台 — 排污权 notices.

    Consumes tasks queued by the list spider, fetches the signed detail API,
    rebuilds the announcement HTML and downloads any referenced attachments.
    """

    def start_requests(self):
        # Pull pending detail tasks (pushed by the list spider) from RabbitMQ.
        data_list = self.get_tasks_by_rabbitmq(limit=20)
        for item in data_list:
            request_params = item.get("request_params")
            yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                  deal_detail=item.get("deal_detail"), **request_params,
                                  callback='parse')

    def download_midware(self, request):
        # Sign the query params and attach the X-Dgi-Req-* headers.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        # Rebuild the announcement page: info table + rich text + file list,
        # downloading each attachment along the way.
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column block: key/value table of announcement metadata.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Second column block: the rich-text announcement body.
        ggnr_html = detail_info[1].get('richtext') or ""
        # Best-effort: the last column block may or may not carry a file list.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route embeds the edition parsed from the href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some announcements embed the body as an iframe document — fetch it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also collect any downloadable links inside the rich-text body.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # Run the detail spider against the shared pwq task queue.
    Details(redis_key="lzz:gdsggzyjypt_pwq_jygg").start()

+ 100 - 0
a_gdsggzyjypt_sfsszc_jggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Query the portal's nodeList API and build a noticeId -> nodeId map.

    :param params: query-string dict (siteCode/tradingType/bizCode/projectCode)
    :param proxies: requests-style proxies dict, or False for a direct request
    :return: dict mapping each noticeId (and every dsList child id) to its
             nodeId; empty dict when all attempts fail
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed X-Dgi-Req-* gateway headers
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize before the retry loop. Previously the dict was first
    # assigned inside the try, so three consecutive request failures made the
    # final `return nodeId_dict` raise NameError instead of returning {}.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            proxy = get_proxy()  # empty payload: rotate proxy and retry
            retry += 1
        except Exception:
            # Narrowed from a bare `except`; also rotate the proxy here —
            # retrying a failed request on the same dead proxy is wasted work.
            proxy = get_proxy()
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Produce the signed X-Dgi-Req-* headers for a request payload.

    Serializes *data* as "k=v" pairs (Python False -> JS "false"), URL-quotes
    the string, and delegates signing to get_pm() in the bundled JS helper.

    :param data: dict of request parameters to sign
    :return: dict with X-Dgi-Req-App/Nonce/Signature/Timestamp keys
    """
    p_list = []
    for key, value in data.items():
        if str(value) == "False":
            value = "false"  # the JS signer expects lowercase booleans
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Fix: read the JS with an explicit encoding so a non-UTF-8 platform
    # default (e.g. GBK on Chinese-locale Windows) cannot corrupt the script.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice via the JS helper.

    :param data: notice dict from the search API (noticeId, projectCode,
                 tradingProcess, regionCode, edition, ...)
    :return: absolute URL string of the notice detail page
    """
    # NOTE(review): file is opened without an explicit encoding — relies on
    # the platform default being UTF-8 compatible; confirm on Windows hosts.
    with open('./gdsggzyjypt_encrypt.js','r') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href',data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp string to a readable form.

    ``yyyymmdd`` becomes ``yyyy-mm-dd`` and ``yyyymmddHHMMSS`` becomes
    ``yyyy-mm-dd HH:MM:SS``; any other value is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_sfsszc_jggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like globals for scripts that expect a DOM environment.
// NOTE(review): window/document/location/navigator are not referenced by
// get_pm or create_href below — the jsdom shim may be removable; confirm
// against the original site script before pruning.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the signed gateway headers for a request.
// req_pm: url-encoded, "&"-joined request parameters to sign.
// Returns an object keyed by the (index-obfuscated) X-Dgi-Req-* header
// names: app id, a 16-char nonce, a millisecond timestamp, and a SHA-256
// signature over nonce + app key + sorted params + timestamp.
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string via the extended alphabet
    // (keeps header names and the app key out of plain text).
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms); l: random nonce; c: app key; d: header dict
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int helper: one arg -> 1..e, two args -> e..t (truncated).
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e (used for the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs lexicographically.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature: SHA256(nonce + key + decodeURIComponent(sorted params) + ts).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the signature under its obfuscated header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// v0-edition notices use the legacy /jygg/v0/<noticeId> route; all other
// editions use the /new/jygg/<edition>/<secondType> route with full params.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source and title details travel as query params.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New route: carry all identifiers the detail page/API needs.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 139 - 0
a_gdsggzyjypt_sfsszc_jggg/涉法涉诉资产-结果公告-列表页.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import json
+from collections import namedtuple
+
+import feapder
+from items.spider_item import MgpListItem
+
+from gd_utils import *
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider for 广东省公共资源交易平台 — 涉法涉诉资产 结果公告."""

    def start_callback(self):
        # Menu: (channel label, spider code, number of list pages per run)
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('涉法涉诉资产-结果公告', 'a_gdsggzyjypt_sfsszc_jggg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload for the current page, sign it, and set
        # the X-Dgi-Req-* gateway headers required by the portal.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "M",
            "tradingProcess": "3I14,1801,2I13,2I14",  # 结果公告 process codes
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # Resolve the nodeId the detail API needs.
            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm feapder's Request exposes it as a method.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for the list record
            list_item.href = href             # detail page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel defined in menus above
            list_item.spidercode = menu.get("code")  # spider code defined in menus above
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province (defaults to nationwide)
            list_item.city = city         # city (empty by default)

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Query parameters the detail spider will sign and send.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # endless pagination
        request = self.infinite_pages(request, response)
        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_sfsszc_jygg",user="gdsggzyjypt_sfsszc_jggg").start()
+

+ 162 - 0
a_gdsggzyjypt_sfsszc_jggg/涉法涉诉资产-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider: fetches the notice-detail JSON API, rebuilds the notice
    HTML (info table + rich text + attachment list) and downloads files."""

    def start_requests(self):
        # Drain one batch of pending detail tasks from RabbitMQ, then stop.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query params and attach the gateway headers; detail
        # requests go through a socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column model holds the key/value rows of the info table.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Second column model: notice body as rich text.
        ggnr_html = detail_info[1].get('richtext') or ""
        # The last column model may carry the attachment list; absent on some
        # notices, hence the defensive try/except.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route needs the edition segment from the href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document; archive it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also collect files linked from inside the rich-text body.
        # NOTE(review): hrefs here may be relative — assumes extract_file_type
        # rejects those; confirm.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # NOTE(review): this file lives under the "_jggg" (结果公告) directory but
    # consumes the "_jygg" queue — verify the redis_key matches the list
    # spider that feeds it before changing either side.
    Details(redis_key="lzz:gdsggzyjypt_sfsszc_jygg").start()

+ 100 - 0
a_gdsggzyjypt_sfsszc_jygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Query the portal's nodeList API and build a noticeId -> nodeId map.

    :param params: query-string dict (siteCode/tradingType/bizCode/projectCode)
    :param proxies: requests-style proxies dict, or False for a direct request
    :return: dict mapping each noticeId (and every dsList child id) to its
             nodeId; empty dict when all attempts fail
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed X-Dgi-Req-* gateway headers
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: initialize before the retry loop. Previously the dict was first
    # assigned inside the try, so three consecutive request failures made the
    # final `return nodeId_dict` raise NameError instead of returning {}.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy,
                               timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            proxy = get_proxy()  # empty payload: rotate proxy and retry
            retry += 1
        except Exception:
            # Narrowed from a bare `except`; also rotate the proxy here —
            # retrying a failed request on the same dead proxy is wasted work.
            proxy = get_proxy()
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Produce the signed X-Dgi-Req-* headers for a request payload.

    Serializes *data* as "k=v" pairs (Python False -> JS "false"), URL-quotes
    the string, and delegates signing to get_pm() in the bundled JS helper.

    :param data: dict of request parameters to sign
    :return: dict with X-Dgi-Req-App/Nonce/Signature/Timestamp keys
    """
    p_list = []
    for key, value in data.items():
        if str(value) == "False":
            value = "false"  # the JS signer expects lowercase booleans
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Fix: read the JS with an explicit encoding so a non-UTF-8 platform
    # default (e.g. GBK on Chinese-locale Windows) cannot corrupt the script.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice via the JS helper.

    :param data: notice dict from the search API (noticeId, projectCode,
                 tradingProcess, regionCode, edition, ...)
    :return: absolute URL string of the notice detail page
    """
    # NOTE(review): file is opened without an explicit encoding — relies on
    # the platform default being UTF-8 compatible; confirm on Windows hosts.
    with open('./gdsggzyjypt_encrypt.js','r') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href',data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp string to a readable form.

    ``yyyymmdd`` becomes ``yyyy-mm-dd`` and ``yyyymmddHHMMSS`` becomes
    ``yyyy-mm-dd HH:MM:SS``; any other value is returned unchanged.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_sfsszc_jygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like globals for scripts that expect a DOM environment.
// NOTE(review): window/document/location/navigator are not referenced by
// get_pm or create_href below — the jsdom shim may be removable; confirm
// against the original site script before pruning.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
// Build the signed gateway headers for a request.
// req_pm: url-encoded, "&"-joined request parameters to sign.
// Returns an object keyed by the (index-obfuscated) X-Dgi-Req-* header
// names: app id, a 16-char nonce, a millisecond timestamp, and a SHA-256
// signature over nonce + app key + sorted params + timestamp.
function get_pm(req_pm){

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index array into a string via the extended alphabet
    // (keeps header names and the app key out of plain text).
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms); l: random nonce; c: app key; d: header dict
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int helper: one arg -> 1..e, two args -> e..t (truncated).
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random alphanumeric string of length e (used for the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize params: sort "k=v" pairs lexicographically.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature: SHA256(nonce + key + decodeURIComponent(sorted params) + ts).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the signature under its obfuscated header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the portal detail-page URL for a notice record.
// v0-edition notices use the legacy /jygg/v0/<noticeId> route; all other
// editions use the /new/jygg/<edition>/<secondType> route with full params.
function create_href(e) {
    const t = e.pubServicePlat
        , n = e.noticeSecondTypeDesc;
    if (e.edition === "v0") {
        // Legacy route: only source and title details travel as query params.
        const pm = {
                source: t,
                titleDetails: n
            },
            b_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;

        const sParams = new URLSearchParams(pm);
        const mUrl = `${b_url}?${sParams.toString()}`;

        return mUrl
    }

    // New route: carry all identifiers the detail page/API needs.
    const c = {
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: t,
        titleDetails: n,
        classify: e.projectType
    }
        , base_url = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;

    const searchParams = new URLSearchParams(c);
    const mergedUrl = `${base_url}?${searchParams.toString()}`;

    return mergedUrl
}
+
+
+

+ 140 - 0
a_gdsggzyjypt_sfsszc_jygg/涉法涉诉资产-交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider for 广东省公共资源交易平台 — 涉法涉诉资产 交易公告."""

    def start_callback(self):
        # Menu: (channel label, spider code, number of list pages per run)
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('涉法涉诉资产-交易公告', 'a_gdsggzyjypt_sfsszc_jygg', 1),
        ]


    def start_requests(self):
        # One seed request per menu; pagination is driven by infinite_pages().
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Build the search payload for the current page, sign it, and set
        # the X-Dgi-Req-* gateway headers required by the portal.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "M",
            "tradingProcess": "3I11,2I11,1800",  # 交易公告 process codes
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):

        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # Resolve the nodeId the detail API needs.
            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm feapder's Request exposes it as a method.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for the list record
            list_item.href = href             # detail page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # channel defined in menus above
            list_item.spidercode = menu.get("code")  # spider code defined in menus above
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province (defaults to nationwide)
            list_item.city = city         # city (empty by default)

            list_item.parse = "self.detail_get"  # detail-page callback method

            # Query parameters the detail spider will sign and send.
            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # xpath list for body extraction
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # endless pagination
        request = self.infinite_pages(request, response)
        yield request
+
+
+if __name__ == "__main__":
+    Feapder(redis_key="lzz:gdsggzyjypt_sfsszc_jygg",user="gdsggzyjypt_sfsszc_jygg").start()
+

+ 162 - 0
a_gdsggzyjypt_sfsszc_jygg/涉法涉诉资产-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-25
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider: fetches the notice-detail JSON API, rebuilds the notice
    HTML (info table + rich text + attachment list) and downloads files."""

    def start_requests(self):
        # Drain one batch of pending detail tasks from RabbitMQ, then stop.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=20)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query params and attach the gateway headers; detail
        # requests go through a socks5h proxy.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        # First column model holds the key/value rows of the info table.
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        # Second column model: notice body as rich text.
        ggnr_html = detail_info[1].get('richtext') or ""
        # The last column model may carry the attachment list; absent on some
        # notices, hence the defensive try/except.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route needs the edition segment from the href.
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # Some notices embed the body as an iframe document; archive it too.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also collect files linked from inside the rich-text body.
        # NOTE(review): hrefs here may be relative — assumes extract_file_type
        # rejects those; confirm.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item
+
+
if __name__ == '__main__':
    # Entry point: consumes tasks pushed by the matching list spider.
    Details(redis_key="lzz:gdsggzyjypt_sfsszc_jygg").start()

+ 100 - 0
a_gdsggzyjypt_tdky_kyqjggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Map notice ids to node ids via the trading-notice ``nodeList`` API.

    Args:
        params: query parameters (siteCode / tradingType / bizCode / projectCode).
        proxies: initial requests-style proxy mapping, or ``False`` for a
            direct connection; rotated with ``get_proxy()`` whenever the API
            returns an empty payload.

    Returns:
        dict mapping every noticeId (and each child id listed under
        ``dsList``) to its owning ``nodeId``; empty when all 3 attempts fail.
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed anti-crawl headers (X-Dgi-Req-*)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Hoisted out of the try block: the original left nodeId_dict unbound
    # (UnboundLocalError on return) when every attempt raised.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params,
                               proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:  # network / JSON failures count as a failed attempt
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Sign request parameters via the ported site JS (``get_pm``).

    Args:
        data: mapping of request parameters; Python ``False`` values are
            rewritten to the lowercase ``"false"`` the JS signer expects.

    Returns:
        dict with the four ``X-Dgi-Req-*`` anti-crawl headers.
    """
    p_list = []
    for key, value in data.items():
        # JS-style lowercase boolean, matching what the site serializes.
        if str(value) == "False":
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding: the default locale codec can break on non-UTF-8 hosts.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice via the JS ``create_href``.

    Args:
        data: raw notice record as returned by the list API.

    Returns:
        str: the public detail-page URL.
    """
    # Explicit encoding: the default locale codec can break on non-UTF-8 hosts.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href', data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp to ``YYYY-MM-DD[ HH:MM:SS]``.

    ``YYYYMMDD`` and ``YYYYMMDDHHMMSS`` inputs are reformatted; anything
    else (None, empty string, already-formatted text) is returned as-is.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_tdky_kyqjggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment (jsdom) so the ported site
// script can reference window/document/location/navigator as bare globals.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Intentionally assigned without `const` so they become globals visible to
// the functions below (and to execjs callers).
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
function get_pm(req_pm){
    // Build the X-Dgi-Req-* anti-crawl headers for one request.  Header
    // names and the app id are stored as index lists into an obfuscated
    // alphabet and decoded by qu(); the Python callers read the result as
    // X-Dgi-Req-App / -Nonce / -Timestamp / -Signature.

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a list of alphabet indices into a string.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms), l: 16-char random nonce, c: obfuscated signing key,
    // d: header map (App / Nonce / Timestamp; Signature is appended below).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer helper: 1 arg -> [1, e], 2 args -> [e, t].
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from the base alphabet (the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize the params: sort the key=value pairs and re-join with '&'.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sortedParams) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under its decoded header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
function create_href(e) {
    // Build the public detail-page URL for a notice record.  Legacy "v0"
    // notices use the short /jygg/v0/<id> route with only source/title in
    // the query; every other edition uses the /new/jygg/<edition>/<type>
    // route with the full parameter set.
    const source = e.pubServicePlat;
    const titleDetails = e.noticeSecondTypeDesc;

    let base;
    let query;
    if (e.edition === "v0") {
        base = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
        query = new URLSearchParams({
            source: source,
            titleDetails: titleDetails
        });
    } else {
        base = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
        query = new URLSearchParams({
            noticeId: e.noticeId,
            projectCode: e.projectCode,
            bizCode: e.tradingProcess,
            siteCode: e.regionCode,
            publishDate: e.publishDate,
            source: source,
            titleDetails: titleDetails,
            classify: e.projectType
        });
    }

    return `${base}?${query.toString()}`;
}
+
+
+

+ 162 - 0
a_gdsggzyjypt_tdky_kyqjggg/土地矿业-矿业权-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider for ygp.gdzwfw.gov.cn (Guangdong public resource trading).

    Pulls queued list items from RabbitMQ, calls the signed JSON detail API,
    rebuilds a readable HTML body (info table + rich text + file list) and
    downloads any referenced attachments.
    """

    def start_requests(self):
        # Drain one batch of up to 30 queued tasks, then stop; the
        # while/break wrapper just keeps the generator body flat.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=30)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query string (X-Dgi-Req-* anti-crawl headers) and attach
        # a fresh SOCKS5 proxy for every request.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Build ``contenthtml`` from the detail JSON and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # Column 0: key/value notice-info table; column 1: rich-text body;
        # last column (when present): attachment descriptors.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        # The attachment column is optional; swallow lookup errors to None.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route is versioned; the edition segment is
                # embedded in the detail href (".../new/jygg/<version>/...").
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when the name/url yields a recognized file type.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole notice as an iframe document;
        # treat its src as a downloadable attachment as well.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any file links embedded directly in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_tdky_kyqjygg").start()

+ 142 - 0
a_gdsggzyjypt_tdky_kyqjggg/土地矿业-矿业权交易结果公告-列表页.py

@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider: 土地矿业-矿业权结果公告 on the Guangdong trading portal."""

    def start_callback(self):
        # channel name / spider code / crawl-page budget per list.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('土地矿业-矿业权结果公告', 'a_gdsggzyjypt_tdky_kyqjggg', 1),
        ]


    def start_requests(self):
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Search payload: secondType "B" with these tradingProcess codes
        # selects the result-notice stages for this channel.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "B",
            "tradingProcess": "652,653,3A41,3A51,2A18,2A1E",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # Sign the raw dict first, then serialize compactly for the POST body.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Turn one page of search results into queued detail-page items."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')
            # Extracted for reference only; create_href reads them from info.
            pubServicePlat = info.get('pubServicePlat')
            noticeSecondTypeDesc = info.get('noticeSecondTypeDesc')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm this feapder version exposes Request.proxies as a method.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for storage
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in menus above
            list_item.spidercode = menu.get("code")  # spider code defined in menus above
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be nationwide
            list_item.city = city         # city; defaults to empty

            list_item.parse = "self.detail_get"  # detail-page callback name

            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # body-extraction xpaths (unused: detail is JSON)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # infinite paging: requeue the next page until crawl_page is reached
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_tdky_kyqjygg", user="1125").start()
+

+ 100 - 0
a_gdsggzyjypt_tdky_kyqjygg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Map notice ids to node ids via the trading-notice ``nodeList`` API.

    Args:
        params: query parameters (siteCode / tradingType / bizCode / projectCode).
        proxies: initial requests-style proxy mapping, or ``False`` for a
            direct connection; rotated with ``get_proxy()`` whenever the API
            returns an empty payload.

    Returns:
        dict mapping every noticeId (and each child id listed under
        ``dsList``) to its owning ``nodeId``; empty when all 3 attempts fail.
    """
    proxy = proxies
    en_str = get_enstr(params)  # signed anti-crawl headers (X-Dgi-Req-*)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Hoisted out of the try block: the original left nodeId_dict unbound
    # (UnboundLocalError on return) when every attempt raised.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params,
                               proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data')
            nodeId_dict = {}
            for nd in nodeId_info:
                dsList = nd.get('dsList')
                for i in dsList:
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:  # network / JSON failures count as a failed attempt
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Sign request parameters via the ported site JS (``get_pm``).

    Args:
        data: mapping of request parameters; Python ``False`` values are
            rewritten to the lowercase ``"false"`` the JS signer expects.

    Returns:
        dict with the four ``X-Dgi-Req-*`` anti-crawl headers.
    """
    p_list = []
    for key, value in data.items():
        # JS-style lowercase boolean, matching what the site serializes.
        if str(value) == "False":
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Explicit encoding: the default locale codec can break on non-UTF-8 hosts.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the portal detail-page URL for a notice via the JS ``create_href``.

    Args:
        data: raw notice record as returned by the list API.

    Returns:
        str: the public detail-page URL.
    """
    # Explicit encoding: the default locale codec can break on non-UTF-8 hosts.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href', data)

    return pm
+
def deal_time(tm):
    """Normalize a compact timestamp to ``YYYY-MM-DD[ HH:MM:SS]``.

    ``YYYYMMDD`` and ``YYYYMMDDHHMMSS`` inputs are reformatted; anything
    else (None, empty string, already-formatted text) is returned as-is.
    """
    if not tm:
        return tm
    if len(tm) == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:]}"
    if len(tm) == 14:
        return (f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} "
                f"{tm[8:10]}:{tm[10:12]}:{tm[12:]}")
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_tdky_kyqjygg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Bootstrap a minimal browser-like environment (jsdom) so the ported site
// script can reference window/document/location/navigator as bare globals.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
// Intentionally assigned without `const` so they become globals visible to
// the functions below (and to execjs callers).
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

CryptoJS = require('crypto-js')
+
function get_pm(req_pm){
    // Build the X-Dgi-Req-* anti-crawl headers for one request.  Header
    // names and the app id are stored as index lists into an obfuscated
    // alphabet and decoded by qu(); the Python callers read the result as
    // X-Dgi-Req-App / -Nonce / -Timestamp / -Signature.

    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode a list of alphabet indices into a string.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a: timestamp (ms), l: 16-char random nonce, c: obfuscated signing key,
    // d: header map (App / Nonce / Timestamp; Signature is appended below).
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random integer helper: 1 arg -> [1, e], 2 args -> [e, t].
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from the base alphabet (the nonce).
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalize the params: sort the key=value pairs and re-join with '&'.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sortedParams) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // Attach the computed signature under its decoded header name.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
function create_href(e) {
    // Build the public detail-page URL for a notice record.  Legacy "v0"
    // notices use the short /jygg/v0/<id> route with only source/title in
    // the query; every other edition uses the /new/jygg/<edition>/<type>
    // route with the full parameter set.
    const source = e.pubServicePlat;
    const titleDetails = e.noticeSecondTypeDesc;

    let base;
    let query;
    if (e.edition === "v0") {
        base = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}`;
        query = new URLSearchParams({
            source: source,
            titleDetails: titleDetails
        });
    } else {
        base = `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}`;
        query = new URLSearchParams({
            noticeId: e.noticeId,
            projectCode: e.projectCode,
            bizCode: e.tradingProcess,
            siteCode: e.regionCode,
            publishDate: e.publishDate,
            source: source,
            titleDetails: titleDetails,
            classify: e.projectType
        });
    }

    return `${base}?${query.toString()}`;
}
+
+
+

+ 162 - 0
a_gdsggzyjypt_tdky_kyqjygg/土地矿业-矿业权-详情页.py

@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from feapder.network.selector import Selector
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+from untils.tools import extract_file_type
+
+from gd_utils import *
+
+
class Details(feapder.BiddingDetailSpider):
    """Detail spider for ygp.gdzwfw.gov.cn (Guangdong public resource trading).

    Pulls queued list items from RabbitMQ, calls the signed JSON detail API,
    rebuilds a readable HTML body (info table + rich text + file list) and
    downloads any referenced attachments.
    """

    def start_requests(self):
        # Drain one batch of up to 30 queued tasks, then stop; the
        # while/break wrapper just keeps the generator body flat.
        while True:
            data_list = self.get_tasks_by_rabbitmq(limit=30)
            for item in data_list:
                request_params = item.get("request_params")
                yield feapder.Request(url=item.get("parse_url"), item=item,proxies=False,
                                      deal_detail=item.get("deal_detail"), **request_params,
                                      callback='parse')

            break

    def download_midware(self, request):
        # Sign the query string (X-Dgi-Req-* anti-crawl headers) and attach
        # a fresh SOCKS5 proxy for every request.
        en_str = get_enstr(request.params)
        request.proxies = get_proxy(socks5h=True)

        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Build ``contenthtml`` from the detail JSON and collect attachments."""
        attachments = {}
        items = request.item
        list_item = DataBakItem(**items)

        # Column 0: key/value notice-info table; column 1: rich-text body;
        # last column (when present): attachment descriptors.
        detail_info = response.json.get('data').get('tradingNoticeColumnModelList')
        ggxx_info = detail_info[0].get('multiKeyValueTableList')[0]

        tphtml = ""
        if ggxx_info:
            for gd in ggxx_info:
                temps = f'''
                    <tr>
                        <th colspan="1"><span>{gd.get('key')}</span></th>
                        <td colspan="3"><span>{gd.get('value', '无')}</span>
                        </td>
                    </tr>
                    '''
                tphtml += temps

        ggxx_html = f'''
            <section>
                <h2 id="公告信息" class="subtitle">公告信息</h2>
                <div class="mt-2">
                    <div>
                        <div>
                            <table>
                                <tbody>
                                {tphtml}
                                </tbody>
                            </table>
                        </div>
                    </div>
                </div>
            </section>
            '''

        ggnr_html = detail_info[1].get('richtext') or ""
        # The attachment column is optional; swallow lookup errors to None.
        try:
            f_list = detail_info[-1].get('noticeFileBOList')
        except:
            f_list = None
        if f_list:
            ff_html = ""
            index = 1
            for f in f_list:
                f_id = f.get('rowGuid')
                # The download route is versioned; the edition segment is
                # embedded in the detail href (".../new/jygg/<version>/...").
                version = "".join(re.findall('new/jygg/(.*?)/',list_item.href))
                f_url = f"https://ygp.gdzwfw.gov.cn/ggzy-portal/base/sys-file/download/{version}/{f_id}"
                f_name = f.get('fileName').strip()
                temp = f'''
                <li>
                    <span>附件名称 {index}</span>
                    <div>
                        <div>
                            <a href="{f_url}">{f_name}</a>
                        </div>
                    </div>
                </li>
                '''
                index += 1
                ff_html += temp
                # Only download when the name/url yields a recognized file type.
                f_type = extract_file_type(f_name, f_url)
                if f_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=f_name, file_type=f_type, download_url=f_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

            file_html = f'''
            <div class="fileList">
                <h2 id="相关附件" class="subtitle">相关附件</h2>
                <ul>
                    {ff_html}
                </ul>
            </div>
            '''
        else:
            file_html = ""

        list_item.contenthtml = ggxx_html + ggnr_html + file_html

        # The rich text may embed the whole notice as an iframe document;
        # treat its src as a downloadable attachment as well.
        iframe_url = Selector(ggnr_html).xpath('//iframe/@src').extract_first()

        fm_type = extract_file_type('公告内容', iframe_url)
        if fm_type:
            attachmentf = AttachmentDownloader().fetch_attachment(
                file_name='公告内容', file_type=fm_type, download_url=iframe_url,
                proxies=request.proxies)
            attachments[str(len(attachments) + 1)] = attachmentf

        # Also harvest any file links embedded directly in the rich text.
        file_list = Selector(ggnr_html).xpath('//a[@href]')
        if file_list:
            for info in file_list:
                file_name = "".join(info.xpath('.//text()').extract()).strip()
                file_url = info.xpath('./@href').extract_first()
                file_type = extract_file_type(file_name,file_url)
                if file_type:
                    attachment = AttachmentDownloader().fetch_attachment(
                        file_name=file_name, file_type=file_type, download_url=file_url,
                        proxies=request.proxies)
                    attachments[str(len(attachments) + 1)] = attachment

        if attachments:
            list_item.projectinfo = {"attachments": attachments}

        yield list_item


if __name__ == '__main__':
    Details(redis_key="lzz:gdsggzyjypt_tdky_kyqjygg").start()

+ 140 - 0
a_gdsggzyjypt_tdky_kyqjygg/土地矿业-矿业权交易公告-列表页.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: 广东省公共资源交易平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import MgpListItem
+from collections import namedtuple
+from gd_utils import *
+import json
+
+
+
+
+
class Feapder(feapder.BiddingListSpider):
    """List spider: 土地矿业-矿业权交易公告 on the Guangdong trading portal."""

    def start_callback(self):
        # channel name / spider code / crawl-page budget per list.
        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])

        self.site = "广东省公共资源交易平台"

        self.menus = [
            Menu('土地矿业-矿业权交易公告', 'a_gdsggzyjypt_tdky_kyqjygg', 1),
        ]


    def start_requests(self):
        for menu in self.menus:
            start_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/search/v2/items"
            yield feapder.Request(url=start_url, item=menu._asdict(), page=1)

    def download_midware(self, request):
        # Search payload: secondType "B" with these tradingProcess codes
        # selects the trade-notice stages for this channel.
        page = request.page
        data = {
            "type": "trading-type",
            "openConvert": False,
            "keyword": "",
            "siteCode": "44",
            "secondType": "B",
            "tradingProcess": "650,651,654,655,656,657,658,659,660,2A11,2A1C,3A21,3A31,3A61,3A71,3A81,3A91,3AA1,3AB1,3AC1",
            "thirdType": "[]",
            "projectType": "",
            "publishStartTime": "",
            "publishEndTime": "",
            "pageNo": page,
            "pageSize": 10
        }
        # Sign the raw dict first, then serialize compactly for the POST body.
        en_str = get_enstr(data)
        data = json.dumps(data, separators=(',', ':'))
        request.data = data
        request.headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "Origin": "https://ygp.gdzwfw.gov.cn",
            "Pragma": "no-cache",
            "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
            "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
            "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
            "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
            "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
        }


    def parse(self, request, response):
        """Turn one page of search results into queued detail-page items."""
        menu = request.item
        info_list = response.json.get('data').get('pageData')
        for info in info_list:
            noticeSecondType = info.get('noticeSecondType')
            edition = info.get('edition')
            noticeId = info.get('noticeId')
            projectCode = info.get('projectCode')
            tradingProcess = info.get('tradingProcess')
            siteCode = info.get('regionCode')
            publishDate = info.get('publishDate')

            params = {
                "siteCode": f"{siteCode}",
                "tradingType": f"{noticeSecondType}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}"
            }

            # NOTE(review): request.proxies is invoked as a callable here —
            # confirm this feapder version exposes Request.proxies as a method.
            nodeId = get_nodeId(params,proxies=request.proxies()).get(noticeId)
            info['nodeId'] = nodeId

            href = create_href(info)
            title = info.get('noticeTitle').strip()
            create_time = deal_time(publishDate)
            regionName = info.get('regionName','').strip()

            area = "广东"
            city = regionName

            list_item = MgpListItem()         # pipeline item for storage
            list_item.href = href             # detail-page link
            list_item.unique_key = ('href',)
            list_item.channel = menu.get("channel")  # crawl channel defined in menus above
            list_item.spidercode = menu.get("code")  # spider code defined in menus above
            list_item.title = title           # notice title
            list_item.site = self.site
            list_item.publishtime = create_time
            list_item.area = area         # province; default would be nationwide
            list_item.city = city         # city; defaults to empty

            list_item.parse = "self.detail_get"  # detail-page callback name

            dparams = {
                "nodeId": f"{nodeId}",
                "version": f"{edition}",
                "tradingType": f"{noticeSecondType}",
                "noticeId": f"{noticeId}",
                "bizCode": f"{tradingProcess}",
                "projectCode": f"{projectCode}",
                "siteCode": f"{siteCode}"
            }
            list_item.request_params = {"params":dparams}
            list_item.deal_detail = []  # body-extraction xpaths (unused: detail is JSON)
            list_item.proxies = False
            list_item.parse_url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/detail"


            yield list_item

        # infinite paging: requeue the next page until crawl_page is reached
        request = self.infinite_pages(request, response)
        yield request


if __name__ == "__main__":
    Feapder(redis_key="lzz:gdsggzyjypt_tdky_kyqjygg", user="1127").start()
+

+ 100 - 0
a_gdsggzyjypt_tdky_tdsyqjggg/gd_utils.py

@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-01-04
+---------
+@summary: utils
+---------
+@author: Lzz
+"""
+from urllib import parse
+import execjs
+import requests
+from untils.tools import get_proxy
+
+
def get_nodeId(params, proxies=False):
    """Fetch the noticeId -> nodeId mapping from the trading-notice nodeList API.

    Builds the signed ``X-Dgi-*`` headers via :func:`get_enstr`, then queries the
    endpoint up to 3 times, rotating the proxy between empty-result attempts.

    :param params: query parameters identifying the notice (also used for signing)
    :param proxies: requests-style proxies dict, or False for a direct connection
    :return: dict mapping each noticeId (and every dsList child id) to its nodeId;
             empty dict if all attempts fail
    """
    proxy = proxies
    en_str = get_enstr(params)
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://ygp.gdzwfw.gov.cn/ggzy-portal/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "X-Dgi-Req-App": en_str.get('X-Dgi-Req-App'),
        "X-Dgi-Req-Nonce": en_str.get('X-Dgi-Req-Nonce'),
        "X-Dgi-Req-Signature": en_str.get('X-Dgi-Req-Signature'),
        "X-Dgi-Req-Timestamp": en_str.get('X-Dgi-Req-Timestamp'),
    }

    url = "https://ygp.gdzwfw.gov.cn/ggzy-portal/center/apis/trading-notice/new/nodeList"
    # Fix: bind the result dict BEFORE the retry loop. Previously it was only
    # assigned inside the try-block, so if the first request raised and every
    # retry failed, the final ``return nodeId_dict`` crashed with UnboundLocalError.
    nodeId_dict = {}
    retry = 0
    while retry < 3:
        try:
            res = requests.get(url, headers=headers, params=params, proxies=proxy, timeout=60, verify=False)
            nodeId_info = res.json().get('data') or []
            for nd in nodeId_info:
                # Map every child id listed under dsList to this node's id ...
                for i in (nd.get('dsList') or []):
                    for k, v in i.items():
                        for child in v:
                            nodeId_dict[child] = nd.get('nodeId')
                # ... and the notice's own id as well.
                nodeId_dict[nd.get('noticeId')] = nd.get('nodeId')
            if nodeId_dict:
                break
            # Empty payload: rotate the proxy before the next attempt.
            proxy = get_proxy()
            retry += 1
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # still propagate; any request/parse failure just consumes a retry.
            retry += 1

    return nodeId_dict
+
+
+
+
def get_enstr(data):
    """Produce the signed ``X-Dgi-*`` header dict for a request's query params.

    Serializes *data* as ``k=v`` pairs joined by ``&`` (URL-quoted, keeping
    ``&``/``=``), then delegates to the site's own JS (``get_pm``) to compute
    the nonce/timestamp/signature headers.

    :param data: mapping of query-parameter names to values
    :return: dict of signing headers produced by the JS routine
    """
    p_list = []
    for key, value in data.items():
        # The JS signer expects JavaScript-style lowercase booleans.
        if str(value) == "False":
            value = "false"
        p_list.append(f"{key}={value}")
    p_str = parse.quote("&".join(p_list), safe="&=")
    # Fix: pin the encoding. Relying on the platform default (e.g. GBK on a
    # Chinese-locale Windows host) can corrupt the JS source before execution.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('get_pm', p_str)

    return pm
+
+
def create_href(data):
    """Build the public detail-page URL for a notice via the site's JS helper.

    :param data: notice record (dict) as returned by the listing API
    :return: absolute portal URL string produced by the JS ``create_href``
    """
    # Fix: pin the encoding. Relying on the platform default (e.g. GBK on a
    # Chinese-locale Windows host) can corrupt the JS source before execution.
    with open('./gdsggzyjypt_encrypt.js', 'r', encoding='utf-8') as fr:
        ex_js = fr.read()
    ctx = execjs.compile(ex_js)
    pm = ctx.call('create_href', data)

    return pm
+
def deal_time(tm):
    """Normalise a compact timestamp string.

    ``"YYYYMMDD"``       -> ``"YYYY-MM-DD"``
    ``"YYYYMMDDHHMMSS"`` -> ``"YYYY-MM-DD HH:MM:SS"``
    Anything else (including empty/None) is returned unchanged.
    """
    if not tm:
        return tm
    size = len(tm)
    if size == 8:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]}"
    if size == 14:
        return f"{tm[:4]}-{tm[4:6]}-{tm[6:8]} {tm[8:10]}:{tm[10:12]}:{tm[12:]}"
    return tm
+
+
+
+
+
+
+
+
+

+ 111 - 0
a_gdsggzyjypt_tdky_tdsyqjggg/gdsggzyjypt_encrypt.js

@@ -0,0 +1,111 @@
// Minimal browser-like environment so the (browser-targeted) signing code
// below can run under Node: expose window/document/location/navigator as
// globals backed by a throwaway jsdom document.
const jsdom = require("jsdom");
const {JSDOM} = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`, {
    url: "https://example.org/",
    referrer: "https://example.com/",
    contentType: "text/html",
});
window = dom.window;
document = window.document;
location = window.location;
navigator = window.navigator

// crypto-js provides the SHA256 used for request signing in get_pm below.
CryptoJS = require('crypto-js')
+
// Compute the portal's anti-scraping request headers for a query string.
// The header names/values are obfuscated as index lists into `ine`; decoded,
// the returned object is:
//   { "X-Dgi-Req-App": "ggzy-portal",
//     "X-Dgi-Req-Nonce": <16 random chars from sF>,
//     "X-Dgi-Req-Timestamp": <Date.now() as string>,
//     "X-Dgi-Req-Signature": SHA256(nonce + key + sortedParams + timestamp) }
// req_pm: the URL-encoded "k=v&k=v" parameter string to sign.
function get_pm(req_pm){

    // Obfuscation alphabet: indices into `ine` decode the header strings below.
    const sF = "zxcvbnmlkjhgfdsaqwertyuiop0987654321QWERTYUIOPLKJHGFDSAZXCVBNM"
      , ine = sF + "-@#$%^&*+!";

    // Decode an index list into a string via the `ine` alphabet.
    function qu(e=[]) {
        return e.map(t=>ine[t]).join("")
    }
    // a = timestamp, l = 16-char random nonce, c = static signing key ("k8tUyS$m").
    const  a = Date.now()
      , l = ane(16)
      , c = qu([8, 28, 20, 42, 21, 53, 65, 6])
      , d = {
        // "X-Dgi-Req-App": "ggzy-portal"
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 54, 25, 25])]: qu([11, 11, 0, 21, 62, 25, 24, 19, 20, 15, 7]),
        // "X-Dgi-Req-Nonce": the random nonce
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 60, 24, 5, 2, 18])]: l,
        // "X-Dgi-Req-Timestamp": current epoch millis
        [qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 40, 23, 6, 18, 14, 20, 15, 6, 25])]: a.toString()
    }

    // Random int: 1 arg -> [1, e]; 2 args -> [e, t]; otherwise 0.
    function sne(e, t) {
        switch (arguments.length) {
        case 1:
            return parseInt(Math.random() * e + 1, 10);
        case 2:
            return parseInt(Math.random() * (t - e + 1) + e, 10);
        default:
            return 0
        }
    }

    // Random string of length e drawn from the alphanumeric part of sF.
    function ane(e) {
        return [...Array(e)].map(()=>sF[sne(0, 61)]).join("")
    }


    // Canonicalise params: sort "k=v" pairs lexicographically, join with "&".
    // Accepts either an object or an already-joined "k=v&k=v" string.
    function lne(e) {
        let t = "";
        return typeof e == "object" ? t = Object.keys(e).map(n=>`${n}=${e[n]}`).sort().join("&") : typeof e == "string" && (t = e.split("&").sort().join("&")),
        t
    }

    // Signature = SHA256(nonce + key + decodeURIComponent(sortedParams) + timestamp).
    function $g(e={}) {
        const {p: t, t: n, n: u, k: o} = e
          , r = lne(t);
        return CryptoJS.SHA256(u + o + decodeURIComponent(r) + n).toString()
    }


    p = $g({
        p: req_pm,
        t: a,
        n: l,
        k: c
    })

    // "X-Dgi-Req-Signature": the computed digest.
    d[[qu([56, 62, 52, 11, 23, 62, 39, 18, 16, 62, 53, 23, 11, 5, 15, 20, 22, 19, 18])]] = p

    return d

}
+
+
+
// Build the public detail-page URL for a notice record.
// Legacy "v0" notices use the short /jygg/v0/<id> route with only
// source/titleDetails in the query string; every other edition uses the
// "new" route carrying the full set of identifying parameters.
function create_href(e) {
    const plat = e.pubServicePlat;
    const typeDesc = e.noticeSecondTypeDesc;

    if (e.edition === "v0") {
        const legacyQuery = new URLSearchParams({
            source: plat,
            titleDetails: typeDesc
        });
        return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/jygg/v0/${e.noticeId}?${legacyQuery.toString()}`;
    }

    const query = new URLSearchParams({
        noticeId: e.noticeId,
        projectCode: e.projectCode,
        bizCode: e.tradingProcess,
        siteCode: e.regionCode,
        publishDate: e.publishDate,
        source: plat,
        titleDetails: typeDesc,
        classify: e.projectType
    });
    return `https://ygp.gdzwfw.gov.cn/ggzy-portal/#/44/new/jygg/${e.edition}/${e.noticeSecondType}?${query.toString()}`;
}
+
+
+

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio