瀏覽代碼

代码提交

lizongze 8 月之前
父節點
當前提交
553c9351fe

+ 90 - 0
湖北/恩施州咸丰县公共资源交易监督管理局-土地交易信息-列表页.py

@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-11-26
+---------
+@summary: 恩施州咸丰县公共资源交易监督管理局
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import BidingListItem
+from collections import namedtuple
+import time
+
+
+class ZtbpcFeapder(feapder.BiddingListSpider):
+
+    def start_callback(self):
+
+        self.site = "恩施州咸丰县公共资源交易监督管理局"
+
+        Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
+
+        self.menus = [
+            Menu('土地交易信息', 'hb_eszxfxggzyjyjdglj_tdjyxx', 1),
+        ]
+
+        self.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Content-Length": "0",
+            "Origin": "http://jypt.gtj.enshi.gov.cn",
+            "Pragma": "no-cache",
+            "Referer": "http://jypt.gtj.enshi.gov.cn/upload//ggjyzx/index.html?type=2&code=422826",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "http://jypt.gtj.enshi.gov.cn/api/message/findProcessNoticeByPage/2/11/422826/es_construction_land"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1, method="POST", proxies=False)
+
+    def download_midware(self, request):
+        page = request.page
+        url = f"http://jypt.gtj.enshi.gov.cn/api/message/findProcessNoticeByPage/{page}/11/422826/es_construction_land"
+        params = {
+            "currentmillisecond": f"{int(time.time() * 1000)}"
+        }
+        request.url = url
+        request.params = params
+        request.headers = self.headers
+
+    def parse(self, request, response):
+        menu = request.item
+        info_list = response.json.get('records')
+        for info in info_list:
+            title = info.get('msgNumber').strip()
+            hid = info.get('id')
+            href = f"http://jypt.gtj.enshi.gov.cn/upload//ggjyzx/detail.html?id={hid}"
+            publish_time = info.get('createDate').split('.')[0]
+
+            area = "湖北"
+            city = "恩施土家族苗族自治州"
+
+            list_item = BidingListItem()  # 存储数据的管道
+            list_item.href = href  # 标书链接
+            list_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
+            list_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
+            list_item.title = title  # 标题
+            list_item.publishtime = publish_time  # 标书发布时间
+            list_item.site = self.site
+            list_item.area = area or "全国"  # 省份 默认:全国
+            list_item.city = city  # 城市 默认 为空
+
+            list_item.unique_key = ('href',)
+            list_item.parse = "self.detail_get"  # 详情页回调方法
+            list_item.deal_detail = ['//div[@class="info"]', '//div[@class="detail"]']
+            list_item.request_params = {"conn_html": True}
+            list_item.parse_url = href
+            list_item.infoformat = 3
+
+            yield list_item
+
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    # Script entry point: start the list spider with the "detail:chrome" redis key.
+    ZtbpcFeapder(redis_key="detail:chrome").start()

+ 91 - 0
衡阳市公共资源交易服务平台/工程建设-列表页.py

@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-11-27
+---------
+@summary: 衡阳市公共资源交易服务平台
+---------
+@author: lzz
+"""
+import feapder
+from items.spider_item import BidingListItem
+from collections import namedtuple
+
+
+class ZtbpcFeapder(feapder.BiddingListSpider):
+
+    def start_callback(self):
+
+        self.site = "衡阳市公共资源交易服务平台"
+
+        Menu = namedtuple('Menu', ['channel', 'code', 'tid', 'crawl_page'])
+
+        self.menus = [
+            Menu('工程建设-住建工程', 'hn_hysggzyjyfwpt_gcjs_zjgc', 'CONSTRUCTION', 1),
+            Menu('工程建设-交通工程', 'hn_hysggzyjyfwpt_gcjs_jtgc', 'TRANSPORATATION', 1),
+            Menu('工程建设-水利工程', 'hn_hysggzyjyfwpt_gcjs_slgc', 'WATER_CONSERVANCY', 1),
+            Menu('工程建设-其他', 'hn_hysggzyjyfwpt_gcjs_qt', 'OTHER', 1),
+        ]
+
+        self.headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Pragma": "no-cache",
+            "Referer": "https://hengyang.hnsggzy.com/gczb/index.jhtml",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+        }
+
+    def start_requests(self):
+        for menu in self.menus:
+            start_url = "https://hengyang.hnsggzy.com/tradeApi/constructionTender/listByFile"
+            yield feapder.Request(url=start_url, item=menu._asdict(), page=1, proxies=False)
+
+    def download_midware(self, request):
+        page = request.page
+        menu = request.item
+        params = {
+            "current": f"{page}",
+            "size": "10",
+            "regionCode": "430400",
+            "tenderProjectType": menu.get('tid')
+        }
+        request.params = params
+        request.headers = self.headers
+
+    def parse(self, request, response):
+        menu = request.item
+        info_list = response.json.get('data').get('records')
+        for info in info_list:
+            title = info.get('bidSectionName').strip()
+            hid = info.get('id')
+            regionCode = info.get('regionCode')
+            bidSectionId = info.get('bidSectionId')
+            href = f"https://hengyang.hnsggzy.com/gczb/index.jhtml#/resources/transactionDetail/{menu.get('tid').lower().replace('_c','C')}?id={hid}&regionCode={regionCode}&bidSectionId={bidSectionId}"
+            publish_time = info.get('noticeSendTime')
+
+            area = "湖南"
+            city = "衡阳市"
+
+            list_item = BidingListItem()  # 存储数据的管道
+            list_item.href = href  # 标书链接
+            list_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
+            list_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
+            list_item.title = title  # 标题
+            list_item.publishtime = publish_time  # 标书发布时间
+            list_item.site = self.site
+            list_item.area = area or "全国"  # 省份 默认:全国
+            list_item.city = city  # 城市 默认 为空
+
+            list_item.unique_key = ('href',)
+            list_item.parse = "self.detail_get"  # 详情页回调方法
+            list_item.parse_url = f"https://hengyang.hnsggzy.com/tradeApi/constructionTender/{hid}"
+
+            yield list_item
+
+        request = self.infinite_pages(request, response)
+        yield request
+
+
+if __name__ == "__main__":
+    # Script entry point: start the list spider with the "lzz:hysggzyjyfwpt_gcjs_zjgc" redis key.
+    ZtbpcFeapder(redis_key="lzz:hysggzyjyfwpt_gcjs_zjgc").start()

+ 386 - 0
衡阳市公共资源交易服务平台/工程建设-详情页.py

@@ -0,0 +1,386 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-11-27
+---------
+@summary: 衡阳市公共资源交易服务平台
+---------
+@author: lzz
+"""
+import re
+
+import feapder
+from items.spider_item import DataBakItem
+from untils.attachment import AttachmentDownloader
+import requests
+import time
+
+# Request headers shared by the module-level helper functions below and by
+# Details.download_midware.
+headers = {
+    "Accept": "application/json, text/plain, */*",
+    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+    "Cache-Control": "no-cache",
+    "Connection": "keep-alive",
+    "Pragma": "no-cache",
+    "Referer": "https://hengyang.hnsggzy.com/gczb/index.jhtml",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+}
+
+
+def get_gid(hid, proxies=False):
+    url = f"https://hengyang.hnsggzy.com/tradeApi/constructionTender/getSection?fileId={hid}"
+    try:
+        response = requests.get(url, headers=headers, timeout=30, proxies=proxies, verify=False)
+        return response.json().get('data')[0].get('id')
+    except:
+        return ""
+
+
+def ggxx_info(hid, proxies=False):
+    url = f"https://hengyang.hnsggzy.com/tradeApi/constructionNotice/selectbyconstructionsectionid/{hid}"
+    try:
+        response = requests.get(url, headers=headers, timeout=30, proxies=proxies, verify=False)
+        dt = response.json().get('data').get('noticeList')[0]
+        if dt:
+            html = f'''
+            <table>
+                <tr>
+                    <th>公告标题</th>
+                    <td colspan="3">
+                        {dt.get('noticeName')}
+                    </td>
+                </tr>
+                <tr>
+                    <th>公告源URL</th>
+                    <td colspan="3">/</td>
+                </tr>
+                <tr>
+                    <th>公告类型</th>
+                    <td>{dt.get('bulletinName')}</td>
+                    <th>公告发布时间</th>
+                    <td>{dt.get('noticeSendTime')}</td>
+                </tr>
+                <tr>
+                    <th>公告发布责任人</th>
+                    <td>{dt.get('bulletinDuty')}</td>
+                    <th>招标文件/资格预审文件获取开始时间</th>
+                    <td>{dt.get('docGetStartTime')}</td>
+                </tr>
+                <tr>
+                    <th>招标文件/资格预审文件获取截止时间</th>
+                    <td colspan="3">{dt.get('docGetEndTime')}</td>
+                </tr>
+                <tr>
+                    <th>公告内容</th>
+                    <td colspan="3">
+                        <div>{dt.get('noticeContent')}</div>
+                    </td>
+                </tr>
+            </table>
+            '''
+            return html.replace('None', '')
+        else:
+            return ""
+    except:
+        return ""
+
+
+def gsxx_info(gid, proxies=False):
+    if not gid:
+        return ""
+    url = f"https://hengyang.hnsggzy.com/tradeApi/constructionNotice/selectWinningBidNotice/{gid}/0"
+    try:
+        response = requests.get(url, headers=headers, timeout=30, proxies=proxies, verify=False)
+        dt = response.json().get('data')[0]
+        if dt:
+            html = f'''
+            <table>
+                <tr>
+                    <th>公示标题</th>
+                    <td colspan="3">
+                        <div>
+                            {dt.get('publicityName')}
+                        </div>
+                    </td>
+                </tr>
+                <tr>
+                    <th>交易平台</th>
+                    <td>{dt.get('platform')}</td>
+                    <th>公示类型</th>
+                    <td>中标候选人公示</td>
+                </tr>
+                <tr>
+                    <th>公示发布责任人</th>
+                    <td>{dt.get('bulletinDuty')}</td>
+                    <th>公示发布时间</th>
+                    <td>{dt.get('publicityReferTime')}</td>
+                </tr>
+                <tr>
+                    <th>公示开始时间</th>
+                    <td>{dt.get('publicityStartTime')}</td>
+                    <th>公示结束时间</th>
+                    <td>{dt.get('publicityEndTime')}</td>
+                </tr>
+                <tr>
+                    <th>公示内容</th>
+                    <td colspan="3">
+                        <div>{dt.get('publicityContent')}</div>
+                    </td>
+                </tr>
+            </table>
+            '''
+            return html.replace('None', '')
+        else:
+            return ""
+    except:
+        return ""
+
+
+def jggs_info(gid, proxies=False):
+    if not gid:
+        return ""
+    url = f"https://hengyang.hnsggzy.com/tradeApi/constructionNotice/selectWinningBidNotice/{gid}/1"
+    try:
+        response = requests.get(url, headers=headers, timeout=30, proxies=proxies, verify=False)
+        dt = response.json().get('data')[0]
+        if dt:
+            html = f'''
+            <table>
+                <tr>
+                    <th>公告标题</th>
+                    <td colspan="3">
+                        {dt.get('publicityName')}
+                    </td>
+                </tr>
+                <tr>
+                    <th>交易平台</th>
+                    <td colspan="3">{dt.get('platform')}</td>
+                </tr>
+                <tr>
+                    <th>公告类型</th>
+                    <td>中标结果公告</td>
+                    <th>公示发布时间</th>
+                    <td>{dt.get('publicityReferTime')}</td>
+                </tr>
+                <tr>
+                    <th>公告内容</th>
+                    <td colspan="3">
+                        <div>{dt.get('publicityContent')}</div>
+                    </td>
+                </tr>
+            </table>
+            '''
+            return html.replace('None', '')
+        else:
+            return ""
+    except:
+        return ""
+
+
+class Details(feapder.BiddingDetailSpider):
+
+    def start_requests(self):
+        data_lsit = self.get_tasks_by_rabbitmq(limit=10)
+        for item in data_lsit:
+            request_params = item.get("request_params")
+            timeout = request_params.get('timeout', 20)
+            request_params.pop('timeout', None)
+            yield feapder.Request(url=item.get("parse_url"), item=item,
+                                  deal_detail=item.get("deal_detail"), callback=eval(item.get("parse")),
+                                  **request_params, timeout=timeout, proxies=False)
+
+    def download_midware(self, request):
+        request.headers = headers
+
+    def detail_get(self, request, response):
+        yield_list = []
+        items = request.item
+        list_item = DataBakItem(**items)
+        old_title = list_item.title
+        old_href = list_item.href
+
+        hid = "".join(re.findall("\?id=(.*?)&", list_item.href))
+
+        dt = response.json.get('data')
+        cProject = dt.get('constructionProject')
+        cSectionList = dt.get('constructionSectionList', [])
+        cTender = dt.get('constructionTender')
+
+        isMajorProject = "是" if str(cTender.get('isMajorProject', '')) == "1" else "否"
+        bdinfo_html = ""
+        index = 1
+        for bd in cSectionList:
+            temp = f'''
+            <tr>
+                <td rowspan="1" colspan="1">
+                    <div class="cell">
+                        <div>{index}</div>
+                    </div>
+                </td>
+                <td rowspan="1" colspan="1">
+                    <div class="cell">{bd.get('bidSectionCode')}</div>
+                </td>
+                <td rowspan="1" colspan="1">
+                    <div class="cell">{bd.get('bidSectionName')}</div>
+                </td>
+                <td rowspan="1" colspan="1">
+                    <div class="cell"><span> {bd.get('contractReckonPrice')} 万元</span>
+                    </div>
+                </td>
+            </tr>
+            '''
+            index += 1
+            bdinfo_html += temp
+
+        html = f'''
+        <div>
+            <div>项目信息</div>
+            <table>
+                <tr>
+                    <th>项目名称</th>
+                    <td colspan="3">{cProject.get('projectName')}</td>
+                </tr>
+                <tr>
+                    <th>项目编号</th>
+                    <td>{cProject.get('projectCode')}</td>
+                    <th>项目所在行政区</th>
+                    <td>{cProject.get('regionCode')}</td>
+                </tr>
+                <tr>
+                    <th>项目行业分类</th>
+                    <td>{cProject.get('industriesType')}</td>
+                    <th>资金来源</th>
+                    <td>{cProject.get('fundSource')}</td>
+                </tr>
+                <tr>
+                    <th>投资项目统一代码</th>
+                    <td colspan="3">{cProject.get('investProjectCode')}</td>
+                </tr>
+            </table>
+            <div>招标项目信息</div>
+            <table>
+                <tr>
+                    <th>招标项目编号</th>
+                    <td>{cTender.get('tenderProjectCode')}</td>
+                    <th>招标项目名称</th>
+                    <td>{cTender.get('tenderProjectName')}</td>
+                </tr>
+                <tr>
+                    <th>招标项目类型</th>
+                    <td>{cTender.get('tenderProjectType')}</td>
+                    <th>项目业主名称</th>
+                    <td>{cTender.get('ownerName')}</td>
+                </tr>
+                <tr>
+                    <th>招标人名称</th>
+                    <td>{cTender.get('tendererName')}</td>
+                    <th>招标代理机构名称</th>
+                    <td>{cTender.get('tenderAgencyName')}</td>
+                </tr>
+                <tr></tr>
+                <tr>
+                    <th>招标方式</th>
+                    <td>{cTender.get('tenderMode')}</td>
+                    <th>招标组织形式</th>
+                    <td>{cTender.get('tenderOrganizeForm')}</td>
+                </tr>
+                <tr>
+                    <th>是否重大项目</th>
+                    <td colspan="3">{isMajorProject}</td>
+                </tr>
+                <tr>
+                    <th>招标内容与范围</th>
+                    <td colspan="3">
+                        {cTender.get('tenderContent')}
+                    </td>
+                </tr>
+            </table>
+            <div>标段信息</div>
+            <div style="width: 100%;">
+                <div class="hidden-columns">
+                    <div></div>
+                    <div></div>
+                    <div></div>
+                    <div></div>
+                    <div></div>
+                    <div></div>
+                </div>
+                <div>
+                    <table style="width: 1200px;">
+                        <colgroup>
+                            <col width="60">
+                            <col width="465">
+                            <col width="464">
+                            <col width="120">
+                        </colgroup>
+                        <thead>
+                        <tr class="">
+                            <th colspan="1" rowspan="1">
+                                <div class="cell">序号</div>
+                            </th>
+                            <th colspan="1" rowspan="1">
+                                <div class="cell">标段(包)编号</div>
+                            </th>
+                            <th colspan="1" rowspan="1">
+                                <div class="cell">标段(包)名称</div>
+                            </th>
+                            <th colspan="1" rowspan="1">
+                                <div class="cell">标段合同估算价</div>
+                            </th>
+                        </tr>
+                        </thead>
+                    </table>
+                </div>
+                <div>
+                    <table cellspacing="0" cellpadding="0" border="0" style="width: 1200px;">
+                        <colgroup>
+                            <col width="60">
+                            <col width="465">
+                            <col width="464">
+                            <col width="120">
+                        </colgroup>
+                        <tbody>
+                        {bdinfo_html}
+                        </tbody>
+                    </table>
+                </div>
+            </div>
+        </div>
+        '''
+        list_item.contenthtml = html.replace('None', '')
+        list_item.href = old_href + f"&{int(time.time() * 1000)}"
+        list_item.title = old_title + f"_项目信息"
+
+        attachments = {}
+
+        file_url = f"https://hengyang.hnsggzy.com/tradeApi/attach/proxy/download?zhaoBiaoFileId={hid}&region=430400&jytype=CONSTRUCTION"
+
+        attachment = AttachmentDownloader().fetch_attachment(
+            file_name="招标文件", file_type="pdf", download_url=file_url)
+
+        if attachment.__contains__("fid"):
+            attachments[str(len(attachments) + 1)] = attachment
+            list_item.projectinfo = {"attachments": attachments}
+        yield_list.append(list_item)
+
+        ggxx_html = ggxx_info(hid)
+        gid = get_gid(hid)
+        gsxx_html = gsxx_info(gid)
+        jggs_html = jggs_info(gid)
+        hh_list = {"公告信息": ggxx_html, "公示信息": gsxx_html, "结果公示": jggs_html}
+        for chan, html in hh_list.items():
+            if html:
+                itemsc = request.item
+                list_itemc = DataBakItem(**itemsc)
+
+                list_itemc.href = old_href + f"&{int(time.time() * 1000)}"
+                list_itemc.title = old_title + f"_{chan}"
+
+                list_itemc.contenthtml = html
+                yield_list.append(list_itemc)
+
+        for yd in yield_list:
+            yd.is_check_text = False
+            yield yd
+
+
+if __name__ == "__main__":
+    # Script entry point: start the detail spider with the "lzz:hysggzyjyfwpt_gcjs_zjgc" redis key.
+    Details(redis_key="lzz:hysggzyjyfwpt_gcjs_zjgc").start()