dzr il y a 2 semaines
Parent
commit
1a931e1a35

+ 44 - 0
hb_hbzwfww_npzgs_njpc/utils.py

@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-16 
+---------
+@summary: Pure-Python port of the site's JavaScript token() generator (base-36 encoded token)
+---------
+@author: Dzr
+"""
+
+import math
+import random
+import string
+import time
+
+
+def hex36(num: int) -> str:
+    """Encode an integer as a lowercase base-36 string.
+
+    Digits are ``0-9`` followed by ``a-z``. A negative value is encoded as
+    its magnitude with a leading ``-``, the same convention JavaScript's
+    ``Number.prototype.toString(36)`` uses.
+
+    :param num: integer to encode
+    :return: base-36 representation of ``num``
+    """
+    if num == 0:
+        return '0'
+
+    # divmod() floors toward negative infinity, so for a negative value the
+    # quotient settles at -1 and never reaches 0. Encode the magnitude and
+    # re-attach the sign instead of looping forever.
+    sign = '-' if num < 0 else ''
+    num = abs(num)
+
+    alphabet = string.digits + string.ascii_lowercase
+    digits = []
+    while num:
+        num, i = divmod(num, 36)
+        digits.append(alphabet[i])
+    # Remainders are produced least-significant digit first.
+    return sign + ''.join(reversed(digits))
+
+
+def token():
+    """Return a time- and random-based token as a base-36 string.
+
+    Python port of the JavaScript helper it replaces::
+
+        function token() {
+            var Ie = 911
+                , Pe = 20170706
+                , Fe = 1e9;
+            extoken = (n = Math.floor(10 * Math.random()) * Fe,
+                (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
+            return extoken
+        }
+    """
+    multiplier = 911      # Ie
+    offset = 20170706     # Pe
+    modulus = 10 ** 9     # Fe (1e9)
+
+    # Random multiple of the modulus: one of 0, 1e9, ..., 9e9 (n in the JS).
+    salt = math.floor(10 * random.random()) * modulus
+    # Current Unix time in milliseconds, like (new Date).getTime().
+    now_ms = int(time.time() * 1000)
+
+    value = (now_ms % modulus + salt) * multiplier + offset
+    return hex36(value)

+ 12 - 26
hb_hbzwfww_npzgs_njpc/湖北政务服务网-拟批准公示-列表页.py

@@ -6,30 +6,16 @@ Created on 2025-02-15
 ---------
 @author: lzz
 """
-import feapder
-from items.njpc_item import NjpcListItem
-from collections import namedtuple
 import json
-import execjs
-
+from collections import namedtuple
 
-def token():
-    ex_js = '''
-    function token() {
-        var Ie = 911
-            , Pe = 20170706
-            , Fe = 1e9;
-        extoken = (n = Math.floor(10 * Math.random()) * Fe,
-            (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
+import feapder
+from items.njpc_item import NjpcListItem
 
-        return extoken
-    }
-    '''
-    ctx = execjs.compile(ex_js)
-    return ctx.call('token')
+from utils import token
 
 
-class NjpcSpider(feapder.PlanToBuildListSpider):
+class Spider(feapder.PlanToBuildListSpider):
 
     def start_callback(self):
         Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
@@ -100,9 +86,10 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
         info_list = response.json.get('data').get('list')
         for info in info_list:
             approvecode = info.get('pro_code') or info.get('_id')
-            detail_href = "https://tzxm.hubei.gov.cn/xxgk?" + approvecode
             projectname = info.get('pro_name').strip()
             publish_time = info.get('start_date').strip()
+            url = "/".join(["https://tzxm.hubei.gov.cn/xxgk", publish_time, approvecode])
+            detail_href = f"{url}?q={menu['channel']}"
 
             area = "湖北"    # 省份
             city = ""      # 城市
@@ -110,7 +97,7 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
 
             data_item = NjpcListItem()  # 存储数据的管道
             data_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
-            data_item.unique_key = ('href', publish_time,'projectname')
+            data_item.unique_key = ('href', publish_time, 'projectname')
             data_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
             data_item.projectname = projectname      # 项目名称
             data_item.publishtime = publish_time     # 发布时间
@@ -122,14 +109,13 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
             data_item.district = district  # 城市 默认为空
             data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/HP_GS_info/customShow"
             data_item.href = detail_href  # 详情链接
-            ddata = {
+            json_str = json.dumps({
                 "_id": info.get('_id'),
                 "screenKey": "SCR_l5oxx6va7f",
                 "switchLoginRequired": "off"
-            }
-            ddata = json.dumps(ddata)
+            })
             data_item.request_params = {
-                "data": ddata,
+                "data": json_str,
                 "method": "POST"
             }
             data_item.parser = "detail_get"
@@ -140,4 +126,4 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
 
 
 if __name__ == "__main__":
-    NjpcSpider(redis_key="lzz:hbzwfww_npzgs").start()
+    Spider(redis_key="lzz:hbzwfww_npzgs").start()

+ 13 - 24
hb_hbzwfww_npzgs_njpc/湖北政务服务网-受理-拟批准-详情页.py → hb_hbzwfww_npzgs_njpc/湖北政务服务网-拟批准公示-详情页.py

@@ -2,34 +2,19 @@
 """
 Created on 2025-02-15
 ---------
-@summary: 湖北政务服务网-受理公告-拟批准公示
+@summary: 湖北政务服务网-受理公告/拟批准公示
 ---------
 @author: lzz
 """
-import execjs
 import feapder
 from items.njpc_item import DataNjpcItem
 from untils.attachment import AttachmentDownloader
 from untils.tools import search, extract_file_type
 
+from utils import token
 
-def token():
-    ex_js = '''
-    function token() {
-        var Ie = 911
-            , Pe = 20170706
-            , Fe = 1e9;
-        extoken = (n = Math.floor(10 * Math.random()) * Fe,
-            (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
 
-        return extoken
-    }
-    '''
-    ctx = execjs.compile(ex_js)
-    return ctx.call('token')
-
-
-class Details(feapder.PlanToBuildDetailSpider):
+class Spider(feapder.PlanToBuildDetailSpider):
 
     def start_requests(self):
         data_list = self.get_tasks_by_rabbitmq(limit=100)
@@ -66,20 +51,24 @@ class Details(feapder.PlanToBuildDetailSpider):
         file_html = ""
         if detail_info.get('environment_document'):
             attachments = {}
+            downloader = AttachmentDownloader()
             for info in detail_info.get('environment_document'):
                 file_url = info.get('downloadUrl')
-                file_name = info.get('name','').strip()
-                file_type = extract_file_type(file_name,file_url)
+                file_name = info.get('name', '').strip()
+                file_type = extract_file_type(file_name, file_url)
                 if file_type:
-                    attachment = AttachmentDownloader().fetch_attachment(
-                        file_name=file_name, file_type=file_type, download_url=file_url)
+                    attachment = downloader.fetch_attachment(
+                        file_name=file_name,
+                        file_type=file_type,
+                        download_url=file_url
+                    )
                     attachments[str(len(attachments) + 1)] = attachment
                 temp = f'''
                     <a href="{file_url}">{file_name}</a>
                 '''
                 file_html += temp
 
-            if attachments:
+            if len(attachments) > 0:
                 data_item.projectinfo = {"attachments": attachments}
 
         if detail_info.get('is_sl') == "A00002":
@@ -271,4 +260,4 @@ class Details(feapder.PlanToBuildDetailSpider):
 
 
 if __name__ == '__main__':
-    Details(redis_key="lzz:hbzwfww_npzgs").start()
+    Spider(redis_key="lzz:hbzwfww_npzgs").start()

+ 1 - 1
hb_hbzwfww_slgs_njpc/湖北政务服务网-受理公告-详情页.py

@@ -2,7 +2,7 @@
 """
 Created on 2025-02-15
 ---------
-@summary: 湖北政务服务网-受理公告-拟批准公示
+@summary: 湖北政务服务网-受理公告/拟批准公示
 ---------
 @author: lzz
 """