dzr пре 2 недеља
родитељ
комит
4a57f55293

+ 44 - 0
hb_hbzwfww_pfgg_njpc/utils.py

@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-16 
+---------
+@summary: Base-36 encoding and request-token helpers (Python port of the original JavaScript)
+---------
+@author: Dzr
+"""
+
+import math
+import random
+import string
+import time
+
+
+def hex36(num: int) -> str:
+    """Return the base-36 representation of ``num`` as a lowercase string.
+
+    Digits ``0-9`` are followed by ``a-z``, the same alphabet JavaScript's
+    ``Number.prototype.toString(36)`` uses for integers.
+
+    Args:
+        num: Integer to convert. Negative values are rendered with a
+            leading ``-``.
+
+    Returns:
+        The base-36 digits of ``num``; ``'0'`` when ``num`` is zero.
+    """
+    if num == 0:
+        return '0'
+
+    # Convert the magnitude only: with floor division a negative value never
+    # reaches zero (divmod(-1, 36) == (-1, 35)), so looping on the signed
+    # number would not terminate.
+    sign = '-' if num < 0 else ''
+    num = abs(num)
+
+    alphabet = string.digits + string.ascii_lowercase
+    digits = []
+    while num:
+        num, i = divmod(num, 36)
+        digits.append(alphabet[i])
+    # Digits were produced least-significant first.
+    return sign + ''.join(reversed(digits))
+
+
+def token():
+    """Build the request token as a base-36 string.
+
+    Python port of the JavaScript routine below::
+
+        function token() {
+            var Ie = 911
+                , Pe = 20170706
+                , Fe = 1e9;
+            extoken = (n = Math.floor(10 * Math.random()) * Fe,
+                (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
+            return extoken
+        }
+
+    Returns:
+        The computed integer encoded by ``hex36``.
+    """
+    multiplier, offset, modulus = 911, 20170706, 10 ** 9  # Ie, Pe, Fe (1e9)
+    # Random digit 0-9 scaled by Fe, so it sits above the reduced timestamp.
+    random_part = math.floor(10 * random.random()) * modulus
+    # Millisecond timestamp reduced modulo Fe, like (new Date).getTime() % Fe.
+    millis = int(time.time() * 1000) % modulus
+    return hex36((millis + random_part) * multiplier + offset)

+ 51 - 71
hb_hbzwfww_pfgg_njpc/湖北政务服务网-批复公告-列表页.py

@@ -9,28 +9,13 @@ Created on 2025-02-15
 import json
 from collections import namedtuple
 
-import execjs
 import feapder
 from items.njpc_item import NjpcListItem
 
+from utils import token
 
-def token():
-    ex_js = '''
-    function token() {
-        var Ie = 911
-            , Pe = 20170706
-            , Fe = 1e9;
-        extoken = (n = Math.floor(10 * Math.random()) * Fe,
-            (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
 
-        return extoken
-    }
-    '''
-    ctx = execjs.compile(ex_js)
-    return ctx.call('token')
-
-
-class NjpcSpider(feapder.PlanToBuildListSpider):
+class Spider(feapder.PlanToBuildListSpider):
 
     def start_callback(self):
         Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
@@ -44,59 +29,9 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
         for menu in self.menus:
             yield feapder.Request(url=url, item=menu._asdict(), page=1)
 
-    def parse(self, request, response):
-        menu = request.item
-        info_list = response.json.get('data').get('list')
-        for info in info_list:
-            approvecode = info.get('project_code') or info.get('_id')
-            detail_href = "https://tzxm.hubei.gov.cn/xxgk" + approvecode
-            projectname = info.get('project_name').strip()
-            publish_time = info.get('real_finish_date').strip()
-            approvecontent = info.get('item_sortname')
-            approvedept = info.get('depart')[0].get('name')
-            reply_res_dict = {"A00001": "许可/同意", "A00002": "不许可/不同意"}
-            approvestatus = reply_res_dict.get(info.get('reply_res'))
-
-            area = "湖北"  # 省份
-            city = ""  # 城市
-            district = ""
-
-            data_item = NjpcListItem()  # 存储数据的管道
-            data_item.channel = menu.get("channel")  # 最上方定义的抓取栏目 (编辑器定的)
-            data_item.unique_key = ('href', publish_time, 'projectname')
-            data_item.spidercode = menu.get("code")  # 最上方定义的爬虫code(编辑器定的)
-            data_item.projectname = projectname  # 项目名称
-            data_item.publishtime = publish_time  # 发布时间
-            data_item.approvecode = approvecode
-            data_item.approvestatus = approvestatus
-            data_item.approvedept = approvedept
-            data_item.approvecontent = approvecontent
-
-            data_item.site = self.site
-            data_item.area = area or "全国"  # 城市默认:全国
-            data_item.city = city  # 城市 默认为空
-            data_item.district = district  # 城市 默认为空
-            data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/approved_pass/customShow"
-            data_item.href = detail_href  # 详情链接
-            ddata = {
-                "_id": info.get('_id'),
-                "screenKey": "SCR_l47zuqhopb",
-                "switchLoginRequired": "off"
-            }
-            ddata = json.dumps(ddata)
-
-            data_item.request_params = {"data": ddata,
-                                        "method": "POST"}
-            data_item.parser = "detail_get"
-
-            yield data_item
-
-        request = self.infinite_pages(request, response)
-        yield request
-
     def download_midware(self, request):
         page = request.page
-        data = {
+        data = json.dumps({
             "countLimit": 1000,
             "screenKey": "SCR_l47zuqhopb",
             "advSearch": {
@@ -131,8 +66,7 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
             },
             "indexHints": [],
             "switchLoginRequired": "off"
-        }
-        data = json.dumps(data)
+        })
         request.data = data
         request.headers = {
             "Accept": "application/json, text/plain, */*",
@@ -147,6 +81,52 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
             "extoken": f"{token()}",
         }
 
+    def parse(self, request, response):
+        """Turn one list-page response into ``NjpcListItem`` objects.
+
+        Reads ``response.json["data"]["list"]`` and yields one populated
+        ``NjpcListItem`` per entry, then yields whatever
+        ``self.infinite_pages(request, response)`` returns (presumably the
+        request for the next page -- confirm against the base spider).
+
+        :param request: request whose ``item`` holds the menu dict
+            (``channel`` and ``code`` are read from it).
+        :param response: response whose ``json`` payload carries the list.
+        """
+        menu = request.item
+        info_list = response.json.get('data').get('list')
+        for info in info_list:
+            # Fall back to the record id when no project code is present.
+            approvecode = info.get('project_code') or info.get('_id')
+            projectname = info.get('project_name').strip()
+            publish_time = info.get('real_finish_date').strip()
+            # Detail link: base URL / publish time / approve code.
+            detail_href = "/".join(["https://tzxm.hubei.gov.cn/xxgk", publish_time, approvecode])
+            approvecontent = info.get('item_sortname')
+            # Only the first department entry is used.
+            approvedept = info.get('depart')[0].get('name')
+            # Map the reply code to its status text; unknown codes give None.
+            reply_res_dict = {"A00001": "许可/同意", "A00002": "不许可/不同意"}
+            approvestatus = reply_res_dict.get(info.get('reply_res'))
+
+            area = "湖北"  # province
+            city = ""  # city
+            district = ""
+
+            data_item = NjpcListItem()  # item that carries the scraped data
+            data_item.channel = menu.get("channel")  # crawl channel defined at the top (set by the editor)
+            data_item.unique_key = ('href', publish_time, 'projectname')
+            data_item.spidercode = menu.get("code")  # spider code defined at the top (set by the editor)
+            data_item.projectname = projectname  # project name
+            data_item.publishtime = publish_time  # publish time
+            data_item.approvecode = approvecode
+            data_item.approvestatus = approvestatus
+            data_item.approvedept = approvedept
+            data_item.approvecontent = approvecontent
+
+            data_item.site = self.site
+            data_item.area = area or "全国"  # falls back to the "全国" literal when area is empty
+            data_item.city = city  # city, empty by default
+            data_item.district = district  # district, empty by default
+            data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/approved_pass/customShow"
+            data_item.href = detail_href  # detail link
+            # JSON body for the detail request, keyed by this record's _id.
+            json_str = json.dumps({
+                "_id": info.get('_id'),
+                "screenKey": "SCR_l47zuqhopb",
+                "switchLoginRequired": "off"
+            })
+            data_item.request_params = {"data": json_str, "method": "POST"}
+            data_item.parser = "detail_get"
+            yield data_item
+
+        request = self.infinite_pages(request, response)
+        yield request
+
 
 if __name__ == "__main__":
-    NjpcSpider(redis_key="lzz:hbzwfww_pfgg").start()
+    Spider(redis_key="lzz:hbzwfww_pfgg").start()

+ 7 - 21
hb_hbzwfww_pfgg_njpc/湖北政务服务网-批复公告-详情页.py

@@ -6,28 +6,13 @@ Created on 2025-02-15
 ---------
 @author: lzz
 """
-import execjs
 import feapder
 from items.njpc_item import DataNjpcItem
 
+from utils import token
 
-def token():
-    ex_js = '''
-    function token() {
-        var Ie = 911
-            , Pe = 20170706
-            , Fe = 1e9;
-        extoken = (n = Math.floor(10 * Math.random()) * Fe,
-            (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
 
-        return extoken
-    }
-    '''
-    ctx = execjs.compile(ex_js)
-    return ctx.call('token')
-
-
-class Details(feapder.PlanToBuildDetailSpider):
+class Spider(feapder.PlanToBuildDetailSpider):
 
     def start_requests(self):
         data_list = self.get_tasks_by_rabbitmq(limit=100)
@@ -35,11 +20,12 @@ class Details(feapder.PlanToBuildDetailSpider):
             # log.debug(item)
             request_params = item.get("request_params")
             timeout = request_params.pop('timeout', 10)
-            yield feapder.Request(url=item.get("parser_url"),
+            url = item["parser_url"]
+            yield feapder.Request(url,
+                                  timeout=timeout,
+                                  callback=item.get("parser"),
                                   item=item,
                                   deal_detail=item.get("deal_detail"),
-                                  callback=item.get("parser"),
-                                  timeout=timeout,
                                   **request_params)
 
     def download_midware(self, request):
@@ -115,4 +101,4 @@ class Details(feapder.PlanToBuildDetailSpider):
 
 
 if __name__ == '__main__':
-    Details(redis_key="lzz:hbzwfww_pfgg").start()
+    Spider(redis_key="lzz:hbzwfww_pfgg").start()