|
@@ -9,28 +9,13 @@ Created on 2025-02-15
|
|
|
import json
|
|
|
from collections import namedtuple
|
|
|
|
|
|
-import execjs
|
|
|
import feapder
|
|
|
from items.njpc_item import NjpcListItem
|
|
|
|
|
|
+from utils import token
|
|
|
|
|
|
-def token():
|
|
|
- ex_js = '''
|
|
|
- function token() {
|
|
|
- var Ie = 911
|
|
|
- , Pe = 20170706
|
|
|
- , Fe = 1e9;
|
|
|
- extoken = (n = Math.floor(10 * Math.random()) * Fe,
|
|
|
- (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
|
|
|
|
|
|
- return extoken
|
|
|
- }
|
|
|
- '''
|
|
|
- ctx = execjs.compile(ex_js)
|
|
|
- return ctx.call('token')
|
|
|
-
|
|
|
-
|
|
|
-class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
+class Spider(feapder.PlanToBuildListSpider):
|
|
|
|
|
|
def start_callback(self):
|
|
|
Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
|
|
@@ -44,59 +29,9 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
for menu in self.menus:
|
|
|
yield feapder.Request(url=url, item=menu._asdict(), page=1)
|
|
|
|
|
|
- def parse(self, request, response):
|
|
|
- menu = request.item
|
|
|
- info_list = response.json.get('data').get('list')
|
|
|
- for info in info_list:
|
|
|
- approvecode = info.get('project_code') or info.get('_id')
|
|
|
- detail_href = "https://tzxm.hubei.gov.cn/xxgk" + approvecode
|
|
|
- projectname = info.get('project_name').strip()
|
|
|
- publish_time = info.get('real_finish_date').strip()
|
|
|
- approvecontent = info.get('item_sortname')
|
|
|
- approvedept = info.get('depart')[0].get('name')
|
|
|
- reply_res_dict = {"A00001": "许可/同意", "A00002": "不许可/不同意"}
|
|
|
- approvestatus = reply_res_dict.get(info.get('reply_res'))
|
|
|
-
|
|
|
- area = "湖北" # 省份
|
|
|
- city = "" # 城市
|
|
|
- district = ""
|
|
|
-
|
|
|
- data_item = NjpcListItem() # 存储数据的管道
|
|
|
- data_item.channel = menu.get("channel") # 最上方定义的抓取栏目 (编辑器定的)
|
|
|
- data_item.unique_key = ('href', publish_time, 'projectname')
|
|
|
- data_item.spidercode = menu.get("code") # 最上方定义的爬虫code(编辑器定的)
|
|
|
- data_item.projectname = projectname # 项目名称
|
|
|
- data_item.publishtime = publish_time # 发布时间
|
|
|
- data_item.approvecode = approvecode
|
|
|
- data_item.approvestatus = approvestatus
|
|
|
- data_item.approvedept = approvedept
|
|
|
- data_item.approvecontent = approvecontent
|
|
|
-
|
|
|
- data_item.site = self.site
|
|
|
- data_item.area = area or "全国" # 城市默认:全国
|
|
|
- data_item.city = city # 城市 默认为空
|
|
|
- data_item.district = district # 城市 默认为空
|
|
|
- data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/approved_pass/customShow"
|
|
|
- data_item.href = detail_href # 详情链接
|
|
|
- ddata = {
|
|
|
- "_id": info.get('_id'),
|
|
|
- "screenKey": "SCR_l47zuqhopb",
|
|
|
- "switchLoginRequired": "off"
|
|
|
- }
|
|
|
- ddata = json.dumps(ddata)
|
|
|
-
|
|
|
- data_item.request_params = {"data": ddata,
|
|
|
- "method": "POST"}
|
|
|
- data_item.parser = "detail_get"
|
|
|
-
|
|
|
- yield data_item
|
|
|
-
|
|
|
- request = self.infinite_pages(request, response)
|
|
|
- yield request
|
|
|
-
|
|
|
def download_midware(self, request):
|
|
|
page = request.page
|
|
|
- data = {
|
|
|
+ data = json.dumps({
|
|
|
"countLimit": 1000,
|
|
|
"screenKey": "SCR_l47zuqhopb",
|
|
|
"advSearch": {
|
|
@@ -131,8 +66,7 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
},
|
|
|
"indexHints": [],
|
|
|
"switchLoginRequired": "off"
|
|
|
- }
|
|
|
- data = json.dumps(data)
|
|
|
+ })
|
|
|
request.data = data
|
|
|
request.headers = {
|
|
|
"Accept": "application/json, text/plain, */*",
|
|
@@ -147,6 +81,52 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
"extoken": f"{token()}",
|
|
|
}
|
|
|
|
|
|
def parse(self, request, response):
    """Convert one page of the list API response into ``NjpcListItem`` records.

    Reads ``response.json["data"]["list"]``, yields one populated
    ``NjpcListItem`` per entry, and finally yields whatever
    ``self.infinite_pages(request, response)`` returns.

    Args:
        request: The request that produced ``response``. ``request.item`` is
            the menu dict; its ``channel`` and ``code`` keys are read here.
        response: The response object; its ``json`` attribute must expose the
            ``data`` -> ``list`` payload.

    Yields:
        One ``NjpcListItem`` per record, then the result of
        ``self.infinite_pages``.

    Raises:
        AttributeError / TypeError: if the payload has no ``data`` object or
            no iterable ``list`` (the envelope is deliberately read strictly,
            unchanged from the previous behaviour).
    """
    menu = request.item
    # Envelope access is kept strict: a payload without "data"/"list" raises
    # here instead of being treated as an empty page.
    info_list = response.json.get('data').get('list')

    # Loop-invariant values, built once per page rather than once per record.
    reply_res_dict = {"A00001": "许可/同意", "A00002": "不许可/不同意"}
    area = "湖北"  # province
    city = ""  # city, empty by default
    district = ""  # district, empty by default
    parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/approved_pass/customShow"

    for info in info_list:
        approvecode = info.get('project_code') or info.get('_id')
        # Null text fields fall back to "" so a single incomplete record does
        # not abort the whole page with AttributeError on .strip().
        projectname = (info.get('project_name') or "").strip()
        publish_time = (info.get('real_finish_date') or "").strip()
        detail_href = "/".join(["https://tzxm.hubei.gov.cn/xxgk", publish_time, approvecode])
        approvecontent = info.get('item_sortname')
        # "depart" may be null or an empty list; only index it when populated.
        departments = info.get('depart') or []
        approvedept = departments[0].get('name') if departments else None
        approvestatus = reply_res_dict.get(info.get('reply_res'))

        data_item = NjpcListItem()  # item object that carries the record
        data_item.channel = menu.get("channel")  # crawl channel defined in the menu
        # NOTE(review): the middle element is the publish-time *value*, while the
        # other two are field names — confirm this is what the item expects.
        data_item.unique_key = ('href', publish_time, 'projectname')
        data_item.spidercode = menu.get("code")  # spider code defined in the menu
        data_item.projectname = projectname  # project name
        data_item.publishtime = publish_time  # publish time
        data_item.approvecode = approvecode
        data_item.approvestatus = approvestatus
        data_item.approvedept = approvedept
        data_item.approvecontent = approvecontent

        data_item.site = self.site
        data_item.area = area
        data_item.city = city
        data_item.district = district
        data_item.parser_url = parser_url
        data_item.href = detail_href  # detail page link
        # Body of the follow-up detail request, serialized as a JSON string.
        json_str = json.dumps({
            "_id": info.get('_id'),
            "screenKey": "SCR_l47zuqhopb",
            "switchLoginRequired": "off"
        })
        data_item.request_params = {"data": json_str, "method": "POST"}
        data_item.parser = "detail_get"
        yield data_item

    # Hand the page back to the pagination helper and emit what it returns.
    yield self.infinite_pages(request, response)
|
|
|
+
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- NjpcSpider(redis_key="lzz:hbzwfww_pfgg").start()
|
|
|
+ Spider(redis_key="lzz:hbzwfww_pfgg").start()
|