|
@@ -6,30 +6,16 @@ Created on 2025-02-15
|
|
---------
|
|
---------
|
|
@author: lzz
|
|
@author: lzz
|
|
"""
|
|
"""
|
|
-import feapder
|
|
|
|
-from items.njpc_item import NjpcListItem
|
|
|
|
-from collections import namedtuple
|
|
|
|
import json
|
|
import json
|
|
-import execjs
|
|
|
|
-
|
|
|
|
|
|
+from collections import namedtuple
|
|
|
|
|
|
-def token():
|
|
|
|
- ex_js = '''
|
|
|
|
- function token() {
|
|
|
|
- var Ie = 911
|
|
|
|
- , Pe = 20170706
|
|
|
|
- , Fe = 1e9;
|
|
|
|
- extoken = (n = Math.floor(10 * Math.random()) * Fe,
|
|
|
|
- (((new Date).getTime() % Fe + n) * Ie + Pe).toString(36))
|
|
|
|
|
|
+import feapder
|
|
|
|
+from items.njpc_item import NjpcListItem
|
|
|
|
|
|
- return extoken
|
|
|
|
- }
|
|
|
|
- '''
|
|
|
|
- ctx = execjs.compile(ex_js)
|
|
|
|
- return ctx.call('token')
|
|
|
|
|
|
+from utils import token
|
|
|
|
|
|
|
|
|
|
-class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
|
|
|
+class Spider(feapder.PlanToBuildListSpider):
|
|
|
|
|
|
def start_callback(self):
|
|
def start_callback(self):
|
|
Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
|
|
Menu = namedtuple('Menu', ['channel', 'code', 'crawl_page'])
|
|
@@ -100,9 +86,10 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
info_list = response.json.get('data').get('list')
|
|
info_list = response.json.get('data').get('list')
|
|
for info in info_list:
|
|
for info in info_list:
|
|
approvecode = info.get('pro_code') or info.get('_id')
|
|
approvecode = info.get('pro_code') or info.get('_id')
|
|
- detail_href = "https://tzxm.hubei.gov.cn/xxgk?" + approvecode
|
|
|
|
projectname = info.get('pro_name').strip()
|
|
projectname = info.get('pro_name').strip()
|
|
publish_time = info.get('start_date').strip()
|
|
publish_time = info.get('start_date').strip()
|
|
|
|
+ url = "/".join(["https://tzxm.hubei.gov.cn/xxgk", publish_time, approvecode])
|
|
|
|
+ detail_href = f"{url}?q={menu['channel']}"
|
|
|
|
|
|
area = "湖北" # 省份
|
|
area = "湖北" # 省份
|
|
city = "" # 城市
|
|
city = "" # 城市
|
|
@@ -110,7 +97,7 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
|
|
|
data_item = NjpcListItem() # 存储数据的管道
|
|
data_item = NjpcListItem() # 存储数据的管道
|
|
data_item.channel = menu.get("channel") # 最上方定义的抓取栏目 (编辑器定的)
|
|
data_item.channel = menu.get("channel") # 最上方定义的抓取栏目 (编辑器定的)
|
|
- data_item.unique_key = ('href', publish_time,'projectname')
|
|
|
|
|
|
+ data_item.unique_key = ('href', publish_time, 'projectname')
|
|
data_item.spidercode = menu.get("code") # 最上方定义的爬虫code(编辑器定的)
|
|
data_item.spidercode = menu.get("code") # 最上方定义的爬虫code(编辑器定的)
|
|
data_item.projectname = projectname # 项目名称
|
|
data_item.projectname = projectname # 项目名称
|
|
data_item.publishtime = publish_time # 发布时间
|
|
data_item.publishtime = publish_time # 发布时间
|
|
@@ -122,14 +109,13 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
data_item.district = district # 城市 默认为空
|
|
data_item.district = district # 城市 默认为空
|
|
data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/HP_GS_info/customShow"
|
|
data_item.parser_url = "https://tzxm.hubei.gov.cn:7216/api/proxy/custom/hb/hb_aiapp/xMod/HP_GS_info/customShow"
|
|
data_item.href = detail_href # 详情链接
|
|
data_item.href = detail_href # 详情链接
|
|
- ddata = {
|
|
|
|
|
|
+ json_str = json.dumps({
|
|
"_id": info.get('_id'),
|
|
"_id": info.get('_id'),
|
|
"screenKey": "SCR_l5oxx6va7f",
|
|
"screenKey": "SCR_l5oxx6va7f",
|
|
"switchLoginRequired": "off"
|
|
"switchLoginRequired": "off"
|
|
- }
|
|
|
|
- ddata = json.dumps(ddata)
|
|
|
|
|
|
+ })
|
|
data_item.request_params = {
|
|
data_item.request_params = {
|
|
- "data": ddata,
|
|
|
|
|
|
+ "data": json_str,
|
|
"method": "POST"
|
|
"method": "POST"
|
|
}
|
|
}
|
|
data_item.parser = "detail_get"
|
|
data_item.parser = "detail_get"
|
|
@@ -140,4 +126,4 @@ class NjpcSpider(feapder.PlanToBuildListSpider):
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
- NjpcSpider(redis_key="lzz:hbzwfww_npzgs").start()
|
|
|
|
|
|
+ Spider(redis_key="lzz:hbzwfww_npzgs").start()
|