湖北政务服务网-备案查询-详情页.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2024-03-15
  4. ---------
  5. @summary: 湖北政务服务网-备案查询
  6. ---------
  7. @author: lzz
  8. """
  9. import feapder
  10. from items.njpc_item import DataNjpcItem
  11. from hbzwfww_area_dict import area_dict
  12. from utils import token as get_token
  13. class Spider(feapder.PlanToBuildDetailSpider):
  14. def start_requests(self):
  15. data_list = self.get_tasks_by_rabbitmq(limit=50)
  16. for item in data_list:
  17. # log.debug(item)
  18. request_params = item.get("request_params")
  19. timeout = request_params.pop('timeout', 10)
  20. yield feapder.Request(url=item.get("parser_url"),
  21. item=item,
  22. deal_detail=item.get("deal_detail"),
  23. callback=item.get("parser"),
  24. timeout=timeout,
  25. **request_params)
  26. def download_midware(self, request):
  27. token = get_token()
  28. request.headers = {
  29. "Accept": "application/json, text/plain, */*",
  30. "Accept-Language": "zh-CN,zh;q=0.9",
  31. "Connection": "keep-alive",
  32. "Content-Type": "application/json",
  33. "Origin": "http://zwfw.hubei.gov.cn",
  34. "Referer": "http://zwfw.hubei.gov.cn/",
  35. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
  36. "authtoken": "null",
  37. "compkey": "hb",
  38. "extoken": f"{token}",
  39. }
  40. def detail_get(self, request, response):
  41. items = request.item
  42. data_item = DataNjpcItem(**items)
  43. detail_info = response.json.get('data')
  44. baz_dict = {"BA01": "尚未审核", "BA02": "有效", "BA03": "已注销", "BA04": "无此信息"}
  45. ba_examine_status = baz_dict.get(detail_info.get('hb_Record_card'))
  46. unit_nature_dict = {"A00006": "集体企业", "A00001": "政府机关", "A00007": "私营企业", "A00012": "其他",
  47. "A00004": "国有及国有控股企业", "A00011": "村民委员会", "A00005": "股份制企业",
  48. "A00008": "港澳台及外资企业"}
  49. unit_nature = unit_nature_dict.get(detail_info.get('unit_nature')) or "暂无内容"
  50. ba_apply_status_dict = {"A00001": "备案注销", "A00002": "变更备案证", "A00003": "生成备案证", }
  51. ba_apply_status = ba_apply_status_dict.get(detail_info.get('ba_apply_status')) or "暂无内容"
  52. construct_nature_dict = {"4": "技改及其他", "0": "新建", "1": "扩建", "2": "迁建", "3": "改建", }
  53. construct_nature = construct_nature_dict.get(detail_info.get('construct_nature')) or "暂无内容"
  54. project_address = ''
  55. if detail_info.get('project_address'):
  56. area_code = detail_info.get('project_address')[0]
  57. darea = ""
  58. dcity = ""
  59. ddistrict = ""
  60. for tf in area_dict:
  61. if area_code and len(area_code) > 0:
  62. if area_code[0] == tf.get('_keye'):
  63. darea = tf.get('name')
  64. if len(area_code) > 1:
  65. for ts in tf.get('children'):
  66. if area_code[1] == ts.get('_keye'):
  67. dcity = ts.get('name')
  68. if len(area_code) > 2:
  69. for tt in ts.get('children'):
  70. if area_code[2] == tt.get('_keye'):
  71. ddistrict = tt.get('name')
  72. project_address = f"{darea}/{dcity}/{ddistrict}"
  73. html = f'''
  74. <form>
  75. <div>
  76. <div>
  77. <div><span>项目名称</span></div>
  78. <div><span>{detail_info.get('project_name')}</span></div>
  79. </div>
  80. <div>
  81. <div><span>项目代码</span></div>
  82. <div><span>{detail_info.get('project_code')}</span></div>
  83. </div>
  84. <div>
  85. <div><span>建设内容及规</span></div>
  86. <div><span>{detail_info.get('description')}</span></div>
  87. </div>
  88. <div>
  89. <div><span>项目单位</span></div>
  90. <div><span>{detail_info.get('unit_name')}</span></div>
  91. </div>
  92. <div>
  93. <div><span>法人代表姓名</span></div>
  94. <div><span>{detail_info.get('legal_name')}</span></div>
  95. </div>
  96. <div>
  97. <div><span>项目单位性质</span></div>
  98. <div><span>{unit_nature}</span></div>
  99. </div>
  100. <div>
  101. <div><span>所属行政区划</span></div>
  102. <div><span>{project_address}</span></div>
  103. </div>
  104. <div>
  105. <div><span>项目总投资</span></div>
  106. <div><span>{detail_info.get('amount')}</span></div>
  107. </div>
  108. <div>
  109. <div><span>建设性质</span></div>
  110. <div><span>{construct_nature}</span></div>
  111. </div>
  112. <div>
  113. <div><span>拟开工时间</span></div>
  114. <div><span>{detail_info.get('nstart_date')}</span></div>
  115. </div>
  116. <div>
  117. <div><span>备案审核状态</span></div>
  118. <div><span>{ba_examine_status}</span></div>
  119. </div>
  120. <div>
  121. <div><span>备案证状态</span></div>
  122. <div><span>{ba_apply_status}</span></div>
  123. </div>
  124. <div>
  125. <div><span>申报日期</span></div>
  126. <div><span>{detail_info.get('declaration_date')}</span></div>
  127. </div>
  128. </div>
  129. </form>
  130. '''
  131. data_item.contenthtml = html.replace('None', '暂无内容').replace('//', '')
  132. total_investment = detail_info.get('amount') or ''
  133. if total_investment:
  134. data_item.total_investment = str(total_investment) + "万元"
  135. yield data_item
  136. if __name__ == '__main__':
  137. Spider(redis_key="lzz:hbzwfww_bacx").start()