# -*- coding: utf-8 -*-
"""
Created on 2018-08-28 17:38:43
---------
@summary: Create a spider
---------
@author: Boris
@email: boris_liu@foxmail.com
"""
- import getpass
- import os
- import re
- import feapder.utils.tools as tools
- from .create_init import CreateInit
def deal_file_info(file, author):
    """Fill in the header placeholders of a template.

    Args:
        file: Template text that may contain ``{DATE}`` and ``{USER}``.
        author: Text substituted for every ``{USER}`` placeholder; it is
            supplied by the caller.

    Returns:
        The template text with ``{DATE}`` replaced by the value of
        ``tools.get_current_date()`` and ``{USER}`` replaced by ``author``.
    """
    return file.replace("{DATE}", tools.get_current_date()).replace(
        "{USER}", author
    )
class CreateSpider:
    """Generate a spider source file from one of the bundled templates."""

    # Numeric spider type accepted by ``create`` -> template file name.
    _TEMPLATE_FILES = {
        1: "air_spider_template.tmpl",
        2: "spider_template.tmpl",
        3: "batch_spider_template.tmpl",
        4: "spider_list_template.tmpl",
        5: "detail_template.tmpl",
        6: "njpc_list_template.tmpl",
        7: "njpc_detail_template.tmpl",
    }

    def __init__(self):
        self._create_init = CreateInit()

    def cover_to_underline(self, key):
        """Convert a camel-case name to a lower-case, underscore-separated one.

        Every maximal run of capital letters is lower-cased. A run that does
        not start the name is preceded by ``_``, and a run longer than one
        letter is also followed by ``_``. A separator is only added where
        there is a neighbouring character that is not already ``_``, so the
        result never ends with a dangling underscore (``TestHTML`` ->
        ``test_html``) and never doubles one (``test_Spider`` ->
        ``test_spider``).

        Args:
            key: The name to convert.

        Returns:
            The converted name; a name without capitals is returned as is.
        """

        def _lower_run(match):
            run = match.group().lower()
            start, end = match.span()
            # Leading separator: not at the start, and not after an existing "_".
            before = "_" if start > 0 and key[start - 1] != "_" else ""
            # Trailing separator: only for multi-letter runs that are followed
            # by something other than "_" (i.e. not at the end of the name).
            after = (
                "_" if len(run) > 1 and end < len(key) and key[end] != "_" else ""
            )
            return before + run + after

        return re.sub("[A-Z]+", _lower_run, key)

    def get_spider_template(self, spider_type):
        """Read the template text for the given spider type.

        Args:
            spider_type: One of the numeric types 1 to 7.

        Returns:
            The content of the matching ``*.tmpl`` file, read as UTF-8.

        Raises:
            ValueError: If ``spider_type`` is not a supported type.
        """
        try:
            template_name = self._TEMPLATE_FILES[spider_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are simply unsupported.
            raise ValueError("spider type error, support 1 2 3 4 5 6 7") from None

        # The templates directory is resolved relative to this source file.
        template_path = os.path.abspath(
            os.path.join(__file__, "../../../templates", template_name)
        )
        with open(template_path, "r", encoding="utf-8") as file:
            return file.read()

    def create_spider(self, spider_template, spider_name, author):
        """Render a template into spider source code.

        Args:
            spider_template: Template text as returned by
                ``get_spider_template``.
            spider_name: Value substituted for ``${spider_name}``.
            author: Passed on to ``deal_file_info`` together with the text.

        Returns:
            The rendered source code.
        """
        rendered = spider_template.replace("${spider_name}", spider_name)
        return deal_file_info(rendered, author)

    def save_spider_to_file(self, spider, spider_name):
        """Write the spider source to ``<underscored spider_name>.py``.

        The path is relative, so the file lands in the current working
        directory. If it already exists the user is asked for confirmation,
        and anything other than ``y`` cancels without writing. After a
        successful write ``self._create_init.create()`` is run.

        Args:
            spider: The source code to write.
            spider_name: The spider name; its underscored form names the file.
        """
        spider_file = self.cover_to_underline(spider_name) + ".py"

        if os.path.exists(spider_file):
            confirm = input("%s 文件已存在 是否覆盖 (y/n).  " % spider_file)
            if confirm != "y":
                print("取消覆盖  退出")
                return

        with open(spider_file, "w", encoding="utf-8") as file:
            file.write(spider)
            print("\n%s 生成成功" % spider_name)

        self._create_init.create()

    def create(self, spider_name, spider_type, author):
        """Validate the name, render the chosen template and save the file.

        Args:
            spider_name: Must start with a letter and contain only letters,
                digits and underscores. An all-lower-case name is first passed
                through ``tools.key2hump`` (presumably snake_case to
                CamelCase; the helper is not visible here).
            spider_type: Numeric template type, see ``get_spider_template``.
            author: Author text written into the file header.

        Raises:
            Exception: If ``spider_name`` does not match the naming rule.
            ValueError: If ``spider_type`` is not supported.
        """
        # Validate spider_name
        if not re.search("^[a-zA-Z][a-zA-Z0-9_]*$", spider_name):
            raise Exception("爬虫名不符合命名规范,请用下划线命名或驼峰命名方式")

        if spider_name.islower():
            spider_name = tools.key2hump(spider_name)

        spider_template = self.get_spider_template(spider_type)
        spider = self.create_spider(spider_template, spider_name, author)
        self.save_spider_to_file(spider, spider_name)
|