create_spider.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2018-08-28 17:38:43
  4. ---------
  5. @summary: 创建spider
  6. ---------
  7. @author: Boris
  8. @email: boris_liu@foxmail.com
  9. """
  10. import getpass
  11. import os
  12. import re
  13. import feapder.utils.tools as tools
  14. from .create_init import CreateInit
  15. def deal_file_info(file):
  16. file = file.replace("{DATE}", tools.get_current_date())
  17. file = file.replace("{USER}", getpass.getuser())
  18. return file
  19. class CreateSpider:
  20. def __init__(self):
  21. self._create_init = CreateInit()
  22. def cover_to_underline(self, key):
  23. regex = "[A-Z]*"
  24. capitals = re.findall(regex, key)
  25. if capitals:
  26. for pos, capital in enumerate(capitals):
  27. if not capital:
  28. continue
  29. if pos == 0:
  30. if len(capital) > 1:
  31. key = key.replace(capital, capital.lower() + "_", 1)
  32. else:
  33. key = key.replace(capital, capital.lower(), 1)
  34. else:
  35. if len(capital) > 1:
  36. key = key.replace(capital, "_" + capital.lower() + "_", 1)
  37. else:
  38. key = key.replace(capital, "_" + capital.lower(), 1)
  39. return key
  40. def get_spider_template(self, spider_type):
  41. if spider_type == 1:
  42. template_path = "air_spider_template.tmpl"
  43. elif spider_type == 2:
  44. template_path = "spider_template.tmpl"
  45. elif spider_type == 3:
  46. template_path = "batch_spider_template.tmpl"
  47. elif spider_type == 4:
  48. template_path = "spider_list_template.tmpl"
  49. else:
  50. raise ValueError("spider type error, support 1 2 3")
  51. template_path = os.path.abspath(
  52. os.path.join(__file__, "../../../templates", template_path)
  53. )
  54. with open(template_path, "r", encoding="utf-8") as file:
  55. spider_template = file.read()
  56. return spider_template
  57. def create_spider(self, spider_template, spider_name):
  58. spider_template = spider_template.replace("${spider_name}", spider_name)
  59. spider_template = deal_file_info(spider_template)
  60. return spider_template
  61. def save_spider_to_file(self, spider, spider_name):
  62. spider_underline = self.cover_to_underline(spider_name)
  63. spider_file = spider_underline + ".py"
  64. if os.path.exists(spider_file):
  65. confirm = input("%s 文件已存在 是否覆盖 (y/n). " % spider_file)
  66. if confirm != "y":
  67. print("取消覆盖 退出")
  68. return
  69. with open(spider_file, "w", encoding="utf-8") as file:
  70. file.write(spider)
  71. print("\n%s 生成成功" % spider_name)
  72. self._create_init.create()
  73. def create(self, spider_name, spider_type):
  74. # 检查spider_name
  75. if not re.search("^[a-zA-Z][a-zA-Z0-9_]*$", spider_name):
  76. raise Exception("爬虫名不符合命名规范,请用下划线命名或驼峰命名方式")
  77. if spider_name.islower():
  78. spider_name = tools.key2hump(spider_name)
  79. spider_template = self.get_spider_template(spider_type)
  80. spider = self.create_spider(spider_template, spider_name)
  81. self.save_spider_to_file(spider, spider_name)