create_spider.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2018-08-28 17:38:43
  4. ---------
  5. @summary: 创建spider
  6. ---------
  7. @author: Boris
  8. @email: boris_liu@foxmail.com
  9. """
  10. import getpass
  11. import os
  12. import re
  13. import feapder.utils.tools as tools
  14. from .create_init import CreateInit
  15. def deal_file_info(file):
  16. file = file.replace("{DATE}", tools.get_current_date())
  17. # file = file.replace("{USER}", getpass.getuser())
  18. file = file.replace("{USER}", os.path.basename(os.getcwd()))
  19. return file
  20. class CreateSpider:
  21. def __init__(self):
  22. self._create_init = CreateInit()
  23. def cover_to_underline(self, key):
  24. regex = "[A-Z]*"
  25. capitals = re.findall(regex, key)
  26. if capitals:
  27. for pos, capital in enumerate(capitals):
  28. if not capital:
  29. continue
  30. if pos == 0:
  31. if len(capital) > 1:
  32. key = key.replace(capital, capital.lower() + "_", 1)
  33. else:
  34. key = key.replace(capital, capital.lower(), 1)
  35. else:
  36. if len(capital) > 1:
  37. key = key.replace(capital, "_" + capital.lower() + "_", 1)
  38. else:
  39. key = key.replace(capital, "_" + capital.lower(), 1)
  40. return key
  41. def get_spider_template(self, spider_type):
  42. if spider_type == 1:
  43. template_path = "air_spider_template.tmpl"
  44. elif spider_type == 2:
  45. template_path = "spider_template.tmpl"
  46. elif spider_type == 3:
  47. template_path = "batch_spider_template.tmpl"
  48. elif spider_type == 4:
  49. template_path = "spider_list_template.tmpl"
  50. else:
  51. raise ValueError("spider type error, support 1 2 3")
  52. template_path = os.path.abspath(
  53. os.path.join(__file__, "../../../templates", template_path)
  54. )
  55. with open(template_path, "r", encoding="utf-8") as file:
  56. spider_template = file.read()
  57. return spider_template
  58. def create_spider(self, spider_template, spider_name):
  59. spider_template = spider_template.replace("${spider_name}", spider_name)
  60. spider_template = deal_file_info(spider_template)
  61. return spider_template
  62. def save_spider_to_file(self, spider, spider_name):
  63. spider_underline = self.cover_to_underline(spider_name)
  64. spider_file = spider_underline + ".py"
  65. if os.path.exists(spider_file):
  66. confirm = input("%s 文件已存在 是否覆盖 (y/n). " % spider_file)
  67. if confirm != "y":
  68. print("取消覆盖 退出")
  69. return
  70. with open(spider_file, "w", encoding="utf-8") as file:
  71. file.write(spider)
  72. print("\n%s 生成成功" % spider_name)
  73. self._create_init.create()
  74. def create(self, spider_name, spider_type):
  75. # 检查spider_name
  76. if not re.search("^[a-zA-Z][a-zA-Z0-9_]*$", spider_name):
  77. raise Exception("爬虫名不符合命名规范,请用下划线命名或驼峰命名方式")
  78. if spider_name.islower():
  79. spider_name = tools.key2hump(spider_name)
  80. spider_template = self.get_spider_template(spider_type)
  81. spider = self.create_spider(spider_template, spider_name)
  82. self.save_spider_to_file(spider, spider_name)