create_spider.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
# -*- coding: utf-8 -*-
"""
Created on 2018-08-28 17:38:43
---------
@summary: Create a spider
---------
@author: Boris
@email: boris_liu@foxmail.com
"""
  10. import getpass
  11. import os
  12. import re
  13. import feapder.utils.tools as tools
  14. from .create_init import CreateInit
  15. def deal_file_info(file, author):
  16. file = file.replace("{DATE}", tools.get_current_date())
  17. # file = file.replace("{USER}", getpass.getuser())
  18. file = file.replace("{USER}", author)
  19. return file
  20. class CreateSpider:
  21. def __init__(self):
  22. self._create_init = CreateInit()
  23. def cover_to_underline(self, key):
  24. regex = "[A-Z]*"
  25. capitals = re.findall(regex, key)
  26. if capitals:
  27. for pos, capital in enumerate(capitals):
  28. if not capital:
  29. continue
  30. if pos == 0:
  31. if len(capital) > 1:
  32. key = key.replace(capital, capital.lower() + "_", 1)
  33. else:
  34. key = key.replace(capital, capital.lower(), 1)
  35. else:
  36. if len(capital) > 1:
  37. key = key.replace(capital, "_" + capital.lower() + "_", 1)
  38. else:
  39. key = key.replace(capital, "_" + capital.lower(), 1)
  40. return key
  41. def get_spider_template(self, spider_type):
  42. if spider_type == 1:
  43. template_path = "air_spider_template.tmpl"
  44. elif spider_type == 2:
  45. template_path = "spider_template.tmpl"
  46. elif spider_type == 3:
  47. template_path = "batch_spider_template.tmpl"
  48. elif spider_type == 4:
  49. template_path = "spider_list_template.tmpl"
  50. elif spider_type == 5:
  51. template_path = "detail_template.tmpl"
  52. elif spider_type == 6:
  53. template_path = "njpc_list_template.tmpl"
  54. elif spider_type == 7:
  55. template_path = "njpc_detail_template.tmpl"
  56. else:
  57. raise ValueError("spider type error, support 1 2 3 4 5 6 7")
  58. template_path = os.path.abspath(
  59. os.path.join(__file__, "../../../templates", template_path)
  60. )
  61. with open(template_path, "r", encoding="utf-8") as file:
  62. spider_template = file.read()
  63. return spider_template
  64. def create_spider(self, spider_template, spider_name, author):
  65. spider_template = spider_template.replace("${spider_name}", spider_name)
  66. spider_template = deal_file_info(spider_template, author)
  67. return spider_template
  68. def save_spider_to_file(self, spider, spider_name):
  69. spider_underline = self.cover_to_underline(spider_name)
  70. spider_file = spider_underline + ".py"
  71. if os.path.exists(spider_file):
  72. confirm = input("%s 文件已存在 是否覆盖 (y/n). " % spider_file)
  73. if confirm != "y":
  74. print("取消覆盖 退出")
  75. return
  76. with open(spider_file, "w", encoding="utf-8") as file:
  77. file.write(spider)
  78. print("\n%s 生成成功" % spider_name)
  79. self._create_init.create()
  80. def create(self, spider_name, spider_type, author):
  81. # 检查spider_name
  82. if not re.search("^[a-zA-Z][a-zA-Z0-9_]*$", spider_name):
  83. raise Exception("爬虫名不符合命名规范,请用下划线命名或驼峰命名方式")
  84. if spider_name.islower():
  85. spider_name = tools.key2hump(spider_name)
  86. spider_template = self.get_spider_template(spider_type)
  87. spider = self.create_spider(spider_template, spider_name, author)
  88. self.save_spider_to_file(spider, spider_name)