crawl_scheduler.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import random
  2. import time
  3. import traceback
  4. from utils.databases import mongo_table, int2long
  5. from utils.execptions import JyBasicException
  6. from utils.log import logger
  7. class Scheduler:
  8. def __init__(self, site, crawl_type, **kwargs):
  9. self.site = site
  10. self.crawl_type = crawl_type
  11. self.crawl_start = False
  12. self.count = None # 日采集数量
  13. self.total = None # 日采集上限
  14. self.account_id = None
  15. self.record_id = None
  16. self.user = None
  17. self.spider_code = None
  18. self.crawl_url = None
  19. self.crawl_params = None
  20. self.crawl_exception = None
  21. self.kwargs = kwargs
  22. self.crawl_error_tab = mongo_table('py_spider', 'crawl_error')
  23. def finished(self, execute_next_time=None):
  24. # logger.info("任务结束")
  25. self.sleep(execute_next_time)
  26. def err_record(self, e: JyBasicException):
  27. print(e)
  28. rows = {
  29. 'spidercode': self.spider_code,
  30. 'url': self.crawl_url,
  31. 'status_code': 10500,
  32. 'reason': e,
  33. 'params': '未知系统错误',
  34. 'crawl_time': int2long(int(time.time())),
  35. 'crawl_type': self.crawl_type,
  36. }
  37. self.crawl_error_tab.insert_one(rows)
  38. def __enter__(self):
  39. logger.info(f'[任务开始]')
  40. self.crawl_start = True
  41. return self
  42. @staticmethod
  43. def wait_for_next_task(wait_time=None):
  44. _sleep = (wait_time or random.choice(range(5, 15)))
  45. time.sleep(_sleep)
  46. @staticmethod
  47. def sleep(wait_time=None):
  48. sleep_time = (wait_time or 600)
  49. time.sleep(sleep_time)
  50. def __exit__(self, exc_type, exc_val, exc_tb):
  51. logger.info(f'[任务结束]')
  52. self.crawl_start = False
  53. if exc_type is not None:
  54. errmsg = traceback.extract_tb(exc_tb)
  55. e = JyBasicException(
  56. code=10500,
  57. reason=str(exc_type),
  58. title='未知系统错误'
  59. )
  60. self.err_record(e)
  61. logger.error(f'错误类型: {exc_type}, 错误内容: {exc_val}, 错误详情: {errmsg}')
  62. return True