import random
import time
import traceback

from utils.databases import mongo_table, int2long
from utils.execptions import JyBasicException
from utils.log import logger


class Scheduler:
    """Context-manager style crawl scheduler.

    Tracks per-task crawl state and records crawl failures to the
    ``py_spider.crawl_error`` Mongo collection. Intended usage::

        with Scheduler(site, crawl_type) as scheduler:
            ...  # exceptions raised here are recorded and suppressed

    NOTE(review): ``__exit__`` returns True, so *every* exception raised
    inside the ``with`` block (including KeyboardInterrupt) is swallowed
    after being logged — confirm callers rely on this before changing.
    """

    def __init__(self, site, crawl_type, **kwargs):
        self.site = site
        self.crawl_type = crawl_type
        self.crawl_start = False            # True while inside the `with` block
        self.count = None                   # daily crawl count
        self.total = None                   # daily crawl limit
        self.account_id = None
        self.record_id = None
        self.user = None
        self.spider_code = None
        self.crawl_url = None
        self.crawl_params = None
        self.crawl_exception = None
        self.kwargs = kwargs
        # Handle to the Mongo error-log collection (opens a DB connection).
        self.crawl_error_tab = mongo_table('py_spider', 'crawl_error')

    def finished(self, execute_next_time=None):
        """Mark the task finished and block until the next run is due.

        :param execute_next_time: seconds to sleep; ``None`` uses the
            600-second default of :meth:`sleep`.
        """
        self.sleep(execute_next_time)

    def err_record(self, e: JyBasicException):
        """Persist a crawl failure to the ``crawl_error`` collection.

        :param e: exception describing the failure.
        """
        # Fix: route through the project logger instead of bare print().
        logger.error(e)
        rows = {
            'spidercode': self.spider_code,
            'url': self.crawl_url,
            'status_code': 10500,
            # Fix: store str(e) — BSON cannot encode arbitrary exception
            # objects, so inserting the raw object could raise InvalidDocument.
            'reason': str(e),
            # NOTE(review): 'params' holding the error title while 'reason'
            # held the exception looks like swapped fields — verify against
            # the crawl_error schema before changing further.
            'params': '未知系统错误',
            'crawl_time': int2long(int(time.time())),
            'crawl_type': self.crawl_type,
        }
        self.crawl_error_tab.insert_one(rows)

    def __enter__(self):
        logger.info('[任务开始]')
        self.crawl_start = True
        return self

    @staticmethod
    def wait_for_next_task(wait_time=None):
        """Sleep between tasks.

        :param wait_time: seconds to sleep; ``None`` (or 0) picks a random
            5-14 second jitter, same distribution as the original
            ``random.choice(range(5, 15))``.
        """
        time.sleep(wait_time or random.randint(5, 14))

    @staticmethod
    def sleep(wait_time=None):
        """Sleep ``wait_time`` seconds (default 600)."""
        time.sleep(wait_time or 600)

    def __exit__(self, exc_type, exc_val, exc_tb):
        logger.info('[任务结束]')
        self.crawl_start = False
        if exc_type is not None:
            errmsg = traceback.extract_tb(exc_tb)
            e = JyBasicException(
                code=10500,
                reason=str(exc_type),
                title='未知系统错误'
            )
            self.err_record(e)
            logger.error(f'错误类型: {exc_type}, 错误内容: {exc_val}, 错误详情: {errmsg}')
        # Returning True suppresses any exception from the `with` body —
        # kept unchanged for backward compatibility (see class docstring).
        return True