import random
import time
import traceback

from utils.databases import mongo_table, int2long
from utils.execptions import JyBasicException
from utils.log import logger


class Scheduler:

    def __init__(self, site, crawl_type, **kwargs):
        self.site = site
        self.crawl_type = crawl_type
        self.crawl_start = False
        self.count = None  # number of items crawled today
        self.total = None  # daily crawl limit
        self.account_id = None
        self.record_id = None
        self.user = None
        self.spider_code = None
        self.crawl_url = None
        self.crawl_params = None
        self.crawl_exception = None
        self.kwargs = kwargs
        self.crawl_error_tab = mongo_table('py_spider', 'crawl_error')

    def finished(self, execute_next_time=None):
        # logger.info("task finished")
        self.sleep(execute_next_time)
    def err_record(self, e: JyBasicException):
        logger.error(e)
        rows = {
            'spidercode': self.spider_code,
            'url': self.crawl_url,
            'status_code': 10500,
            'reason': str(e),  # store the message text, not the exception object
            'params': self.crawl_params,
            'crawl_time': int2long(int(time.time())),
            'crawl_type': self.crawl_type,
        }
        self.crawl_error_tab.insert_one(rows)
- def __enter__(self):
- logger.info(f'[任务开始]')
- self.crawl_start = True
- return self
- @staticmethod
- def wait_for_next_task(wait_time=None):
- _sleep = (wait_time or random.choice(range(5, 15)))
- time.sleep(_sleep)
- @staticmethod
- def sleep(wait_time=None):
- sleep_time = (wait_time or 600)
- time.sleep(sleep_time)
    def __exit__(self, exc_type, exc_val, exc_tb):
        logger.info('[task finished]')
        self.crawl_start = False
        if exc_type is not None:
            errmsg = ''.join(traceback.format_tb(exc_tb))
            e = JyBasicException(
                code=10500,
                reason=str(exc_type),
                title='unknown system error'
            )
            self.err_record(e)
            logger.error(f'error type: {exc_type}, message: {exc_val}, traceback: {errmsg}')
        # returning True suppresses the exception so the caller keeps running
        return True
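

# A minimal usage sketch (not part of the original module): it assumes the
# utils.* packages above are importable and that MongoDB is reachable; the
# site name, spider code, and URL below are hypothetical placeholders.
# Because __exit__ returns True, an exception raised inside the with block
# is recorded via err_record and suppressed rather than propagated.
if __name__ == '__main__':
    with Scheduler(site='example_site', crawl_type='list') as scheduler:
        scheduler.spider_code = 'demo_spider'             # hypothetical
        scheduler.crawl_url = 'https://example.com/list'  # hypothetical
        # ... fetch and parse pages here ...
        scheduler.wait_for_next_task()  # 5-14s pause before the next task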