3
0
Эх сурвалжийг харах

删除批次爬虫相关代码

dongzhaorui 1 жил өмнө
parent
commit
7bff274e9b

+ 0 - 105
FworkSpider/feapder/core/base_parser.py

@@ -7,12 +7,6 @@ Created on 2018-07-25 11:41:57
 @author: Boris
 @email:  boris_liu@foxmail.com
 """
-import os
-
-import feapder.utils.tools as tools
-from feapder.db.mysqldb import MysqlDB
-from feapder.network.item import UpdateItem
-from feapder.utils.log import log
 
 
 class BaseParser(object):
@@ -124,102 +118,3 @@ class BaseParser(object):
 
     def close(self):
         pass
-
-
-class BatchParser(BaseParser):
-    """
-    @summary: 批次爬虫模版
-    ---------
-    """
-
-    def __init__(
-        self, task_table, batch_record_table, task_state, date_format, mysqldb=None
-    ):
-        self._mysqldb = mysqldb or MysqlDB()  # mysqldb
-
-        self._task_table = task_table  # mysql中的任务表
-        self._batch_record_table = batch_record_table  # mysql 中的批次记录表
-        self._task_state = task_state  # mysql中任务表的state字段名
-        self._date_format = date_format  # 批次日期格式
-
-    def add_task(self):
-        """
-        @summary: 添加任务, 每次启动start_monitor 都会调用,且在init_task之前调用
-        ---------
-        ---------
-        @result:
-        """
-
-    def start_requests(self, task):
-        """
-        @summary:
-        ---------
-        @param task: 任务信息 list
-        ---------
-        @result:
-        """
-
-    def update_task_state(self, task_id, state=1, **kwargs):
-        """
-        @summary: 更新任务表中任务状态,做完每个任务时代码逻辑中要主动调用。可能会重写
-        调用方法为 yield lambda : self.update_task_state(task_id, state)
-        ---------
-        @param task_id:
-        @param state:
-        ---------
-        @result:
-        """
-
-        kwargs["id"] = task_id
-        kwargs[self._task_state] = state
-
-        sql = tools.make_update_sql(
-            self._task_table, kwargs, condition="id = {task_id}".format(task_id=task_id)
-        )
-
-        if self._mysqldb.update(sql):
-            log.debug("置任务%s状态成功" % task_id)
-        else:
-            log.error("置任务%s状态失败  sql=%s" % (task_id, sql))
-
-    def update_task_batch(self, task_id, state=1, **kwargs):
-        """
-        批量更新任务 多处调用,更新的字段必须一致
-        注意:需要 写成 yield update_task_batch(...) 否则不会更新
-        @param task_id:
-        @param state:
-        @param kwargs:
-        @return:
-        """
-        kwargs["id"] = task_id
-        kwargs[self._task_state] = state
-
-        update_item = UpdateItem(**kwargs)
-        update_item.table_name = self._task_table
-        update_item.name_underline = self._task_table + "_item"
-
-        return update_item
-
-    @property
-    def batch_date(self):
-        """
-        @summary: 获取批次时间
-        ---------
-        ---------
-        @result:
-        """
-
-        batch_date = os.environ.get("batch_date")
-        if not batch_date:
-            sql = 'select date_format(batch_date, "{date_format}") from {batch_record_table} order by id desc limit 1'.format(
-                date_format=self._date_format.replace(":%M", ":%i"),
-                batch_record_table=self._batch_record_table,
-            )
-            batch_info = MysqlDB().find(sql)  # (('2018-08-19'),)
-            if batch_info:
-                os.environ["batch_date"] = batch_date = batch_info[0][0]
-            else:
-                log.error("需先运行 start_monitor_task()")
-                os._exit(137)  # 使退出码为35072 方便爬虫管理器重启
-
-        return batch_date