Kaynağa Gözat

调整基类实例属性

dongzhaorui 1 yıl önce
ebeveyn
işleme
61c765602d
1 değiştirilmiş dosya ile 39 ekleme ve 36 silme
  1. 39 36
      FworkSpider/feapder/network/item.py

+ 39 - 36
FworkSpider/feapder/network/item.py

@@ -148,44 +148,38 @@ class UpdateItem(Item):
 class BaseItem(Item):
     """数据采集基础类"""
 
-    def __init__(self, business_id=None, save=True, **kwargs):
-        super(BaseItem, self).__init__(
-            save=save,
-            business_id=business_id or tools.get_uuid().replace('-', ''),  # 数据流水编号
-            **kwargs
-        )
-
-        self.site = ""  # 站点名称(数据源定义)
-        self.channel = ""  # 栏目名称(数据源定义)
-        self.spidercode = ""  # 爬虫代码(数据源定义)
+    def __init__(self, save=True, site='', channel='', spidercode='',
+                 area='全国', city='', district='', href='', pyuuid=None):
+        """
 
-        self.area = "全国"  # 省
-        self.city = ""  # 市
-        self.district = ""  # 区/县
+        @param pyuuid: 采集数据唯一标识
+        @param save: 是否保存到数据库
+        @param site: 站点名称(数据源定义)
+        @param channel: 栏目名称(数据源定义)
+        @param spidercode: 爬虫代码(数据源定义)
+        @param area: 省, 默认:全国
+        @param city: 市
+        @param district: 区/县
+        @param href: 采集地址
+        """
+        super(BaseItem, self).__init__()
 
-        self.href = ""  # 采集地址
+        self.pyuuid = pyuuid or tools.get_uuid().replace('-', '')
         self.comeintime = tools.ensure_int64(
             tools.get_current_timestamp()
         )  # 入库时间
 
-    @property
-    def save(self) -> bool:
-        return self.__dict__["save"]
+        self.save = save
 
-    @save.setter
-    def save(self, state: bool):
-        """
-        是否持久化存储本数据条目
-            持久化存储 save=True;临时存储 save=False
+        self.site = site
+        self.channel = channel
+        self.spidercode = spidercode
 
-        @param state: 持久化状态
-        """
-        self.__dict__["save"] = state
+        self.area = area
+        self.city = city
+        self.district = district
 
-    @property
-    def business_id(self):
-        """本条目数据流水编号"""
-        return self.__dict__["business_id"]
+        self.href = href
 
     @property
     def fingerprint(self):
@@ -234,12 +228,21 @@ class BaseListItem(BaseItem):
 class BaseDetailItem(BaseItem):
     """详情数据采集基础类"""
 
-    def __init__(self, **kwargs):
-        super(BaseDetailItem, self).__init__()
+    def __init__(self, title='', contenthtml='', detail='', sendflag='false',
+                 projectinfo=None, **kwargs):
+        """
+
+        @param title: 详情页标题
+        @param contenthtml: 详情页源码
+        @param detail: 清洗之后的详情页源码
+        @param sendflag: 该数据是否保存到正式库
+        @param projectinfo: 附件信息,格式详见剑鱼数据采集规范
+        """
+        super(BaseDetailItem, self).__init__(**kwargs)
 
-        self.title = ""  # 详情页标题
-        self.contenthtml = ""  # 详情页源码
-        self.detail = ""  # 清洗之后的详情页源码
-        self.projectinfo = None  # 附件信息,格式详见剑鱼数据采集规范
+        self.title = title
+        self.contenthtml = contenthtml
+        self.detail = detail
+        self.projectinfo = projectinfo
 
-        self.sendflag = "false"  # 该数据是否保存到正式库
+        self.sendflag = sendflag