Forráskód Böngészése

fix bug:修复 "spidercode" hash空值错误

dongzhaorui 2 éve
szülő
commit
aeff3d4720
1 módosított fájl, 22 hozzáadás és 3 törlés
  1. 22 3
      FworkSpider/feapder/core/parser_control.py

+ 22 - 3
FworkSpider/feapder/core/parser_control.py

@@ -497,6 +497,25 @@ class PaserControl(threading.Thread):
             log.error("失败心跳:\n {}".format(tools.dumps_json(items)))
         return send_success
 
+    @staticmethod
+    def get_spider_attribute(name, *args):
+        """Look up `name` on obj1 (dict key or attribute), then on obj2 as an attribute; return "" if the result is None."""
+        obj1, obj2 = args or (None, None)  # NOTE(review): unpacking raises ValueError unless zero or exactly two objects are passed
+
+        val = None
+        if obj1 is not None:
+            if isinstance(obj1, dict):
+                val = obj1.get(name)
+                if not val and name == "spidercode":
+                    val = obj1.get("code")  # fall back to the "code" key when "spidercode" is missing or falsy
+            else:
+                val = getattr(obj1, name, None)
+
+        if not val and obj2 is not None:
+            val = getattr(obj2, name, None)  # any falsy value from obj1 is replaced by obj2's attribute (or None)
+
+        return val if val is not None else ""  # only None becomes ""; other falsy values are returned unchanged
+
     def spider_heartbeat(self, request, response, **kwargs):
         """爬虫心跳"""
         parser = kwargs["parser"]
@@ -508,9 +527,9 @@ class PaserControl(threading.Thread):
         status_code = getattr(response, "status_code", -1)
 
         item = getattr(request, "item", {})
-        site = (item.get("site") if isinstance(item, dict) else getattr(item, "site", None)) or getattr(parser, "site", None) or "unknown"
-        channel = (item.get("channel") if isinstance(item, dict) else getattr(item, "channel", None)) or getattr(parser, "channel", None) or "unknown"
-        code = (item.get("code") or item.get("spidercode")) if isinstance(item, dict) else getattr(item, "spidercode", "unknown")
+        site = self.get_spider_attribute("site", item, parser)
+        channel = self.get_spider_attribute("channel", item, parser)
+        code = self.get_spider_attribute("spidercode", item, parser)
         business_type: str = parser.__business_type__  # 爬虫业务类型
         run_time = tools.get_current_date(date_format="%Y-%m-%d")  # 运行时间,单位:天
         spider_id = tools.get_md5(code + business_type + run_time)