|
@@ -497,6 +497,25 @@ class PaserControl(threading.Thread):
|
|
|
log.error("失败心跳:\n {}".format(tools.dumps_json(items)))
|
|
|
return send_success
|
|
|
|
|
|
@staticmethod
def get_spider_attribute(name, *args):
    """Look up ``name`` on a primary source, then on a fallback source.

    Args:
        name: Key (for a dict source) or attribute name to look up.
        *args: Zero, one, or two lookup sources, in the order
            ``(primary, fallback)``. The primary source may be a dict
            (read with ``.get``) or any object (read with ``getattr``).
            The fallback source is always read with ``getattr``.

    Returns:
        The first truthy value found. If the primary value is falsy, the
        fallback's value is used when a fallback is given. An empty
        string is returned when the final value is ``None``.

    Raises:
        ValueError: If more than two lookup sources are passed.
    """
    # Pad to exactly two sources so a single-source call works instead of
    # failing on tuple unpacking. With more than two sources the padding is
    # empty and the unpacking still raises ValueError, as it did before.
    primary, fallback = args + (None,) * (2 - len(args))

    value = None
    if primary is not None:
        if isinstance(primary, dict):
            value = primary.get(name)
            # Dict sources may store the spider code under "code" instead.
            if not value and name == "spidercode":
                value = primary.get("code")
        else:
            value = getattr(primary, name, None)

    # A falsy primary value (missing, empty, 0) defers to the fallback.
    if not value and fallback is not None:
        value = getattr(fallback, name, None)

    return value if value is not None else ""
|
|
|
+
|
|
|
def spider_heartbeat(self, request, response, **kwargs):
|
|
|
"""爬虫心跳"""
|
|
|
parser = kwargs["parser"]
|
|
@@ -508,9 +527,9 @@ class PaserControl(threading.Thread):
|
|
|
status_code = getattr(response, "status_code", -1)
|
|
|
|
|
|
item = getattr(request, "item", {})
|
|
|
- site = (item.get("site") if isinstance(item, dict) else getattr(item, "site", None)) or getattr(parser, "site", None) or "unknown"
|
|
|
- channel = (item.get("channel") if isinstance(item, dict) else getattr(item, "channel", None)) or getattr(parser, "channel", None) or "unknown"
|
|
|
- code = (item.get("code") or item.get("spidercode")) if isinstance(item, dict) else getattr(item, "spidercode", "unknown")
|
|
|
+ site = self.get_spider_attribute("site", item, parser)
|
|
|
+ channel = self.get_spider_attribute("channel", item, parser)
|
|
|
+ code = self.get_spider_attribute("spidercode", item, parser)
|
|
|
business_type: str = parser.__business_type__ # 爬虫业务类型
|
|
|
run_time = tools.get_current_date(date_format="%Y-%m-%d") # 运行时间,单位:天
|
|
|
spider_id = tools.get_md5(code + business_type + run_time)
|