|
@@ -102,37 +102,37 @@ class DataNjpcItem(BaseDetailItem):
|
|
|
kw = {k: v for k, v in kwargs.items() if k not in self.__ignore_attr__}
|
|
|
self.__dict__.update(kw)
|
|
|
|
|
|
- def pre_to_db(self):
|
|
|
- if not self.title:
|
|
|
- self.title = self.projectname
|
|
|
- log.debug("请检测 < title > 是否正确!")
|
|
|
-
|
|
|
+ def handle_publish_time(self):
|
|
|
# 时间格式处理
|
|
|
- cur_time = tools.get_current_date().split(' ')[-1]
|
|
|
- if "-" in str(self.publishtime) and ":" not in str(self.publishtime):
|
|
|
- self.publishtime = self.publishtime + " " + cur_time
|
|
|
- elif "-" not in str(self.publishtime):
|
|
|
- self.publishtime = tools.timestamp_to_date(int(str(self.publishtime)[:10]))
|
|
|
- if "00:00:00" in self.publishtime:
|
|
|
- self.publishtime = self.publishtime.split(' ')[0] + " " + cur_time
|
|
|
+ publishtime = str(self.publishtime)
|
|
|
+ time_str = tools.get_current_date().split(' ')[-1]
|
|
|
+ if "-" in publishtime and ":" not in publishtime:
|
|
|
+ publishtime = publishtime + " " + time_str
|
|
|
+ elif "-" not in publishtime:
|
|
|
+ publishtime = tools.timestamp_to_date(int(publishtime[:10]))
|
|
|
+ if "00:00:00" in publishtime:
|
|
|
+ publishtime = publishtime.split(' ')[0] + " " + time_str
|
|
|
else:
|
|
|
- if "-" in str(self.publishtime) and ":" in str(self.publishtime):
|
|
|
+ if "-" in publishtime and ":" in publishtime:
|
|
|
pass
|
|
|
else:
|
|
|
- raise ValueError("发布时间格式不正确 -> %r " % (self.publishtime))
|
|
|
+ raise ValueError("发布时间格式不正确 -> %r " % self.publishtime)
|
|
|
+
|
|
|
# 时间字符串转时间戳
|
|
|
- self.publishtime = int2long(tools.date_to_timestamp(self.publishtime))
|
|
|
+ self.publishtime = int2long(tools.date_to_timestamp(publishtime))
|
|
|
+
|
|
|
+ def handle_publish_time_overdue(self):
|
|
|
+ """超期发布时间处理"""
|
|
|
+ if not isinstance(self.publishtime, int):
|
|
|
+ raise TypeError("发布时间类型不正确 -> %s " % type(self.publishtime))
|
|
|
|
|
|
- if isinstance(self.publishtime, type(self.comeintime)) and self.publishtime > self.comeintime:
|
|
|
+ if self.publishtime > self.comeintime:
|
|
|
log.warning("发布时间大于当前时间,已设置当前时间为发布时间!")
|
|
|
self.publishtime = int2long(tools.get_current_timestamp())
|
|
|
|
|
|
- if not self.projectname or not self.publishtime or not self.href:
|
|
|
- log.warning(f"基础数据为空:\n 发布地址:{self.href}\n 发布时间:{self.publishtime}\n 标题:{self.projectname}")
|
|
|
- self.dont_save = True
|
|
|
-
|
|
|
+ def handle_page_html(self):
|
|
|
if not self.contenthtml:
|
|
|
- log.warning(f"正文数据为空:\n 发布地址:{self.href}\n 发布时间:{self.publishtime}\n 标题:{self.projectname}")
|
|
|
+ log.warning(f"页面源码不能为空!\n 发布地址:{self.href}\n 发布时间:{self.publishtime}\n 标题:{self.projectname}")
|
|
|
self.dont_save = True
|
|
|
else:
|
|
|
if not self.detail:
|
|
@@ -141,9 +141,27 @@ class DataNjpcItem(BaseDetailItem):
|
|
|
if text_search(self.detail).total == 0:
|
|
|
self.sendflag = "true"
|
|
|
|
|
|
+ def check_data_validity(self):
|
|
|
+ if not self.dont_save:
|
|
|
+ if not self.projectname or not self.publishtime or not self.href:
|
|
|
+ log.warning(f"基础数据不能为空!\n 发布地址:{self.href}\n 发布时间:{self.publishtime}\n 标题:{self.projectname}")
|
|
|
+ self.dont_save = True
|
|
|
+
|
|
|
+ def cleanup(self):
|
|
|
if not self.projectinfo:
|
|
|
del self.projectinfo
|
|
|
|
|
|
+ def pre_to_db(self):
|
|
|
+ if not self.title:
|
|
|
+ self.title = self.projectname
|
|
|
+ log.debug("请检测 < title > 是否正确!")
|
|
|
+
|
|
|
+ self.handle_publish_time()
|
|
|
+ self.handle_publish_time_overdue()
|
|
|
+ self.handle_page_html()
|
|
|
+ self.check_data_validity()
|
|
|
+ self.cleanup()
|
|
|
+
|
|
|
|
|
|
class NjpcListItem(BaseListItem):
|
|
|
|