|
@@ -17,7 +17,8 @@ class DataNjpcItem(BaseDetailItem):
|
|
|
}
|
|
|
__ignore_attr__ = {
|
|
|
'parse_url', 'parser_name', 'parse', 'deal_detail', 'files', 'proxies',
|
|
|
- 'ex_python', 'ex_js', 'request_params', 'comeintime', 'failed', 'error'
|
|
|
+ 'ex_python', 'ex_js', 'request_params', 'comeintime', 'failed', 'error',
|
|
|
+ 'failed_times'
|
|
|
}
|
|
|
|
|
|
def __init__(self, ignore=None, **kwargs):
|
|
@@ -106,16 +107,23 @@ class DataNjpcItem(BaseDetailItem):
|
|
|
self.title = self.projectname
|
|
|
log.debug("请检测 < title > 是否正确!")
|
|
|
|
|
|
- if "-" in str(self.publishtime) and ":" in str(self.publishtime):
|
|
|
- self.publishtime = int2long(tools.date_to_timestamp(self.publishtime))
|
|
|
- elif "-" in str(self.publishtime) and ":" not in str(self.publishtime):
|
|
|
- self.publishtime = int2long(tools.date_to_timestamp(self.publishtime, "%Y-%m-%d"))
|
|
|
- elif len(str(self.publishtime)) == 10 or len(str(self.publishtime)) == 13: # 或许是时间戳
|
|
|
- self.publishtime = int2long(int(str(self.publishtime)[:10]))
|
|
|
+ # 时间格式处理
|
|
|
+ cur_time = tools.get_current_date().split(' ')[-1]
|
|
|
+ if "-" in str(self.publishtime) and ":" not in str(self.publishtime):
|
|
|
+ self.publishtime = self.publishtime + " " + cur_time
|
|
|
+ elif "-" not in str(self.publishtime):
|
|
|
+ self.publishtime = tools.timestamp_to_date(int(str(self.publishtime)[:10]))
|
|
|
+ if "00:00:00" in self.publishtime:
|
|
|
+ self.publishtime = self.publishtime.split(' ')[0] + " " + cur_time
|
|
|
else:
|
|
|
- raise ValueError("发布时间格式不正确 -> %r " %(self.publishtime))
|
|
|
-
|
|
|
- if isinstance(self.publishtime,type(self.comeintime)) and self.publishtime > self.comeintime:
|
|
|
+ if "-" in str(self.publishtime) and ":" in str(self.publishtime):
|
|
|
+ pass
|
|
|
+ else:
|
|
|
+ raise ValueError("发布时间格式不正确 -> %r " % (self.publishtime))
|
|
|
+ # 时间字符串转时间戳
|
|
|
+ self.publishtime = int2long(tools.date_to_timestamp(self.publishtime))
|
|
|
+
|
|
|
+ if isinstance(self.publishtime, type(self.comeintime)) and self.publishtime > self.comeintime:
|
|
|
log.warning("发布时间大于当前时间,已设置当前时间为发布时间!")
|
|
|
self.publishtime = int2long(tools.get_current_timestamp())
|
|
|
|