|
@@ -31,44 +31,13 @@ class NoFieldChecker(object):
|
|
:param obj:代表一个item
|
|
:param obj:代表一个item
|
|
:return:返回true 代表异常
|
|
:return:返回true 代表异常
|
|
"""
|
|
"""
|
|
- self.check_bidamount_ac = AcAutomation()
|
|
|
|
- with open(amount_config["table_field_config"]["path"], "r") as f:
|
|
|
|
- reads = csv.reader(f)
|
|
|
|
- [self.check_bidamount_ac.add_word(w[0]) for w in reads]
|
|
|
|
-
|
|
|
|
- detail = obj.get("detail", "")
|
|
|
|
- attach_text = obj.get("attach_text", {})
|
|
|
|
subtype = obj.get("subtype", "")
|
|
subtype = obj.get("subtype", "")
|
|
if subtype in ["中标", "成交","合同","验收"]:
|
|
if subtype in ["中标", "成交","合同","验收"]:
|
|
- contents = catch_content.public_attachment_catch(detail, platform="html", document_id="公告") #返回值是字典
|
|
|
|
- content = "\n".join(contents) #字典处理成字符串
|
|
|
|
- if self.check_bidamount_ac.search(content):
|
|
|
|
- return True
|
|
|
|
-
|
|
|
|
- for attach_index, attach_content in attach_text.items():
|
|
|
|
- if attach_content:
|
|
|
|
- for topic_index, topic_detail in attach_content.items():
|
|
|
|
- # oss地址
|
|
|
|
- attach_url = topic_detail.get("attach_url", "")
|
|
|
|
- if attach_url:
|
|
|
|
- # 获取附件内容
|
|
|
|
- st, content = fsc.download_text_content(attach_url)
|
|
|
|
-
|
|
|
|
- # 下载成功
|
|
|
|
- # 超长文本不处理,暂定30万字
|
|
|
|
- if st and content.strip():
|
|
|
|
- if len(content) > 300000:
|
|
|
|
- continue
|
|
|
|
- # 开始检测
|
|
|
|
- contents = catch_content.public_attachment_catch(content, platform="attach",document_id=attach_url)
|
|
|
|
- content = "\n".join(contents)
|
|
|
|
- if self.check_bidamount_ac.search(content):
|
|
|
|
- return True
|
|
|
|
- return False
|
|
|
|
|
|
+ bidamount = obj.get("bidamount", "")
|
|
|
|
+ if bidamount:
|
|
|
|
+ return False
|
|
|
|
+ return True
|
|
return False
|
|
return False
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
|
|
def check_winner(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_winner(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
@@ -83,9 +52,7 @@ class NoFieldChecker(object):
|
|
return False
|
|
return False
|
|
return True
|
|
return True
|
|
return False
|
|
return False
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
+
|
|
|
|
|
|
def check_buyer(self,obj,catch_content: CatchContentObject) -> bool:
|
|
def check_buyer(self,obj,catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
@@ -99,55 +66,19 @@ class NoFieldChecker(object):
|
|
return False
|
|
return False
|
|
return True
|
|
return True
|
|
|
|
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
-
|
|
|
|
def check_budget(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_budget(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
预算为空检测
|
|
预算为空检测
|
|
:param obj:代表一个item
|
|
:param obj:代表一个item
|
|
:return:返回true 代表异常
|
|
:return:返回true 代表异常
|
|
"""
|
|
"""
|
|
- self.check_budget_ac = AcAutomation()
|
|
|
|
- with open(budget_config["table_field_config"]["path"],"r") as f :
|
|
|
|
- reads=csv.reader(f)
|
|
|
|
- [self.check_budget_ac.add_word(w[0]) for w in reads ]
|
|
|
|
-
|
|
|
|
- detail = obj.get("detail", "")
|
|
|
|
- attach_text = obj.get("attach_text", {})
|
|
|
|
subtype = obj.get("subtype", "")
|
|
subtype = obj.get("subtype", "")
|
|
if subtype not in ["中标", "成交", "合同", "验收"]:
|
|
if subtype not in ["中标", "成交", "合同", "验收"]:
|
|
- contents = catch_content.public_attachment_catch(detail, platform="html", document_id="公告") # 返回值是字典
|
|
|
|
- content = "\n".join(contents) # 字典处理成字符串
|
|
|
|
- if self.check_budget_ac.search(content):
|
|
|
|
- return True
|
|
|
|
-
|
|
|
|
- for attach_index, attach_content in attach_text.items():
|
|
|
|
- if attach_content:
|
|
|
|
- for topic_index, topic_detail in attach_content.items():
|
|
|
|
- # oss地址
|
|
|
|
- attach_url = topic_detail.get("attach_url", "")
|
|
|
|
- if attach_url:
|
|
|
|
- # 获取附件内容
|
|
|
|
- st, content = fsc.download_text_content(attach_url)
|
|
|
|
-
|
|
|
|
- # 下载成功
|
|
|
|
- # 超长文本不处理,暂定30万字
|
|
|
|
- if st and content.strip():
|
|
|
|
- if len(content) > 300000:
|
|
|
|
- continue
|
|
|
|
- # 开始检测
|
|
|
|
- contents = catch_content.public_attachment_catch(content, platform="attach",
|
|
|
|
- document_id=attach_url)
|
|
|
|
- content = "\n".join(contents)
|
|
|
|
- if self.check_budget_ac.search(content):
|
|
|
|
- return True
|
|
|
|
- return False
|
|
|
|
|
|
+ budget = obj.get("budget", "")
|
|
|
|
+ if budget:
|
|
|
|
+ return False
|
|
|
|
+ return True
|
|
return False
|
|
return False
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
|
|
def check_region(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_region(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
@@ -169,9 +100,6 @@ class NoFieldChecker(object):
|
|
if title :
|
|
if title :
|
|
return False
|
|
return False
|
|
return True
|
|
return True
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
|
|
def check_projectname(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_projectname(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
@@ -182,9 +110,7 @@ class NoFieldChecker(object):
|
|
if projectname :
|
|
if projectname :
|
|
return False
|
|
return False
|
|
return True
|
|
return True
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
+
|
|
|
|
|
|
def check_projectcode(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_projectcode(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
@@ -192,44 +118,11 @@ class NoFieldChecker(object):
|
|
:param obj:代表一个item
|
|
:param obj:代表一个item
|
|
:return:返回true 代表异常
|
|
:return:返回true 代表异常
|
|
"""
|
|
"""
|
|
- self.check_projectcode_ac = AcAutomation()
|
|
|
|
- with open(abnormal_config["table_field_config"]["path4"], "r") as f:
|
|
|
|
- reads = csv.reader(f)
|
|
|
|
- [self.check_projectcode_ac.add_word(w[0]) for w in reads]
|
|
|
|
-
|
|
|
|
projectcode = obj.get("projectcode", "")
|
|
projectcode = obj.get("projectcode", "")
|
|
- detail = obj.get("detail", "")
|
|
|
|
- attach_text = obj.get("attach_text", {})
|
|
|
|
- if projectcode == "":
|
|
|
|
- contents = catch_content.public_attachment_catch(detail, platform="html", document_id="公告") #返回值是字典
|
|
|
|
- content = "\n".join(contents) #字典处理成字符串
|
|
|
|
- if self.check_projectcode_ac.search(content):
|
|
|
|
- return True
|
|
|
|
-
|
|
|
|
- for attach_index, attach_content in attach_text.items():
|
|
|
|
- if attach_content:
|
|
|
|
- for topic_index, topic_detail in attach_content.items():
|
|
|
|
- # oss地址
|
|
|
|
- attach_url = topic_detail.get("attach_url", "")
|
|
|
|
- if attach_url:
|
|
|
|
- # 获取附件内容
|
|
|
|
- st, content = fsc.download_text_content(attach_url)
|
|
|
|
-
|
|
|
|
- # 下载成功
|
|
|
|
- # 超长文本不处理,暂定30万字
|
|
|
|
- if st and content.strip():
|
|
|
|
- if len(content) > 300000:
|
|
|
|
- continue
|
|
|
|
- # 开始检测
|
|
|
|
- contents = catch_content.public_attachment_catch(content, platform="attach",document_id=attach_url)
|
|
|
|
- content = "\n".join(contents)
|
|
|
|
- if self.check_projectcode_ac.search(content):
|
|
|
|
- return True
|
|
|
|
|
|
+ if projectcode:
|
|
return False
|
|
return False
|
|
- return False
|
|
|
|
- # 处理正文
|
|
|
|
- # 检查因素
|
|
|
|
- # 是否返回 0000
|
|
|
|
|
|
+ return True
|
|
|
|
+
|
|
def check_subpackage(self,obj, catch_content: CatchContentObject) -> bool:
|
|
def check_subpackage(self,obj, catch_content: CatchContentObject) -> bool:
|
|
"""
|
|
"""
|
|
公司名称检测
|
|
公司名称检测
|