123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- """
- 项目名称字段检查
- """
- import re
- from docs.config import general_config
- from util.sensitive_word import AcAutomation
- import csv
- from docs.config import abnormal_config
class ProjectnameChecker(object):
    """Rule-based checks for the project-name field.

    ``errors_tables`` maps a rule code to a dict holding the rule's display
    name, its parent category (``parent_name`` / ``parent_code``) and
    ``checkFn``, a callable that takes the project name and returns ``True``
    when the name violates the rule.
    """

    # Share of non-Chinese characters above which check0201 flags a name.
    # NOTE(review): the rule label and the previous docstring say 55%, but the
    # comparison has always been against 0.5 -- confirm which one is intended.
    _NON_CHINESE_RATIO_LIMIT = 0.5

    # Names starting with one of these characters are flagged by check0302.
    # NOTE(review): inside a character class "|" is a literal, so a leading
    # "|" is matched as well; the pattern is kept exactly as it was.
    _ABNORMAL_PREFIX = re.compile(r"^[3|6|7|8|0|\.]")

    def __init__(self):
        self.errors_tables = {
            "0101": {
                "name": "项目名称长度小于等于5",
                "parent_name": "长度类型",
                "parent_code": "01",
                "checkFn": self.lt5,
            },
            "0102": {
                "name": "长度大于等于100",
                "parent_name": "长度类型",
                "parent_code": "01",
                "checkFn": self.gt100,
            },
            "0201": {
                "name": "非汉字占比>55%",
                "parent_name": "汉字占比",
                "parent_code": "02",
                "checkFn": self.check0201,
            },
            "0302": {
                "name": "不包含通用词汇(中标公告)",
                "parent_name": "语义表述不完整",
                "parent_code": "03",
                "checkFn": self.check0302,
            },
        }

    @staticmethod
    def gt100(projectname: str) -> bool:
        """Flag project names that are 100 characters or longer.

        :param projectname: project name to check
        :return: True means the name is abnormal
        """
        return len(projectname) >= 100

    @staticmethod
    def lt5(projectname: str) -> bool:
        """Flag project names that are 5 characters or shorter.

        :param projectname: project name to check
        :return: True means the name is abnormal
        """
        return len(projectname) <= 5

    def check0201(self, projectname: str) -> bool:
        """Flag project names made up mostly of non-Chinese characters.

        A character counts as Chinese when it lies in U+4E00..U+9FFF; every
        other character (letters, digits, punctuation, spaces, ...) counts as
        non-Chinese.

        :param projectname: project name to check
        :return: True means the name is abnormal; an empty name has no ratio
            to compute and is reported as False
        """
        if not projectname:
            # Avoid dividing by zero; there is nothing to measure.
            return False
        non_chinese_count = sum(
            1 for char in projectname if not ('\u4e00' <= char <= '\u9fff')
        )
        ratio = non_chinese_count / len(projectname)
        return ratio > self._NON_CHINESE_RATIO_LIMIT

    @staticmethod
    def _first_column(path: str) -> list:
        """Return the non-empty first-column values of the CSV file at *path*.

        Blank lines and rows whose first cell is empty are skipped: an empty
        entry would be contained in / match the end of every project name.
        """
        # newline="" is what the csv module asks for when given a file object.
        with open(path, "r", newline="") as f:
            return [row[0] for row in csv.reader(f) if row and row[0]]

    def check0302(self, projectname: str) -> bool:
        """Flag project names that lack a generic suffix or look malformed.

        The checks run in this order and stop at the first one that decides:

        1. the name contains an entry of the CSV at
           ``abnormal_config["table_field_config"]["path6"]`` -> abnormal;
        2. the name ends with an entry of the CSV at
           ``abnormal_config["table_field_config"]["path5"]`` -> abnormal;
        3. the name starts with a character matched by ``_ABNORMAL_PREFIX``
           -> abnormal;
        4. the name ends with any entry of the CSV at
           ``general_config["table_field_config"]["path"]`` -> normal,
           otherwise abnormal.

        Only the first column of each CSV row is used. The files are read on
        every call.

        :param projectname: project name to check
        :return: True means the name is abnormal
        """
        abnormal_paths = abnormal_config["table_field_config"]

        # 1. Abnormal text anywhere in the name.
        if any(word in projectname
               for word in self._first_column(abnormal_paths["path6"])):
            return True

        # 2. Abnormal ending. Entries are used as regular-expression fragments,
        #    as before. NOTE(review): if they are meant to be literal text they
        #    should go through re.escape() -- confirm against the CSV contents.
        if any(re.search(f"{word}$", projectname)
               for word in self._first_column(abnormal_paths["path5"])):
            return True

        # 3. Abnormal first character.
        if self._ABNORMAL_PREFIX.match(projectname):
            return True

        # 4. Decided last: the name must end with one of the generic terms.
        #    Every entry is tried, not only the first row of the file.
        general_words = self._first_column(general_config["table_field_config"]["path"])
        has_generic_suffix = any(
            re.search(f"{word}$", projectname) is not None
            for word in general_words
        )
        return not has_generic_suffix
|