""" 项目名称字段检查 """ import re from docs.config import general_config from util.sensitive_word import AcAutomation import csv from docs.config import abnormal_config class ProjectnameChecker(object): """ 项目名称字段检查 """ def __init__(self): self.errors_tables = { "0101": { "name": "项目名称长度小于等于5", "parent_name": "长度类型", "parent_code": "01", "checkFn": self.lt5 }, "0102": { "name": "长度大于等于100", "parent_name": "长度类型", "parent_code": "01", "checkFn": self.gt100 }, "0201":{ "name": "非汉字占比>55%", "parent_name": "汉字占比", "parent_code": "02", "checkFn": self.check0201 }, # "0302": { # "name": "不包含通用词汇(中标公告)", # "parent_name": "语义表述不完整", # "parent_code": "03", # "checkFn": self.check0302 # }, "0303": { "name": "包含叠词,异常词汇,特殊词汇(测试,公告公告等)", "parent_name": "语义表述不完整", "parent_code": "03", "checkFn": self.check0303 } } @staticmethod def gt100(projectname: str) -> bool: """ 标题长度大于80 :param title: :return:返回true 代表异常 """ return len(projectname) >= 100 @staticmethod def lt5(projectname: str) -> bool: """ 标题长度小于5 :param title: :return:返回true 代表异常 """ return len(projectname) <= 5 def check0201(self,projectname: str) -> bool: """ 标题非汉字占比 >55% :param title: :return:返回true 代表异常 """ # chinese_chars = [char for char in title if '\u4e00' <= char <= '\u9fff'] # 匹配汉字 non_chinese_chars = [char for char in projectname if not ('\u4e00' <= char <= '\u9fff')] # 匹配非汉字和非字母数字字符 non_chinese_chars_radio = len(non_chinese_chars) / len(projectname) if non_chinese_chars_radio > 0.5: return True return False def check0302(self,projectname: str) -> bool: """ 没有通用后缀 :param projectname: :return:返回true 代表异常 """ # 项目名称必须以通用词汇结尾 with open(general_config["table_field_config"]["path"], "r") as f: reads = csv.reader(f) for w in reads: if w[0] in projectname: return False else: return True return True def check0303(self,projectname: str) -> bool: """ 没有通用后缀 :param projectname: :return:返回true 代表异常 """ # 项目名称中包含异常字符 with open(abnormal_config["table_field_config"]["path6"], "r") as f: reads = csv.reader(f) for w in reads: if w[0] in projectname: return True # 项目名称以异常字符结尾 # with open(abnormal_config["table_field_config"]["path5"], "r") as f: # reads = csv.reader(f) # for w in reads: # if re.search(f"{w[0]}$", projectname) != None: # return True p2 = re.search("[nbsp\..\...\.]$",projectname) #re.search():匹配整个字符串,并返回第一个成功的匹配,如果匹配失败,则返回None if p2!=None: return True #项目名称以异常字符开始 p1 = re.search("^[36780\.]",projectname) if p1!=None: return True