projectname.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. """
  2. 项目名称字段检查
  3. """
  4. import re
  5. from docs.config import general_config
  6. from util.sensitive_word import AcAutomation
  7. import csv
  8. from docs.config import abnormal_config
  9. class ProjectnameChecker(object):
  10. """
  11. 项目名称字段检查
  12. """
  13. def __init__(self):
  14. self.errors_tables = {
  15. "0101": {
  16. "name": "项目名称长度小于等于5",
  17. "parent_name": "长度类型",
  18. "parent_code": "01",
  19. "checkFn": self.lt5
  20. },
  21. "0102": {
  22. "name": "长度大于等于100",
  23. "parent_name": "长度类型",
  24. "parent_code": "01",
  25. "checkFn": self.gt100
  26. },
  27. "0201":{
  28. "name": "非汉字占比>55%",
  29. "parent_name": "汉字占比",
  30. "parent_code": "02",
  31. "checkFn": self.check0201
  32. },
  33. # "0302": {
  34. # "name": "不包含通用词汇(中标公告)",
  35. # "parent_name": "语义表述不完整",
  36. # "parent_code": "03",
  37. # "checkFn": self.check0302
  38. # },
  39. "0303": {
  40. "name": "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
  41. "parent_name": "语义表述不完整",
  42. "parent_code": "03",
  43. "checkFn": self.check0303
  44. }
  45. }
  46. @staticmethod
  47. def gt100(projectname: str) -> bool:
  48. """
  49. 标题长度大于80
  50. :param title:
  51. :return:返回true 代表异常
  52. """
  53. return len(projectname) >= 100
  54. @staticmethod
  55. def lt5(projectname: str) -> bool:
  56. """
  57. 标题长度小于5
  58. :param title:
  59. :return:返回true 代表异常
  60. """
  61. return len(projectname) <= 5
  def check0201(self, projectname: str, *, threshold: float = 0.5) -> bool:
      """Report whether non-Chinese characters dominate the project name.

      A character counts as Chinese when it lies in U+4E00..U+9FFF; every
      other character (letters, digits, punctuation, whitespace, ...) counts
      as non-Chinese.

      NOTE(review): the rule's display name says ">55%", while the cut-off
      compared against here is 0.5. The default keeps 0.5 so existing
      results do not change -- confirm which value is intended.

      :param projectname: project name to check
      :param threshold: share of non-Chinese characters above which the
          name is reported (strict comparison)
      :return: True (anomalous) when the non-Chinese share exceeds
          ``threshold``; False otherwise, including for an empty name
      """
      if not projectname:
          # Nothing to measure; also avoids dividing by zero below.
          return False
      non_chinese_count = sum(
          1 for ch in projectname if not ("\u4e00" <= ch <= "\u9fff")
      )
      return non_chinese_count / len(projectname) > threshold
  def check0302(self, projectname: str) -> bool:
      """Report whether the project name contains none of the generic terms.

      The terms are read from the first column of the CSV file whose path is
      ``general_config["table_field_config"]["path"]``. A term matches when
      it occurs anywhere in the name (substring test).

      NOTE(review): the file is opened with the platform default encoding;
      confirm the encoding of the term list.

      :param projectname: project name to check
      :return: True (anomalous) when no term occurs in the name, False as
          soon as one term is found
      """
      # newline="" is the mode the csv module expects for its input files.
      with open(general_config["table_field_config"]["path"], "r", newline="") as f:
          # Look at every row, not just the first one. Blank rows and empty
          # first cells are skipped: the former have no column 0 and the
          # latter would match every name.
          return not any(
              row and row[0] and row[0] in projectname
              for row in csv.reader(f)
          )
  89. def check0303(self,projectname: str) -> bool:
  90. """
  91. 没有通用后缀
  92. :param projectname:
  93. :return:返回true 代表异常
  94. """
  95. # 项目名称中包含异常字符
  96. with open(abnormal_config["table_field_config"]["path6"], "r") as f:
  97. reads = csv.reader(f)
  98. for w in reads:
  99. if w[0] in projectname:
  100. return True
  101. # 项目名称以异常字符结尾
  102. # with open(abnormal_config["table_field_config"]["path5"], "r") as f:
  103. # reads = csv.reader(f)
  104. # for w in reads:
  105. # if re.search(f"{w[0]}$", projectname) != None:
  106. # return True
  107. p2 = re.search("[nbsp\..\...\.]$",projectname)
  108. #re.search():匹配整个字符串,并返回第一个成功的匹配,如果匹配失败,则返回None
  109. if p2!=None:
  110. return True
  111. #项目名称以异常字符开始
  112. p1 = re.search("^[36780\.)]",projectname)
  113. if p1!=None:
  114. return True