projectname.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. """
  2. 项目名称字段检查
  3. """
  4. import re
  5. from docs.config import general_config
  6. from util.sensitive_word import AcAutomation
  7. import csv
  8. from docs.config import abnormal_config
  9. class ProjectnameChecker(object):
  10. """
  11. 项目名称字段检查
  12. """
  13. def __init__(self):
  14. self.errors_tables = {
  15. "0101": {
  16. "name": "项目名称长度小于等于5",
  17. "parent_name": "长度类型",
  18. "parent_code": "01",
  19. "checkFn": self.lt5
  20. },
  21. "0102": {
  22. "name": "长度大于等于100",
  23. "parent_name": "长度类型",
  24. "parent_code": "01",
  25. "checkFn": self.gt100
  26. },
  27. "0201":{
  28. "name": "非汉字占比>55%",
  29. "parent_name": "汉字占比",
  30. "parent_code": "02",
  31. "checkFn": self.check0201
  32. },
  33. "0302": {
  34. "name": "不包含通用词汇(中标公告)",
  35. "parent_name": "语义表述不完整",
  36. "parent_code": "03",
  37. "checkFn": self.check0302
  38. }
  39. }
  40. @staticmethod
  41. def gt100(projectname: str) -> bool:
  42. """
  43. 标题长度大于80
  44. :param title:
  45. :return:返回true 代表异常
  46. """
  47. return len(projectname) >= 100
  48. @staticmethod
  49. def lt5(projectname: str) -> bool:
  50. """
  51. 标题长度小于5
  52. :param title:
  53. :return:返回true 代表异常
  54. """
  55. return len(projectname) <= 5
  56. def check0201(self,projectname: str) -> bool:
  57. """
  58. 标题非汉字占比 >55%
  59. :param title:
  60. :return:返回true 代表异常
  61. """
  62. # chinese_chars = [char for char in title if '\u4e00' <= char <= '\u9fff'] # 匹配汉字
  63. non_chinese_chars = [char for char in projectname if not ('\u4e00' <= char <= '\u9fff')] # 匹配非汉字和非字母数字字符
  64. non_chinese_chars_radio = len(non_chinese_chars) / len(projectname)
  65. if non_chinese_chars_radio > 0.5:
  66. return True
  67. return False
  68. def check0302(self,projectname: str) -> bool:
  69. """
  70. 没有通用后缀
  71. :param projectname:
  72. :return:返回true 代表异常
  73. """
  74. #标题中包含异常字符
  75. with open(abnormal_config["table_field_config"]["path6"], "r") as f:
  76. reads = csv.reader(f)
  77. for w in reads:
  78. if w[0] in projectname:
  79. return True
  80. #项目名称以异常字符结尾
  81. with open(abnormal_config["table_field_config"]["path5"], "r") as f:
  82. reads = csv.reader(f)
  83. for w in reads:
  84. if re.search(f"{w[0]}$", projectname) !=None:
  85. return True
  86. # 项目名称以异常字符开始
  87. p1 = re.compile(r"^[3|6|7|8|0|\.]")
  88. if p1.match(projectname):
  89. return True
  90. # 放在最后判断
  91. # 项目名称必须以通用词汇结尾
  92. with open(general_config["table_field_config"]["path"], "r") as f:
  93. reads = csv.reader(f)
  94. for w in reads:
  95. if re.search(f"{w[0]}$", projectname) != None:
  96. return False
  97. else:
  98. return True
  99. return True