1 year ago · 663a1c3945
--- a/score.py
+++ b/score.py
@@ -2,35 +2,33 @@ from pymongo import MongoClient
 
															 from bson import ObjectId
														
 
															 def bid_score():
														
 
															-    db = MongoClient('192.168.3.167', 27080, unicode_decode_error_handler="ignore").jyqyfw_historyData2023_1
														
 
															-    coll_user = db["20230921Ssk_endo"]
														
 
															-    # db = MongoClient('192.168.3.166', 27082, unicode_decode_error_handler="ignore").yantianlei
														
 
															-    # coll_user = db["20230920Zglt_9_1"]
														
 
															-    # db = MongoClient('192.168.3.166', 27082, unicode_decode_error_handler="ignore").zhaoxiuzhen
														
 
															-    # coll_user = db["20230917LT_ycl"]
														
 
															+    # db = MongoClient('192.168.3.167', 27080, unicode_decode_error_handler="ignore").jyqyfw_historyData2023_1
														
 
															+    # coll_user = db["20230921Ssk_endo"]
														
 
															+    db = MongoClient('192.168.3.206', 27080, unicode_decode_error_handler="ignore").data_quality
														
 
															+    coll_user = db["bidding_20231122"]
														
 
															     count=0
														
 
															     score=100
														
 
															-    # for item in coll_user.find({"_id":ObjectId("64dc2bea5b7b9126edac6845")}):
														
 
															-    for item in coll_user.find().sort("_id",1):
														
 
															-        # if item['title_qa']:
														
 
															-        #      score-=10
														
 
															-        if item['projectname_qa']:
														
 
															+    for item in coll_user.find({"_id":ObjectId("655ec5319aed6eb2ffa5d77f")}):
														
 
															+    # for item in coll_user.find().sort("_id",1):
														
 
															+        if item.get('title_qa'):
														
 
															+             score-=10
														
 
															+        if item.get('projectname_qa'):
														
 
															             score-=10
														
 
															-        if item['area_qa']:
														
 
															+        if item.get('area_qa'):
														
 
															             score-=10
														
 
															-        if item['projectcode_qa']:
														
 
															+        if item.get('projectcode_qa'):
														
 
															             score-=10
														
 
															         # if item['bidopentime_qa']:
														
 
															         #     score-=10
														
 
															-        if item['buyer_qa']:
														
 
															+        if item.get('buyer_qa'):
														
 
															             score-=10
														
 
															-        if item['winner_qa']:
														
 
															+        if item.get('winner_qa'):
														
 
															             score-=10
														
 
															-        if item['budget_qa']:
														
 
															+        if item.get('budget_qa'):
														
 
															             score-=10
														
 
															-        if item['bidamount_qa']:
														
 
															+        if item.get('bidamount_qa'):
														
 
															             score-=10
														
 
															-        if item["multipackage_qa"]:
														
 
															+        if item.get("multipackage_qa"):
														
 
															             score -= 10
														
 
															         print(score)
														
 
															         coll_user.update_one({"_id": item["_id"]}, {"$set": {"score": score}})
														
--- a/tables/fields/area.py
+++ b/tables/fields/area.py
@@ -37,10 +37,10 @@ class AreaChecker(object):
 
															         # 获取当前脚本所在目录的上一级目录
														
 
															         current_dir = os.path.dirname(__file__)
														
 
															-        parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
														
 
															+        # parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
														
 
															         # 构建 Excel 文件路径
														
 
															-        xls_file = os.path.join(parent_dir, 'docs', 'area.xlsx')
														
 
															+        xls_file = os.path.join(current_dir, '..', '..', 'docs', 'aera.xls')
														
 
															         # 加载地区代码的XLS文件
														
 
															         # xls_file = "C:\\Users\\25503\\PycharmProjects\\data_quality\\docs\\table_head_doc\\aera.xls"
														
 
															         df = pd.read_excel(xls_file)
														
--- a/tables/fields/projectname.py
+++ b/tables/fields/projectname.py
@@ -31,11 +31,17 @@ class ProjectnameChecker(object):
 
															                 "parent_code": "02",
														
 
															                 "checkFn": self.check0201
														
 
															             },
														
 
															-            "0302": {
														
 
															-                "name": "不包含通用词汇（中标公告）",
														
 
															+            # "0302": {
														
 
															+            #     "name": "不包含通用词汇（中标公告）",
														
 
															+            #     "parent_name": "语义表述不完整",
														
 
															+            #     "parent_code": "03",
														
 
															+            #     "checkFn": self.check0302
														
 
															+            # },
														
 
															+            "0303": {
														
 
															+                "name": "包含叠词，异常词汇，特殊词汇（测试，公告公告等）",
														
 
															                 "parent_name": "语义表述不完整",
														
 
															                 "parent_code": "03",
														
 
															-                "checkFn": self.check0302
														
 
															+                "checkFn": self.check0303
														
 
															             }
														
 
															         }
														
@@ -75,29 +81,34 @@ class ProjectnameChecker(object):
 
															         :param projectname:
														
 
															         :return:返回true 代表异常
														
 
															         """
														
 
															-        #标题中包含异常字符
														
 
															+        # 项目名称必须以通用词汇结尾
														
 
															+        with open(general_config["table_field_config"]["path"], "r") as f:
														
 
															+            reads = csv.reader(f)
														
 
															+            for w in reads:
														
 
															+                if w[0] in projectname:
														
 
															+                    return False
														
 
															+                else:
														
 
															+                    return True
														
 
															+        return True
														
 
															+    def check0303(self,projectname: str) -> bool:
														
 
															+        """
														
 
															+        没有通用后缀
														
 
															+        :param projectname:
														
 
															+        :return:返回true 代表异常
														
 
															+        """
														
 
															+        # 项目名称中包含异常字符
														
 
															         with open(abnormal_config["table_field_config"]["path6"], "r") as f:
														
 
															             reads = csv.reader(f)
														
 
															             for w in reads:
														
 
															                 if w[0] in projectname:
														
 
															                     return True
														
 
															-        #项目名称以异常字符结尾
														
 
															+        # 项目名称以异常字符结尾
														
 
															         with open(abnormal_config["table_field_config"]["path5"], "r") as f:
														
 
															             reads = csv.reader(f)
														
 
															             for w in reads:
														
 
															-                if re.search(f"{w[0]}$", projectname) !=None:
														
 
															+                if re.search(f"{w[0]}$", projectname) != None:
														
 
															                     return True
														
 
															         # 项目名称以异常字符开始
														
 
															         p1 = re.compile(r"^[3|6|7|8|0|\.]")
														
 
															         if p1.match(projectname):
														
 
															-            return True
														
 
															-        # 放在最后判断
														
 
															-        # 项目名称必须以通用词汇结尾
														
 
															-        with open(general_config["table_field_config"]["path"], "r") as f:
														
 
															-            reads = csv.reader(f)
														
 
															-            for w in reads:
														
 
															-                if re.search(f"{w[0]}$", projectname) != None:
														
 
															-                    return False
														
 
															-                else:
														
 
															-                    return True
														
 
															-        return True
														
 
															+            return True
														
--- a/tables/fields/title.py
+++ b/tables/fields/title.py
@@ -30,11 +30,17 @@ class TitleChecker(object):
 
															                 "parent_code": "02",
														
 
															                 "checkFn": self.check0201
														
 
															             },
														
 
															-            "0302": {
														
 
															-                "name": "不包含通用词汇（中标公告）",
														
 
															+            # "0302": {
														
 
															+            #     "name": "不包含通用词汇（中标公告）",
														
 
															+            #     "parent_name": "语义表述不完整",
														
 
															+            #     "parent_code": "03",
														
 
															+            #     "checkFn": self.check0302
														
 
															+            # },
														
 
															+            "0303": {
														
 
															+                "name": "包含叠词，异常词汇，特殊词汇（测试，公告公告等）",
														
 
															                 "parent_name": "语义表述不完整",
														
 
															                 "parent_code": "03",
														
 
															-                "checkFn": self.check0302
														
 
															+                "checkFn": self.check0303
														
 
															             }
														
 
															         }
														
@@ -70,6 +76,22 @@ class TitleChecker(object):
 
															             return True
														
 
															         return False
														
 
															     def check0302(self,title: str) -> bool:
														
 
															+        """
														
 
															+        没有通用后缀
														
 
															+        :param title:
														
 
															+        :return:返回true 代表异常
														
 
															+        """
														
 
															+        #标题必须以通用词汇结尾
														
 
															+        with open(general_config["table_field_config"]["path"], "r") as f:
														
 
															+            reads = csv.reader(f)
														
 
															+            for w in reads:
														
 
															+                if w[0] in title:
														
 
															+                    return False
														
 
															+                else:
														
 
															+                    return True
														
 
															+        return False
														
 
															+
														
 
															+    def check0303(self, title: str) -> bool:
														
 
															         """
														
 
															         没有通用后缀
														
 
															         :param title:
														
@@ -92,15 +114,4 @@ class TitleChecker(object):
 
															         #标题以异常字符开始
														
 
															         p1 = re.compile(r"^[3|6|7|8|0|\.]")
														
 
															         if p1.match(title):
														
 
															-            return True
														
 
															-
														
 
															-        #放在最后判断
														
 
															-        #标题必须以通用词汇结尾
														
 
															-        with open(general_config["table_field_config"]["path"], "r") as f:
														
 
															-            reads = csv.reader(f)
														
 
															-            for w in reads:
														
 
															-                if re.search(f"{w[0]}$", title) !=None:
														
 
															-                    return False
														
 
															-                else:
														
 
															-                    return True
														
 
															-        return False
														
 
															+            return True
														
--- a/util/get_region.py
+++ b/util/get_region.py
@@ -1,16 +1,23 @@
 
															+import os
														
 
															 import cpca
														
 
															 import re
														
 
															 import pandas as pd
														
 
															 def get_city_info(text):
														
 
															-    # 读取区县数据
														
 
															-    df_county = pd.read_excel("//Users//miaobao//Documents//work//PycharmProjects//data_quality//docs//区县.xlsx")
														
 
															+    # 获取当前脚本所在目录的上一级目录
														
 
															+    current_dir = os.path.dirname(__file__)
														
 
															+    parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
														
 
															+    # 读取区县数据
														
 
															+    df_county_addr = os.path.join(parent_dir, 'docs', '区县.xlsx')
														
 
															+    df_county=pd.read_excel(df_county_addr)
														
 
															     # 读取乡镇数据
														
 
															-    df_town = pd.read_excel("//Users//miaobao//Documents//work//PycharmProjects//data_quality//docs//乡镇.xlsx")
														
 
															+    df_town_addr = os.path.join(parent_dir, 'docs', '乡镇.xlsx')
														
 
															+    df_town = pd.read_excel(df_town_addr)
														
 
															     # 读取市级数据
														
 
															-    df_city = pd.read_excel("//Users//miaobao//Documents//work//PycharmProjects//data_quality//docs//市.xlsx")
														
 
															+    df_city_addr = os.path.join(parent_dir, 'docs', '市.xlsx')
														
 
															+    df_city = pd.read_excel(df_city_addr)
														
 
															     # 使用cpca库提取地名
														
 
															     df = cpca.transform([text])
														
@@ -26,7 +33,7 @@ def get_city_info(text):
 
															     if province is None and city is None and district is None:
														
 
															         # 使用正则表达式提取乡镇信息
														
 
															-        towns = re.findall(r'[\u4e00-\u9fa5]+镇|[\u4e00-\u9fa5]+乡', text)
														
 
															+        towns = re.findall(r'[\u4e00-\u9fa5]+镇|[\u4e00-\u9fa5]+乡|[\u4e00-\u9fa5]+街道|[\u4e00-\u9fa5]+庄|[\u4e00-\u9fa5]+营|[\u4e00-\u9fa5]+店', text)
														
 
															         if towns:
														
 
															             for town in towns:
														
 
															                 town_name = None
														
@@ -55,27 +62,15 @@ def get_city_info(text):
 
															                         province = df_city_result.iloc[0]['省']
														
 
															                     break# 找到乡镇信息后跳出循环
														
 
															-
														
 
															-            if not province and not city and not district and '区县代码' in df_county.columns:
														
 
															-                county_code = df.iloc[0]['区县代码']
														
 
															-                city_info = df_city[df_city['城市代码'] == county_code]
														
 
															-                if not city_info.empty:
														
 
															-                    city = city_info.iloc[0]['城市名称']
														
 
															-
														
 
															-                    # 将城市名称转换成对应的省份名称
														
 
															-                    df_city_result = cpca.transform([city])
														
 
															-                    province = df_city_result.iloc[0]['省']
														
 
															-
														
 
															-                county_info = df_county[df_county['区县代码'] == county_code].iloc[0]
														
 
															-                district = county_info['区县名称']
														
 
															-
														
 
															+                else:
														
 
															+                    continue
														
 
															     return province, city, district
														
 
															 if __name__ == '__main__':
														
 
															     # 使用方法示例
														
 
															-    province, city, district = get_city_info("河南省开发的")
														
 
															-    if get_city_info("电动蝶阀待开发的")==[None,None,None]:
														
 
															+    province, city, district = get_city_info("杞县文化广电新闻出版旅游局")
														
 
															+    if province==None or city==None or district==None:
														
 
															         print("44444")
														
 
															     print(province, city, district)