liumiaomiao 18 hours ago
parent
commit
746613b0ee

+ 92 - 241
app.py

@@ -1,8 +1,9 @@
 # coding:utf-8
 import inspect
 import csv
-from pymongo import MongoClient
 from docs.config import abnormal_config
+from tables.fields.toptype import ToptypeChecker
+from tables.fields.subtype import SubtypeChecker
 from tables.fields.s_winner import WinnerChecker
 from tables.fields.buyer import BuyerChecker
 from tables.fields.bidamount import BidAmountChecker
@@ -24,6 +25,9 @@ from tables.fields.docendtime import DocendtimeChecker
 from tables.fields.bidstarttime import BidstarttimeChecker
 from tables.fields.bidendtime import BidendtimeChecker
 from tables.fields.bidopentime import BidopentimeChecker
+from tables.fields.capital import CapitalChecker
+toptype_checker = ToptypeChecker()
+subtype_checker = SubtypeChecker()
 area_checker = AreaChecker()
 winner_checker = WinnerChecker()
 buyer_checker = BuyerChecker()
@@ -44,8 +48,15 @@ fieldtype_checker = FieldTypeChecker()
 purchasing_checker = PurchasingChecker()
 city_checker = CityChecker()
 district_checker = DistrictChecker()
+capital_checker = CapitalChecker()
 # 定义检查链
 check_chain = {
+    "toptype": {
+        "checker": toptype_checker,
+    },
+    "subtype": {
+        "checker": subtype_checker,
+    },
     "title": {
         "checker": title_checker,
     },
@@ -99,6 +110,9 @@ check_chain = {
     },
     "purchasinglist": {
         "checker": purchasing_checker,
+    },
+    "capital": {
+        "checker": capital_checker,
     }
 }
 
@@ -107,7 +121,7 @@ def check_params(func, param_rows: dict, is_check_type=False) -> (bool, list):
     """
     函数传参数
     """
-    _default_params = {"attach_text": {}, "bidamount": 0, "budget": 0,"s_winner":"","bidopentime":0,"publishtime":0,"bidstarttime":0,"bidendtime":0,"docendtime":0,"docstarttime":0,"subtype":"","s_topscopeclass":"","supervisorrate":0,"district":"金水区","com_package":[],"buyer":""}  # 必要参数的默认值
+    _default_params = {"attach_text": {}, "projectcode":'',"bidamount": 0, "budget": 0,"capital": 0,"s_winner":"","bidopentime":0,"publishtime":0,"bidstarttime":0,"bidendtime":0,"docendtime":0,"docstarttime":0,"subtype":"","s_topscopeclass":"","supervisorrate":0,"district":"金水区","com_package":[],"buyer":""}  # 必要参数的默认值
     func_params = inspect.getfullargspec(func)  # 取出函数的参数
     args_annotations = func_params.annotations  # 参数类型dict,取出参数类型
     args_list = func_params.args  # 参数list
@@ -222,7 +236,8 @@ if __name__ == '__main__':
     "comeintime" : int(1698739748), 
     "area" : "内蒙古",
     "purchasingsource" : "[{\"start\": 13, \"verify\": \"确定\", \"direction\": \"h\", \"type\": \"识别\", \"header\": {\"itemname\": \"产品名称\", \"model\": \"技术规格\", \"number\": \"数量\", \"unitprice\": \"单价(元)\", \"totalprice\": \"金额(元)\"}, \"file_name\": \"公告\"}]",
-    "toptype" : "预告",
+    "toptype" : "拟建",
+    "subtype" : "拟建",
     "spidercode" : "nm_nmgzzqzfcgw_dzmc_zgcgcjgs", 
     "extracttype" : int(1),
     "s_sha" : "9269588cb73151d8ec09cf16347539678549f81395559589a731ca1394828582", 
@@ -233,7 +248,7 @@ if __name__ == '__main__':
     "dataging" : int(0),
     # "bidopentime":int(1798739414),
     # "publishtime" : int(1751937052),
-    "subtype" : "中标",
+
     "purchasinglist" : [
         {
             "score" : 0.8275, 
@@ -268,7 +283,7 @@ if __name__ == '__main__':
     "autoid" : int(249351001), 
     "bidamount" : 10000,
     "bidway" : "电子投标", 
-    "budget" : 30000.0,
+    # "budget" : 30000.0,
     "supervisorrate": 0.03,
     "buyer" : "玖宸(北京)科技有限公司",
     "buyerclass" : "医疗", 
@@ -315,8 +330,34 @@ if __name__ == '__main__':
             "type": "结果_关于中标 候选人的公示\n市招示 (\n2018) 字\n施工资格后审第\n0827号"
         }
     ],
+
     "bidendtime": 1754614800.0,
-    "com_package": [
+    #拟建字段
+    "owner": "浙江中菱电梯安装工程有限公司",
+    "project_stage_code": "05",
+    "total_investment":"71050.700000",
+    # "capital": 10000.737132,
+    "tenderlist": [
+        {
+            "s_winner": "",
+            "detail": "由乌海市鸿鼎房地产开发有限责任公司建设的鼎盛花园小区5#楼附属商业调整项目于2025年海勃湾区第5次城市规划建设区长专题办公会议通过,依据《关于城乡规划公开公示的规定》的要求,为保障公众利益,维护相关单位及个人的合法权益,现予以公示,公示期为七天。<br/><table border=\"1\"><tbody>\t<tr>\t<td>总用地面积<br/>\t</td>\t<td>83724.90㎡<br/>\t</td>\t</tr>\t<tr>\t<td>总建筑面积<br/>\t</td>\t<td>308931.37㎡<br/>\t</td>\t</tr>\t<tr>\t<td>建筑密度<br/>\t</td>\t<td>26.1%<br/>\t</td>\t</tr>\t<tr>\t<td>绿地率<br/>\t</td>\t<td>30.3%<br/>\t</td>\t</tr>\t<tr>\t<td>容积率<br/>\t</td>\t<td>2.94<br/>\t</td>\t</tr>\t<tr>\t<td>停车位<br/>\t</td>\t<td>1722个<br/>\t</td>\t</tr></tbody></table><br/>调整前,5#楼附属商业基底面积554.33㎡,建筑面积953.29㎡,调整后5#楼附属商业基底面积504.15㎡,建筑面积894.53㎡。<br/>反馈意见单位:乌海市自然资源局海勃湾分局<br/>联系电话:0473-6995512<br/>联系人:贺倩<br/>附图:<br/>立面图<br/>鸟瞰图<br/>平面图详情请下载附件!",
+            "title": "鼎盛花园小区5#楼附属商业调整项目规划设计方案公示",
+            "jyhref": "https://www.jianyu360.cn/article/entservice/U1ZMQ1FAFQgHUAFWEUdVSkcPAAQGXUNK.html?appid",
+            "publishtime": int(1748397383),
+            "buyer": "",
+            "winnerperson": "",
+            "tenderid": "688042cad5d8e4081fcac379",
+            "projectcode": "",
+            "buyerperson": "贺倩",
+            "buyertel": "0473-6995512",
+            "winnertel": "",
+            "href": "http://zrzy.wuhai.gov.cn/zrzy/1442195/1442202/1442239/2237769/index.html",
+            "budget": 0.0,
+            "bidamount": 0.0,
+            "subtype": "拟建"
+        }
+    ],
+        "com_package": [
         {
             "projectcode": "S202501020015",
             "packagecode": "",
@@ -334,238 +375,42 @@ if __name__ == '__main__':
     ],
     "pici" : int(1698740066)
 }
-    # result=check(row,rules={
-    #     "bidamount": {
-    #         "0101": {
-    #             "name": "互相校验(预算和中标金额的比例)",
-    #             "parent_name": "金额错误",
-    #             "parent_code": "01"
-    #         },
-    #         "0102": {
-    #             "name": "过大过小[100,10亿]",
-    #             "parent_name": "金额错误",
-    #             "parent_code": "01"
-    #         },
-    #         "0103": {
-    #             "name": "中标金额小数点位数超过4位",
-    #             "parent_name": "金额错误",
-    #             "parent_code": "01",
-    #             "checkFn": "01",
-    #         },
-    #         "0104": {
-    #             "name": "中标金额存在费率,折扣率",
-    #             "parent_name": "金额错误",
-    #             "parent_code": "01"
-    #         }
-    #     },
-    # #     "budget" : {
-    # #         "0101" : {
-    # #             "name" : "互相校验(预算和中标金额的比例)",
-    # #             "parent_name" : "金额错误",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0102" : {
-    # #             "name" : "过大过小[100,10亿]",
-    # #             "parent_name" : "金额错误",
-    # #             "parent_code" : "01"
-    # #         }
-    # #     },
-    # #     "title" : {
-    # #         "0101" : {
-    # #             "name" : "标题长度小于等于5",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0102" : {
-    # #             "name" : "长度大于等于100",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0201" : {
-    # #             "name" : "非汉字占比>55%",
-    # #             "parent_name" : "汉字占比",
-    # #             "parent_code" : "02"
-    # #         },
-    # #         "0303" : {
-    # #             "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
-    # #             "parent_name" : "语义表述不完整",
-    # #             "parent_code" : "03"
-    # #         }
-    # #     },
-    # #     "projectname" : {
-    # #         "0101" : {
-    # #             "name" : "项目名称长度小于等于5",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0102" : {
-    # #             "name" : "长度大于等于100",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0201" : {
-    # #             "name" : "非汉字占比>55%",
-    # #             "parent_name" : "汉字占比",
-    # #             "parent_code" : "02"
-    # #         },
-    # #         "0303" : {
-    # #             "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
-    # #             "parent_name" : "语义表述不完整",
-    # #             "parent_code" : "03"
-    # #         }
-    # #     },
-    # #     "winner" : {
-    # #         "0103" : {
-    # #             "name" : "包含叠词,异常词汇,特殊词汇",
-    # #             "parent_name" : "名称错误",
-    # #             "parent_code" : "01"
-    # #         }
-    # #     },
-    # #     "projectcode" : {
-    # #         "0102" : {
-    # #             "name" : "长度大于2小于等于4",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "02"
-    # #         },
-    # #         "0103" : {
-    # #             "name" : "长度大于50",
-    # #             "parent_name" : "长度类型",
-    # #             "parent_code" : "03"
-    # #         },
-    # #         "0201" : {
-    # #             "name" : "检查日期格式",
-    # #             "parent_name" : "日期格式",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0202" : {
-    # #             "name" : "包含异常关键字",
-    # #             "parent_name" : "异常关键字",
-    # #             "parent_code" : "02"
-    # #         },
-    # #         "0203" : {
-    # #             "name" : "不包含数字字母",
-    # #             "parent_name" : "不包含数字字母",
-    # #             "parent_code" : "03"
-    # #         },
-    # #         "0301" : {
-    # #             "name" : "汉字占比>60%且不包含中国电信",
-    # #             "parent_name" : "汉字占比",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0302" : {
-    # #             "name" : "连续汉字超过9个",
-    # #             "parent_name" : "汉字占比",
-    # #             "parent_code" : "03"
-    # #         }
-    # #     },
-    # #     "buyer" : {
-    # #         "0103" : {
-    # #             "name" : "包含叠词,异常词汇,特殊词汇",
-    # #             "parent_name" : "名称错误",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0104" : {
-    # #             "name" : "名称不完整",
-    # #             "parent_name" : "名称错误",
-    # #             "parent_code" : "01"
-    # #         }
-    # #     },
-    # #
-    # #     "area" : {
-    # #         "0101" : {
-    # #             "name" : "全国类数据",
-    # #             "parent_name" : "全国类型",
-    # #             "parent_code" : "01"
-    # #         },
-    # #         "0301" : {
-    # #             "name" : "省份不在[2,3]个字之间",
-    # #             "parent_name" : "长度异常类型",
-    # #             "parent_code" : "03"
-    # #         },
-    # #         "0302" : {
-    # #             "name" : "城市不在[3,11]个字之间",
-    # #             "parent_name" : "长度异常类型",
-    # #             "parent_code" : "03"
-    # #         },
-    # #         "0303" : {
-    # #             "name" : "区县不在[2,15]个字之间",
-    # #             "parent_name" : "长度异常类型",
-    # #             "parent_code" : "03"
-    # #         }
-    # #     },
-    # #     "multipackage" : {
-    # #         "1000" : {
-    # #             "name" : "分包类数据",
-    # #             "parent_name" : "分包类型",
-    # #             "parent_code" : "01"
-    # #         }
-    # #     },
-    # #     "toptype": {
-    # #     },
-    # #     "subtype": {
-    # #     },
-    # #     "publishtime": {
-    # #         "0201": {
-    # #             "name": "发布时间 > 开标时间  ",
-    # #             "parent_name": "数据范围类型",
-    # #             "parent_code": "02"
-    # #         }
-    # #     },
-    # #     "bidopentime": {
-    # #         "0201": {
-    # #             "name": "发布时间 > 开标时间",
-    # #             "parent_name": "数据范围类型",
-    # #             "parent_code": "02"
-    # #         }
-    # #     }
-    # # })
-    # # result = check(row, rules={
-    # #     "title": {
-    # #                 "0101" : {
-    # #                     "name" : "标题长度小于等于5",
-    # #                     "parent_name" : "长度类型",
-    # #                     "parent_code" : "01"
-    # #                 },
-    # #                 "0102" : {
-    # #                     "name" : "长度大于等于100",
-    # #                     "parent_name" : "长度类型",
-    # #                     "parent_code" : "01"
-    # #                 },
-    # #                 "0201" : {
-    # #                     "name" : "非汉字占比>55%",
-    # #                     "parent_name" : "汉字占比",
-    # #                     "parent_code" : "02"
-    # #                 },
-    # #                 "0303" : {
-    # #                     "name" : "包含叠词,异常词汇,特殊词汇(测试,公告公告等)",
-    # #                     "parent_name" : "语义表述不完整",
-    # #                     "parent_code" : "03"
-    # #                 }
-    # #             },
-    # #     "area": {
-    # #             "0101": {
-    # #                 "name": "全国类数据",
-    # #                 "parent_name": "全国类型",
-    # #                 "parent_code": "01"
-    # #             },
-    # #             "0301": {
-    # #                 "name": "省份不在[2,3]个字之间",
-    # #                 "parent_name": "长度异常类型",
-    # #                 "parent_code": "03"
-    # #             },
-    # #             "0302": {
-    # #                 "name": "城市不在[3,11]个字之间",
-    # #                 "parent_name": "长度异常类型",
-    # #                 "parent_code": "03"
-    # #             },
-    # #             "0303": {
-    # #                 "name": "区县不在[2,15]个字之间",
-    # #                 "parent_name": "长度异常类型",
-    # #                 "parent_code": "03"
-    # #             }
-    # #     }
-    # })
+
     result = check(row, rules={
+        # "capital": {
+        #     "0103": {
+        #         "name": "投资金额小数点位数超过4位",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     },
+        #     "0201": {
+        #         "name": "投资金额<0",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     }
+        # },
+        # "budget": {
+        #     "0101": {
+        #         "name": "预算/中标金额,不在[0.7,1.3]",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     },
+        #     "0102": {
+        #         "name": "过大过小,不在[100,10亿]",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     },
+        #     "0103": {
+        #         "name": "预算小数点位数超过4位",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     },
+        #     "0201": {
+        #         "name": "预算<0",
+        #         "parent_name": "金额错误",
+        #         "parent_code": "01"
+        #     }
+        # },
         #  "bidendtime" : {
         #     "0101" : {
         #         "name" : "投标截止日期<投标文件递交开始时间",
@@ -590,10 +435,16 @@ if __name__ == '__main__':
         #         "parent_code": "01"
         #     }
         # },
-        "note": {
-            },
-        # "price": {
-        # },
+        # "toptype": {
+        #     "0101": {
+        #         "name": "数据长度<2",
+        #         "parent_name": "长度错误",
+        #         "parent_code": "01",
+        #
+        #     }
+        # }
+        "project_stage_code": {
+        },
         # "sort": {
         # },
         # "city": {

+ 195 - 0
client_mongo_mysql_nzj_liantong.py

@@ -0,0 +1,195 @@
+# coding:utf-8
+import time
+from a2s.tools import json_serialize, json_deserialize
+from a2s.a2s_client import a2s_execute
+from docs.config import ReluMongodb
+from util.mogodb_helper import MongoDBInterface
+from pymongo import MongoClient
+from util.mysql_tool import MysqlUtil
+import json
+from datetime import datetime, timedelta
+from bson import ObjectId
+
+# Shared MongoDB helper built from the ReluMongodb settings; get_rule() uses it to look up rule ids.
+ReluClient = MongoDBInterface(ReluMongodb)
+
+# Settings of the quality-evaluation (a2s) service
+a2s_ip = "172.20.100.235:9090"
+topic = "quality_bid"
+# Topic used for local testing
+# topic = "test_quality_bid"
+timeout = 300
+
+
+# 开始评估
+def start_quality(data: dict, rules_id: int, a2s_ip, topic, timeout, retry=3):
+    """
+    Send one record to the evaluation service and return the decoded reply.
+
+    The request is attempted up to ``retry`` times.  An attempt that returns
+    None or raises is abandoned (the exception is printed) and the next
+    attempt starts immediately.
+
+    :param data: the record to evaluate
+    :param rules_id: id of the rule set to apply
+    :param a2s_ip: address of the a2s service
+    :param topic: a2s topic to call
+    :param timeout: timeout handed to a2s_execute
+    :param retry: maximum number of attempts
+    :return: the deserialized response, or {} when every attempt failed
+    """
+    # SSL is not used for this call, so the channel is not secure.
+    payload = json_serialize({"data": data, "rules_id": rules_id})
+    attempts_left = retry
+    while attempts_left > 0:
+        attempts_left -= 1
+        print("topic",topic)
+        try:
+            raw = a2s_execute(a2s_ip, topic, timeout, payload)
+            if raw is not None:
+                return json_deserialize(raw)
+        except Exception as e:
+            print(e)
+    return {}
+
+# 获取规则ID
+def get_rule(company, version):
+    """Return the rule id that ReluClient finds for the given company and version."""
+    return ReluClient.find_rule_by_company(ReluMongodb["col"], company, version)
+
+def find_error_id(conn, cleaned_key, sub_value):
+    """
+    Look up the id of an ``error_dict`` row.
+
+    :param conn: MySQL connection passed through to MysqlUtil.query_data
+    :param cleaned_key: value matched against the ``fields`` column
+    :param sub_value: value matched against the ``error`` column
+    :return: the id of the first matching row, or None (after printing a message) when nothing matches
+    """
+    sql = """SELECT id FROM error_dict WHERE fields = %s AND error = %s"""
+    rows = MysqlUtil.query_data(conn, sql, (cleaned_key, sub_value))
+    # Rows look like [(10,)], so the id is the first column of the first row.
+    if rows:
+        return rows[0][0]
+
+    print(f"Error: No matching record found for fields={cleaned_key}, error={sub_value}")
+    return None
+
+def insert_batch_data(conn, params):
+    """
+    Insert one analysis row into ``nzj_analysis_liantong``.
+
+    The statement is INSERT IGNORE and takes its 12 values from ``params``,
+    in the column order listed in the SQL text.
+    """
+    sql = """INSERT IGNORE INTO nzj_analysis_liantong (mongoid, area, city, district, score, error_type, create_time,
+              projectname,title,owner,project_stage_code,capital) 
+               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )"""
+    MysqlUtil.insert_data(conn, sql, params)
+
+def insert_dynamic_error_field(conn, cleaned_key, error_ids, mongoid):
+    """
+    Store the error ids of one field in that field's ``<cleaned_key>_error`` column.
+
+    :param conn: MySQL connection passed through to MysqlUtil.update_data
+    :param cleaned_key: field name; the updated column is ``<cleaned_key>_error``
+    :param error_ids: iterable of error ids, saved as one comma-separated string
+    :param mongoid: value of the ``mongoid`` column that selects the row to update
+    :raises ValueError: if ``cleaned_key`` is not a plain identifier
+    """
+    import re
+
+    # A column name cannot be sent as a bound parameter, so it is interpolated
+    # into the statement.  Accept only letters, digits and underscores so that
+    # arbitrary SQL can never reach the query text.
+    if not isinstance(cleaned_key, str) or not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", cleaned_key):
+        raise ValueError(f"invalid field name for error column: {cleaned_key!r}")
+
+    query = f"""
+        UPDATE nzj_analysis_liantong 
+        SET {cleaned_key}_error = %s 
+        WHERE mongoid = %s
+    """
+    # Several error ids are stored in one column, separated by commas.
+    error_ids_str = ','.join(map(str, error_ids))
+    params = (error_ids_str, mongoid )
+
+    MysqlUtil.update_data(conn, query, params)
+
+def has_non_empty_qa(data):
+    """
+    Tell whether a quality-check response reports at least one problem.
+
+    :param data: response dict; its ``data`` entry holds the per-field results
+    :return: True if any key of ``data["data"]`` ends with ``_qa`` and has a
+             non-empty value (not None, not an empty dict/list, ...)
+    """
+    results = data.get('data', {})
+    return any(name.endswith('_qa') and results[name] for name in results)
+
+def parse_timestamp(timestamp):
+    """
+    Convert a Unix timestamp to a local ``datetime``.
+
+    :param timestamp: number or numeric string; any falsy value (None, "", 0) yields None
+    :return: the datetime, or None when the value is falsy or cannot be converted to int
+    """
+    if timestamp:
+        try:
+            return datetime.fromtimestamp(int(timestamp))
+        except (ValueError, TypeError):
+            pass
+    return None
+
+def batch_load_data():
+    """
+    Batch data-quality check for the ``usermail_lt_nzj`` collection.
+
+    Every document is read in ascending ``_id`` order, sent to the evaluation
+    service through start_quality(), and - when the response contains at
+    least one non-empty ``*_qa`` entry - written to MySQL with
+    insert_batch_data().  Documents whose response code is not 200 are
+    skipped.  If anything raises, the error and its traceback are printed and
+    the scan is retried after 10 seconds, resuming from the last ``_id`` that
+    was reached.
+    """
+    # Today's date as a string; stored in the create_time column of every row written by this run.
+    today_date = datetime.now().strftime("%Y-%m-%d")
+    # Rule lookup by its required keys: company name (user id) and version number.
+    rules_id = get_rule("中国联通-拟在建", "v1.4.4")
+    print(rules_id)
+    # Initialise MySQL.
+    # NOTE(review): host and credentials are hard-coded; they should be moved to configuration or the environment.
+    conn = MysqlUtil.connect_to_mysql(host='172.20.45.129', port='4000', user='root', password='=PDT49#80Z!RVv52_z',database='quality')
+
+    # Smallest possible ObjectId, so the first pass starts at the beginning of the collection.
+    max_id = ObjectId("0" * 24)
+
+    while True:
+        mongo_client = MongoClient('mongodb://172.20.17.61:27080/', unicode_decode_error_handler="ignore",directConnection=True)  # production environment
+        try:
+            coll_user = mongo_client.jyqyfw["usermail_lt_nzj"]
+            # Rebuilt on every attempt: after a failure max_id holds the _id of the
+            # document that was being processed, so the retry resumes there instead
+            # of scanning the whole collection again.
+            query = {"_id": {"$gte": max_id}}
+            for item in coll_user.find(query).sort("_id", 1):
+                print("------数据处理开始--------")
+                max_id = item["_id"]
+                item["_id"] = str(item["_id"])
+                print(f"正在处理数据: {max_id}")
+                # Quality check
+                result = start_quality(item, rules_id, a2s_ip, topic, timeout)
+                print(result)
+                if result.get("code") != 200:
+                    # The check failed for this document; skip it.
+                    continue
+                # Only documents with at least one reported error are stored.
+                if not has_non_empty_qa(result):
+                    continue
+
+                data = result.get("data", {})
+                params = (
+                    item["_id"],
+                    item.get("area", ""),
+                    item.get("city", ""),
+                    item.get("district", ""),
+                    data.get("score", ""),
+                    json.dumps(data),
+                    today_date,
+                    item.get('projectname', ""),
+                    item.get("title", ""),
+                    item.get("owner", ""),
+                    item.get("project_stage_code", ""),
+                    item.get("capital", ""),
+                )
+                insert_batch_data(conn, params)
+            print("---- 数据处理完成 ----")
+            break
+        except Exception as e:
+            print(f"错误: {e}")
+            import traceback
+            traceback.print_exc()  # print the full stack trace
+            time.sleep(10)
+        finally:
+            # A new client is opened on every attempt, so release this one's connections.
+            mongo_client.close()
+
+
+# Run the batch quality check when this file is executed as a script.
+if __name__ == '__main__':
+    batch_load_data()
+

+ 94 - 15
tables/fields/NoField.py

@@ -50,7 +50,11 @@ class NoFieldChecker(object):
             "sortstr":self.check_sort,
             "price":self.check_price,
             "winnerorder":self.check_winnerorder,
-            "note":self.check_note
+            "note":self.check_note,
+            "publish_org":self.check_publish_org,
+            "projectinfo":self.check_projectinfo,
+            "capital":self.check_capital,
+            "project_stage_code":self.check_project_stage
         }
 
     def check_bidamount(self,obj,catch_content: CatchContentObject) -> bool:
@@ -66,19 +70,6 @@ class NoFieldChecker(object):
                 return True
         return False
 
-    def check_owner(self,obj, catch_content: CatchContentObject) -> bool:
-        """
-        业主单位名称为空检测,除中标类型的标讯,其他类型标讯不检查这个字段是否为空
-        :param obj:代表一个item
-        :return:返回true 代表异常
-        """
-        subtype = obj.get("subtype", "")
-        if subtype in ["拟建"]:
-            owner = obj.get("owner")
-            if not owner:
-                return True
-        return False
-
     def check_winner(self,obj, catch_content: CatchContentObject) -> bool:
         """
         中标单位名称为空检测,除中标类型的标讯,其他类型标讯不检查这个字段是否为空
@@ -179,7 +170,7 @@ class NoFieldChecker(object):
         :return:返回true 代表异常
         """
         toptype = obj.get("toptype", "")
-        if toptype  not in ["拟建","采购意向"]:
+        if toptype not in ["拟建","采购意向"]:
             projectcode = obj.get("projectcode")
             if not projectcode:
                 return True
@@ -476,4 +467,92 @@ class NoFieldChecker(object):
         if toptype == "预告" :
             if "note" not in obj:
                 return True  # 如果没有note字段,视为无
+        return False
+
+    def check_publish_org(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the issuing organisation (``publish_org``).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (the field is missing or empty)
+        """
+        return not obj.get("publish_org", "")
+    def check_projectinfo(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the policy attachment (``projectinfo``).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (the field is missing or empty)
+        """
+        return not obj.get("projectinfo", "")
+    def check_owner(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the owner organisation name (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``owner`` is missing or empty)
+        """
+        return not obj.get("owner", "")
+    def check_total_investment(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the total investment amount (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``total_investment`` is missing or empty)
+        """
+        return not obj.get("total_investment", "")
+    def check_area_code(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the province code (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``area_code`` is missing or empty)
+        """
+        return not obj.get("area_code", "")
+
+    def check_city_code(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the city code (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``city_code`` is missing or empty)
+        """
+        return not obj.get("city_code", "")
+    def check_capital(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the investment amount (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``capital`` is missing or falsy; note that 0 counts as empty)
+        """
+        return not obj.get("capital", "")
+    def check_project_stage(self, obj, catch_content: CatchContentObject) -> bool:
+        """
+        Empty check for the project stage code (Unicom planned/under-construction data).
+
+        :param obj: the item being checked
+        :param catch_content: not used by this check
+        :return: True means abnormal (``project_stage_code`` is missing or empty)
+        """
+        # The item stores the value under "project_stage_code"; reading any
+        # other key would report every item as abnormal.
+        project_stage = obj.get("project_stage_code", "")
+        return not project_stage

+ 11 - 5
tables/fields/budget.py

@@ -66,9 +66,12 @@ class BudgetChecker(object):
         :param price:
         :return: 返回true 代表异常
         """
-        if 100 < budget < 1000000000:
+        if budget:
+            if 100 < budget < 1000000000:
+                return False
+            return True
+        else:
             return False
-        return True
 
     @staticmethod
     def check0103(budget: float) -> bool :
@@ -106,6 +109,9 @@ class BudgetChecker(object):
         预算金额 < 中标金额,视为异常
         :return: 返回true 代表异常
         """
-        if budget < bidamount:
-            return True
-        return False
+        if budget and bidamount:
+            if budget < bidamount:
+                return True
+            return False
+        else:
+            return False

+ 56 - 0
tables/fields/capital.py

@@ -0,0 +1,56 @@
+"""
+    capital投资金额字段检查
+"""
+
+
+class CapitalChecker(object):
+    """
+    Value checks for the ``capital`` (investment amount) field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the ``checkFn`` callable that detects it.  Every check
+    returns True when the value is abnormal.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0103": {
+                "name": "投资金额小数点位数超过4位",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0103
+            },
+            "0201": {
+                "name": "投资金额<0",
+                "parent_name": "金额错误",
+                "parent_code": "01",
+                "checkFn": self.check0201
+            }
+        }
+
+    @staticmethod
+    def check0103(capital: float) -> bool:
+        """
+        Flag an investment amount written with more than 4 decimal places.
+
+        :param capital: investment amount; None and non-numeric values are not flagged
+        :return: True means abnormal
+        """
+        from decimal import Decimal, InvalidOperation
+
+        if capital is None:
+            return False
+        try:
+            # Go through str() so the digits are the ones the value prints with,
+            # not the float's binary expansion.  Decimal also understands the
+            # exponent form (e.g. 1e-05), which contains no '.' to split on.
+            amount = Decimal(str(capital))
+        except InvalidOperation:
+            return False
+        if not amount.is_finite():
+            return False
+        # A negative exponent is the number of digits after the decimal point.
+        return -amount.as_tuple().exponent > 4
+
+    @staticmethod
+    def check0201(capital: float) -> bool:
+        """
+        Flag a negative investment amount.
+
+        :param capital: investment amount; None is not flagged
+        :return: True means abnormal
+        """
+        if capital is None:
+            return False
+        return capital < 0
+

+ 15 - 9
tables/fields/projectcode.py

@@ -102,12 +102,14 @@ class ProjectcodeChecker(object):
 
     # 检查projectcode长度小于等于4大于2
     def check0102(self,projectcode: str) -> bool:
-        return 2 < len(projectcode) <= 4
+        """Return True (abnormal) when the code is 3 or 4 characters long; an empty code is not flagged."""
+        if not projectcode:
+            return False
+        return 2 < len(projectcode) <= 4
 
     @staticmethod
     # 检查projectcode长度大于50
     def check0103( projectcode: str) -> bool:
-        return len(projectcode) > 50
+        """Return True (abnormal) when the code is longer than 50 characters; an empty code is not flagged."""
+        if not projectcode:
+            return False
+        return len(projectcode) > 50
 
     def check0201(self, projectcode: str) -> bool:
         def is_valid_date_format(s):
@@ -117,16 +119,20 @@ class ProjectcodeChecker(object):
         return is_valid_date_format(projectcode)
 
     def check0202(self, projectcode: str) -> bool:
-        codeUnConReg = re.compile(r"(null|勘察|测试|设计|监理|范围|分包|日)")
-        return bool(codeUnConReg.search(projectcode))
+        """Return True (abnormal) when the code contains one of the listed abnormal keywords; an empty code is not flagged."""
+        if not projectcode:
+            return False
+        codeUnConReg = re.compile(r"(null|勘察|测试|设计|监理|范围|分包|日)")
+        return bool(codeUnConReg.search(projectcode))
 
     def check0203(self, projectcode: str) -> bool:
-        return not any(char.isalnum() for char in projectcode)
+        """Return True (abnormal) when the code has no alphanumeric character at all; an empty code is not flagged."""
+        if not projectcode:
+            return False
+        return not any(char.isalnum() for char in projectcode)
 
     def check0301(self, projectcode: str) -> bool:
-        chinese_chars = [char for char in projectcode if '\u4e00' <= char <= '\u9fff']
-        chinese_chars_ratio = len(chinese_chars) / len(projectcode)
-        return chinese_chars_ratio > 0.6 and "中国电信" not in projectcode
+        """Return True (abnormal) when more than 60% of the characters are Chinese and the code does not contain "中国电信"."""
+        if not projectcode:
+            # Nothing to measure; this also avoids dividing by a zero length.
+            return False
+        chinese_chars = [char for char in projectcode if '\u4e00' <= char <= '\u9fff']
+        chinese_chars_ratio = len(chinese_chars) / len(projectcode)
+        return chinese_chars_ratio > 0.6 and "中国电信" not in projectcode
 
     def check0302(self, projectcode: str) -> bool:
-        return len(re.findall(r'[\u4e00-\u9fa5]{9,}', projectcode)) > 0
+        """Return True (abnormal) when the code contains a run of 9 or more consecutive Chinese characters; an empty code is not flagged."""
+        if not projectcode:
+            return False
+        return len(re.findall(r'[\u4e00-\u9fa5]{9,}', projectcode)) > 0

+ 32 - 0
tables/fields/publish_org.py

@@ -0,0 +1,32 @@
+"""
+    发文单位字段检查
+"""
+class Publish_orgChecker(object):
+    """
+    Value checks for the ``publish_org`` (issuing organisation) field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the ``checkFn`` callable that detects it.
+    """
+
+    def __init__(self):
+        length_rule = {
+            "name": "数据长度<2",
+            "parent_name": "长度错误",
+            "parent_code": "01",
+            "checkFn": self.check0101
+        }
+        self.errors_tables = {"0101": length_rule}
+
+    @staticmethod
+    def check0101(publish_org: str) -> bool:
+        """
+        Flag a non-empty value shorter than 2 characters.
+
+        :param publish_org: the field value; an empty value is not flagged by this check
+        :return: True means abnormal
+        """
+        if not publish_org:
+            return False
+        return len(publish_org) < 2
+
+

+ 31 - 0
tables/fields/subtype.py

@@ -0,0 +1,31 @@
+"""
+    信息二级分类字段检查
+"""
+
+
+class SubtypeChecker(object):
+    """
+    Value checks for the ``subtype`` (second-level information category) field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the ``checkFn`` callable that detects it.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "数据长度<2",
+                "parent_name": "长度错误",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    @staticmethod
+    def check0101(subtype: str) -> bool:
+        """
+        Flag a value shorter than 2 characters.
+
+        :param subtype: the field value; None is handled like the empty string
+        :return: True means abnormal
+        """
+        # len(None) would raise; a missing value is too short just like "".
+        if subtype is None:
+            return True
+        return len(subtype) < 2
+
+

+ 31 - 0
tables/fields/toptype.py

@@ -0,0 +1,31 @@
+"""
+    信息一级分类字段检查
+"""
+
+
+class ToptypeChecker(object):
+    """
+    Value checks for the ``toptype`` (first-level information category) field.
+
+    ``errors_tables`` maps each error code to its display name, its parent
+    category and the ``checkFn`` callable that detects it.
+    """
+
+    def __init__(self):
+        self.errors_tables = {
+            "0101": {
+                "name": "数据长度<2",
+                "parent_name": "长度错误",
+                "parent_code": "01",
+                "checkFn": self.check0101
+            }
+        }
+
+    @staticmethod
+    def check0101(toptype: str) -> bool:
+        """
+        Flag a value shorter than 2 characters.
+
+        :param toptype: the field value; None is handled like the empty string
+        :return: True means abnormal
+        """
+        # len(None) would raise; a missing value is too short just like "".
+        if toptype is None:
+            return True
+        return len(toptype) < 2
+
+