liumiaomiao 2 jaren geleden
bovenliggende
commit
c5327d2b43

+ 8 - 0
Dataquality/.idea/.gitignore

@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

+ 10 - 0
Dataquality/.idea/Dataquality.iml

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 6 - 0
Dataquality/.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 4 - 0
Dataquality/.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (Dataquality)" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
Dataquality/.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Dataquality.iml" filepath="$PROJECT_DIR$/.idea/Dataquality.iml" />
+    </modules>
+  </component>
+</project>

+ 0 - 0
Dataquality/BasicMethods/__init__.py


BIN
Dataquality/BasicMethods/__pycache__/__init__.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/es_tools.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/excel_tools.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/mongo_tools.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/mysql_tools.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/querys.cpython-38.pyc


BIN
Dataquality/BasicMethods/__pycache__/time_tools.cpython-38.pyc


+ 5 - 0
Dataquality/BasicMethods/area_quality.py

@@ -0,0 +1,5 @@
+class AreaUtil:
+    """Placeholder for area-level export helpers; neither method is implemented yet."""
+
+    def export_execl(self):
+        """Not implemented: does nothing and returns None."""
+        return None
+
+    def export_area(self, name):
+        """Not implemented: ``name`` is accepted but ignored; returns None."""
+        return None

+ 85 - 0
Dataquality/BasicMethods/es_tools.py

@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+#date : 2023-07-03
+from pymongo import MongoClient
+from BasicMethods import mongo_tools
+from elasticsearch import Elasticsearch
+class EsUtil:
+    """Static helpers that read documents from Elasticsearch and store them in MongoDB."""
+
+    @staticmethod
+    def es_query_save(query, **kwargs):
+        """
+        Run ``query`` against Elasticsearch and copy every hit's ``_source`` into MongoDB.
+
+        :param query: Elasticsearch request body.
+        :param kwargs: configuration forwarded unchanged to ``mongo_tools.CoonUtil.get_coon``
+            and to the helpers of this class, which read ``es_host``, ``es_http_auth``,
+            ``es_port``, ``index``, ``timeout`` and ``size``.
+        :return: None
+        """
+        coon = mongo_tools.CoonUtil.get_coon(**kwargs)
+        es = EsUtil.get_es(**kwargs)
+        result_all = EsUtil.get_es_result(es, query, **kwargs)
+        total = result_all['hits']['total']["value"]
+        results = result_all['hits']['hits']
+        scroll_id = result_all['_scroll_id']
+        print("数据总量:", total)
+        count = 0
+        # Page until Elasticsearch returns an empty page. Each non-empty page is one
+        # bulk insert, so insert_many never receives an empty list (PyMongo rejects
+        # that) and the loop does not depend on the reported total being exact.
+        while results:
+            save_bulk = [res["_source"] for res in results]
+            mongo_tools.MongoSentence.insert_many(coon, save_bulk)
+            count += len(save_bulk)
+            print(count, "数据已保存")
+            # The scroll parameter must be supplied, otherwise the request fails.
+            results = EsUtil.get_es_scroll(es, scroll_id, **kwargs)
+
+    @staticmethod
+    def get_es(**kwargs):
+        """
+        Create the Elasticsearch client.
+
+        :param kwargs: must contain ``es_host``, ``es_http_auth`` and ``es_port``.
+        :return: an ``Elasticsearch`` client instance.
+        """
+        es = Elasticsearch(
+            [{"host": kwargs["es_host"], "http_auth": kwargs["es_http_auth"], "port": kwargs["es_port"]}])
+        return es
+
+    @staticmethod
+    def get_es_count(es, query, **kwargs):
+        """
+        Return the raw response of a count request.
+
+        :param es: Elasticsearch client.
+        :param query: request body.
+        :param kwargs: must contain ``index`` and ``timeout``.
+        :return: whatever ``es.count`` returns.
+        """
+        result_all_count = es.count(index=kwargs["index"], body=query, request_timeout=kwargs["timeout"])
+        return result_all_count
+
+    @staticmethod
+    def get_es_result(es, query, **kwargs):
+        """
+        Run the initial search and open a scroll context kept alive for two minutes.
+
+        :param es: Elasticsearch client.
+        :param query: request body.
+        :param kwargs: must contain ``index``, ``timeout`` and ``size`` (page size).
+        :return: whatever ``es.search`` returns.
+        """
+        result_all = es.search(index=kwargs["index"], body=query, request_timeout=kwargs["timeout"],
+                               scroll='2m', size=kwargs["size"])
+
+        return result_all
+
+    @staticmethod
+    def get_es_scroll(es, scroll_id,  **kwargs):
+        """
+        Fetch the next page of an open scroll.
+
+        :param es: Elasticsearch client.
+        :param scroll_id: scroll id taken from the initial search response.
+        :param kwargs: must contain ``timeout``.
+        :return: the ``hits.hits`` list of the scroll response.
+        """
+        query_scroll = es.scroll(scroll_id=scroll_id, scroll='2m',
+                                 request_timeout=kwargs["timeout"])['hits']['hits']
+        return query_scroll
+
+
+esutil = EsUtil()

+ 139 - 0
Dataquality/BasicMethods/excel_tools.py

@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+#date : 2021/4/28
+
+import xlsxwriter
+import pandas as pd
+import openpyxl
+
+
+class ExcelUtil:
+    """Helpers for writing lists of records to .xlsx files (xlsxwriter) and reading sheets (pandas)."""
+
+    @staticmethod
+    def _build_row(item, vlist):
+        """Return the values of ``item`` in ``vlist`` order; keys missing from ``item`` become ''."""
+        if not item:
+            # An empty record produces an empty row rather than a row of blanks.
+            return []
+        return [item[k] if k in item else '' for k in vlist]
+
+    @staticmethod
+    def _write_sheet(path, sheet_name, title_name, alist, vlist):
+        """Create the workbook at ``path`` and write the title, header row and data rows."""
+        workbook = xlsxwriter.Workbook(path, {'constant_memory': True, 'strings_to_urls': False})
+        worksheet = workbook.add_worksheet(name=sheet_name)
+        # Adding formats slows writing down.
+        header_format = workbook.add_format({
+            'font_size': 12,
+            'bold': True,
+            'align': 'center',
+            'valign': 'vcenter',
+        })
+        title_format = workbook.add_format({
+            'font_size': 16,
+            'bold': True,
+            'text_wrap': True,
+            'align': 'center',
+            'valign': 'vcenter',
+        })
+        worksheet.merge_range('A1:U1', title_name, title_format)
+
+        # The header goes on sheet row index 1 and is taken from the keys of the
+        # record at list index 1 (not from ``vlist``).
+        # NOTE(review): with fewer than two records no header is written — confirm intent.
+        for row_idx, record in enumerate(alist):
+            if row_idx == 1:
+                for col_idx, heading in enumerate(record):
+                    worksheet.write(row_idx, col_idx, heading, header_format)
+
+        # Data rows start at sheet row index 2, below title and header.
+        count = 2
+        for item in alist:
+            worksheet.write_row(count, 0, ExcelUtil._build_row(item, vlist))
+            count += 1
+            if count % 1000 == 0:
+                print(count)
+        if count > 2:
+            # Column style is set once, and only when at least one row was written.
+            worksheet.set_column('A:A', 10, header_format)
+        workbook.close()
+
+    @staticmethod
+    def writer_excel(path, sheet_name, title_name, alist, vlist):
+        """
+        Write records to a new Excel file.
+
+        :param path: output file path.
+        :param sheet_name: name of the worksheet to create.
+        :param title_name: title text written into the merged range A1:U1.
+        :param alist: iterable of dict records; it is iterated twice.
+        :param vlist: keys to extract from each record, in column order.
+        :return: None
+        """
+        ExcelUtil._write_sheet(path, sheet_name, title_name, alist, vlist)
+
+    @staticmethod
+    def add_excel(path, sheet_name, title_name, alist, vlist):
+        """
+        Write records to a new Excel file; behaves exactly like ``writer_excel``.
+
+        :param path: output file path.
+        :param sheet_name: name of the worksheet to create.
+        :param title_name: title text written into the merged range A1:U1.
+        :param alist: iterable of dict records; it is iterated twice.
+        :param vlist: keys to extract from each record, in column order.
+        :return: None
+        """
+        ExcelUtil._write_sheet(path, sheet_name, title_name, alist, vlist)
+
+    @staticmethod
+    def from_excel(path,sheet_name):
+        """
+        Read one sheet into a DataFrame, replacing missing cells with "".
+
+        NOTE(review): the frame and its header list are computed but not returned,
+        so callers currently receive None — this looks unfinished; confirm.
+        """
+        # Read the sheet contents.
+        data_all_keys = pd.read_excel(path, sheet_name=sheet_name).fillna("")
+        # Collect all column headers.
+        data_header = list(data_all_keys.columns)
+        # A single column can be read with e.g. data_all_keys["spidercode"].

+ 255 - 0
Dataquality/BasicMethods/mongo_tools.py

@@ -0,0 +1,255 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+#date : 2023-07-03
+from pymongo import MongoClient
+from bson import ObjectId
+
+class MongoUtil:
+    """Factory for pymongo collection handles."""
+
+    @staticmethod
+    def get_coon(host="192.168.3.167", port=27080, database=None, collection=None, authdb=None, authuser=None, authpass=None):
+        """
+        Open a MongoClient and return the requested collection.
+
+        :param host: server host.
+        :param port: server port.
+        :param database: database name (required).
+        :param collection: collection name (required).
+        :param authdb: database to authenticate against; authentication is skipped when None.
+        :param authuser: user name passed to ``authenticate``.
+        :param authpass: password passed to ``authenticate``.
+        :return: the ``client[database][collection]`` handle.
+        :raises RuntimeError: when ``database`` or ``collection`` is None.
+        """
+        if database is None:
+            raise RuntimeError('database is None')
+        if collection is None:
+            raise RuntimeError('collection is None')
+        client = MongoClient(host, port, unicode_decode_error_handler="ignore")
+        print(client)
+        if authdb is not None:
+            client[authdb].authenticate(authuser, authpass)
+        return client[database][collection]
+
+
+class CoonUtil:
+    """Adapter that maps a flat configuration dict onto ``MongoUtil.get_coon``."""
+
+    @staticmethod
+    def get_coon(**kwargs):
+        """
+        Return a collection handle built from configuration keys.
+
+        :param kwargs: must contain ``mg_host``, ``mg_port``, ``database`` and
+            ``collection``; any other keys are ignored.
+        :return: whatever ``MongoUtil.get_coon`` returns.
+        """
+        return MongoUtil.get_coon(
+            host=kwargs["mg_host"],
+            port=kwargs["mg_port"],
+            database=kwargs["database"],
+            collection=kwargs["collection"],
+        )
+class MongoSentence:
+    """
+    Thin static wrappers around collection operations.
+
+    ``coon`` is the collection handle itself (as returned by ``CoonUtil.get_coon``),
+    so every method calls it directly. Going through ``coon.collection`` would address
+    a different collection named ``<name>.collection``.
+    """
+
+    @staticmethod
+    def count(coon, nosql=None):
+        """
+        Count documents.
+
+        :param coon: collection handle.
+        :param nosql: filter; all documents are counted when None.
+        :return: the count reported by the driver.
+        """
+        # NOTE(review): Cursor.count()/Collection.count() are legacy driver APIs — confirm driver version.
+        if nosql is None:
+            return coon.find({}).count()
+        else:
+            return coon.count(nosql)
+
+    @staticmethod
+    def find_all(coon, columns=None):
+        """
+        Return a cursor over the selected fields.
+
+        NOTE(review): the filter is hard-coded to ``item == "5f0bcb65fc58d361fb9027f6"``,
+        so this does not return the full collection — confirm that is intended.
+
+        :param coon: collection handle.
+        :param columns: field names to project; all fields when None.
+        :return: cursor (batch size 1000 when a projection is given).
+        """
+        if columns is None:
+            vlist = coon.find({"item" :"5f0bcb65fc58d361fb9027f6"})
+        else:
+            cols = {c: 1 for c in columns}
+            vlist = coon.find({"item" :"5f0bcb65fc58d361fb9027f6"}, cols).batch_size(1000)
+        return vlist
+
+    @staticmethod
+    def find_by_Nosql(coon, nosql=None, columns=None):
+        """
+        Return a cursor for ``nosql``, optionally projecting ``columns``.
+
+        :param coon: collection handle.
+        :param nosql: filter; defaults to an empty filter.
+        :param columns: field names to project; all fields when None.
+        :return: cursor.
+        """
+        # A fresh dict per call avoids sharing one mutable default between calls.
+        nosql = {} if nosql is None else nosql
+        if columns is None:
+            return coon.find(nosql)
+        cols = {c: 1 for c in columns}
+        return coon.find(nosql, cols)
+
+    @staticmethod
+    def update_ir_ent_name_by_id(coon, oid, obj):
+        """Set the ``IR_ENTNAME`` field of the document whose ``_id`` is ``ObjectId(oid)``."""
+        coon.update_one({"_id": ObjectId(oid)}, {"$set": {"IR_ENTNAME": obj}})
+
+    @staticmethod
+    def update_by_id(coon, oid, obj):
+        """Apply ``{"$set": obj}`` to the document whose ``_id`` is ``ObjectId(oid)``."""
+        coon.update_one({"_id": ObjectId(oid)}, {"$set": obj})
+
+    @staticmethod
+    def find_one_by_company_name(coon, company):
+        """Return the number of documents whose ``company_name`` equals ``company``."""
+        return coon.find({"company_name": company}).count()
+
+    @staticmethod
+    def save(coon, obj):
+        """
+        Save one document.
+
+        :param coon: collection handle.
+        :param obj: document to save.
+        :return: None
+        """
+        # NOTE(review): Collection.save() is a legacy driver API — confirm driver version.
+        coon.save(obj)
+
+    @staticmethod
+    def insert_many(coon, bulk):
+        """
+        Insert a batch of documents.
+
+        :param coon: collection handle.
+        :param bulk: list of documents.
+        :return: None
+        """
+        coon.insert_many(bulk)
+
+    @staticmethod
+    def delcol_by_id(coon, id, column):
+        """
+        Remove one field from the document with the given ``_id``.
+
+        :param coon: collection handle.
+        :param id: ``_id`` value, used as given (not wrapped in ObjectId).
+        :param column: name of the field to unset.
+        :return: None
+        """
+        coon.update_one({"_id": id}, {"$unset": {column: ""}})
+
+    @staticmethod
+    def find_one_by_id(coon, nosql, column):
+        """Return the first document matching ``nosql`` with projection ``column``."""
+        return coon.find_one(nosql, column)
+
+    # This drops the whole collection.
+    @staticmethod
+    def clear(coon):
+        """
+        Drop the collection.
+
+        :param coon: collection handle.
+        :return: None
+        """
+        coon.drop()
+
+
+
+class Data_get():
+    """Read-side helpers: open a collection, sample ids, copy documents by id."""
+
+    @staticmethod
+    def get_con(host="192.168.3.167", port=27080, database=None, collection=None, authdb=None, authuser=None,
+                     authpass=None):
+        """
+        Connect to the server and return the requested collection.
+
+        :param host: server host.
+        :param port: server port.
+        :param database: database name (required).
+        :param collection: collection name (required).
+        :param authdb: database to authenticate against; authentication is skipped when None.
+        :param authuser: user name passed to ``authenticate``.
+        :param authpass: password passed to ``authenticate``.
+        :return: the ``client[database][collection]`` handle.
+        :raises RuntimeError: when ``database`` or ``collection`` is None.
+        """
+        if database is None:
+            raise RuntimeError('database is None')
+        if collection is None:
+            raise RuntimeError('collection is None')
+        client = MongoClient(host, port, unicode_decode_error_handler="ignore")
+        if authdb is not None:
+            client[authdb].authenticate(authuser, authpass)
+        return client[database][collection]
+
+    @staticmethod
+    def get_id_sample(con):
+        """Return the ``_id`` values (as strings) of up to 100 randomly sampled documents with ``repeat == 0``."""
+        pipeline = [{"$match":{"repeat":0}},{'$sample': {'size': 100}}, {"$project": {"_id": 1}}]
+        return [str(doc["_id"]) for doc in con.aggregate(pipeline)]
+
+    @staticmethod
+    def get_id_mongo(con):
+        """Unfinished stub: ``con`` is not used and None is returned."""
+        return None
+
+    @staticmethod
+    def data_ids_mongo(ids,con,save_con):
+        """
+        Copy the documents identified by ``ids`` from ``con`` into ``save_con``.
+
+        Each id is wrapped in ``ObjectId``; ids with no matching document are skipped,
+        and only the first match is saved via ``Data_save.save``.
+        """
+        for oid in ids:
+            matches = list(con.find({"_id": ObjectId(oid)}))
+            if matches:
+                Data_save.save(save_con, matches[0])
+
+class Data_save():
+    """Write-side helpers: open the target collection and store documents in it."""
+
+    @staticmethod
+    def save_con(host="192.168.3.167", port=27080, database=None, collection=None, authdb=None, authuser=None,
+                     authpass=None):
+        """
+        Connect to the server and return the requested collection.
+
+        :param host: server host.
+        :param port: server port.
+        :param database: database name (required).
+        :param collection: collection name (required).
+        :param authdb: database to authenticate against; authentication is skipped when None.
+        :param authuser: user name passed to ``authenticate``.
+        :param authpass: password passed to ``authenticate``.
+        :return: the ``client[database][collection]`` handle.
+        :raises RuntimeError: when ``database`` or ``collection`` is None.
+        """
+        if database is None:
+            raise RuntimeError('database is None')
+        if collection is None:
+            raise RuntimeError('collection is None')
+        client = MongoClient(host, port, unicode_decode_error_handler="ignore")
+        if authdb is not None:
+            client[authdb].authenticate(authuser, authpass)
+        return client[database][collection]
+
+    @staticmethod
+    def save(con, obj):
+        """
+        Save one document.
+
+        :param con: collection handle.
+        :param obj: document to save.
+        :return: None
+        """
+        con.save(obj)
+
+    @staticmethod
+    def insert_many(con, bulk):
+        """
+        Insert a batch of documents.
+
+        :param con: collection handle.
+        :param bulk: list of documents.
+        :return: None
+        """
+        con.insert_many(bulk)

+ 57 - 0
Dataquality/BasicMethods/mysql_tools.py

@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+import pymysql
+class Data_get():
+    """Static helpers around a pymysql connection: connect, run statements, close."""
+
+    @staticmethod
+    def connectdb(host,port,user,password,database):
+        """Open a pymysql connection with the given parameters and return it."""
+        db = pymysql.connect(host=host, port=port, user=user, password=password, database=database)
+        print("数据库已连接!")
+        return db
+
+    @staticmethod
+    def createtb(db, sql):
+        """Execute ``sql`` on a new cursor; errors propagate to the caller."""
+        cursor = db.cursor()
+        cursor.execute(sql)
+        print("数据库已创建!")
+
+    @staticmethod
+    def insert_data(db,sql):
+        """
+        Execute ``sql`` and commit; on failure roll back and report the error.
+
+        :param db: open connection.
+        :param sql: statement to execute.
+        :return: None
+        """
+        cursor = db.cursor()
+        try:
+            cursor.execute(sql)
+            db.commit()
+        except Exception as exc:
+            # Only ordinary errors are handled, so KeyboardInterrupt/SystemExit still
+            # propagate; the exception is printed so the failure reason is visible.
+            db.rollback()
+            print("执行语句失败", exc)
+
+    @staticmethod
+    def select_data(db,sql):
+        """
+        Execute ``sql`` and return all rows; on failure roll back and report the error.
+
+        :param db: open connection.
+        :param sql: query to execute.
+        :return: the result of ``cursor.fetchall()``, or None when the query failed.
+        """
+        cursor = db.cursor()
+        try:
+            cursor.execute(sql)
+            db.commit()
+            return cursor.fetchall()
+        except Exception as exc:
+            # Only ordinary errors are handled, so KeyboardInterrupt/SystemExit still
+            # propagate; the exception is printed so the failure reason is visible.
+            db.rollback()
+            print("执行语句失败", exc)
+            return None
+
+    @staticmethod
+    def closedb(db):
+        """Close the connection."""
+        db.close()

+ 257 - 0
Dataquality/BasicMethods/querys.py

@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+
+#按照省份、时间统计招标项目数量
+def query_sum_by_area_zb(area, starttime, endtime):
+    """Build a query matching ``area`` exactly with ``zbtime`` in [starttime, endtime)."""
+    area_clause = {"term": {"area": area}}
+    time_clause = {"range": {"zbtime": {"gte": starttime, "lt": endtime}}}
+    return {"query": {"bool": {"must": [area_clause, time_clause]}}}
+#按照省份、时间统计中标项目数量
+def query_sum_by_area_jg(area, starttime, endtime):
+    """
+    Build a query matching ``area`` exactly with ``jgtime`` in [starttime, endtime)
+    and ``bidstatus`` in one of the three award statuses listed below.
+    """
+    must = [
+        {"term": {"area": area}},
+        {"range": {"jgtime": {"gte": starttime, "lt": endtime}}},
+        {"terms": {"bidstatus": ["中标", "成交", "合同"]}},
+    ]
+    return {"query": {"bool": {"must": must}}}
+
+#按省份、区县、(公告行业)统计招标项目数量
+def query_sum_by_district_zb(shenfen_list,area,starttime, endtime):
+    """
+    Build a query restricted to the five fixed ``subscopeclass`` values below, with
+    ``area`` in ``shenfen_list``, ``district`` equal to ``area`` and ``zbtime`` in
+    [starttime, endtime).
+    """
+    industries = ["交通工程_道路", "交通工程_桥梁", "交通工程_隧道", "交通工程_其他", "交通工程_轨道"]
+    must = [
+        {"terms": {"subscopeclass": industries}},
+        {"terms": {"area": shenfen_list}},
+        {"term": {"district": area}},
+        {"range": {"zbtime": {"gte": starttime, "lt": endtime}}},
+    ]
+    return {"query": {"bool": {"must": must}}}
+#按省份、区县、(公告行业)统计中标项目数量
+def query_sum_by_district_jg(shenfen_list,area, starttime, endtime):
+    """
+    Build a query restricted to the five fixed ``subscopeclass`` values below, with
+    ``area`` in ``shenfen_list``, ``district`` equal to ``area``, ``jgtime`` in
+    [starttime, endtime) and ``bidstatus`` in one of the three award statuses.
+    """
+    industries = ["交通工程_道路", "交通工程_桥梁", "交通工程_隧道", "交通工程_其他", "交通工程_轨道"]
+    must = [
+        {"terms": {"subscopeclass": industries}},
+        {"terms": {"area": shenfen_list}},
+        {"term": {"district": area}},
+        {"range": {"jgtime": {"gte": starttime, "lt": endtime}}},
+        {"terms": {"bidstatus": ["中标", "成交", "合同"]}},
+    ]
+    return {"query": {"bool": {"must": must}}}
+# Sum of winning-bid amounts by province and district (announcement industry)
+def query_sum_by_district_bidamount(shenfen_list,area, starttime, endtime):
+    """
+    Build a query with ``area`` in ``shenfen_list``, ``district`` equal to ``area``,
+    ``jgtime`` in [starttime, endtime) and ``bidstatus`` in one of the three award
+    statuses, plus an ``age_stats`` aggregation that sums ``bidamount``.
+
+    No ``subscopeclass`` filter is applied here (the original had it commented out).
+    """
+    must = [
+        {"terms": {"area": shenfen_list}},
+        {"term": {"district": area}},
+        {"range": {"jgtime": {"gte": starttime, "lt": endtime}}},
+        {"terms": {"bidstatus": ["中标", "成交", "合同"]}},
+    ]
+    return {
+        "query": {"bool": {"must": must}},
+        "aggs": {"age_stats": {"sum": {"field": "bidamount"}}},
+    }
+#按省份区县及行业统计中标项目数量
+def query_sum_by_district_subscopeclass_jg(shenfen_list,sub,area, starttime, endtime):
+    """
+    Build a query with ``area`` in ``shenfen_list``, ``subscopeclass`` in ``sub``,
+    ``district`` equal to ``area``, ``jgtime`` in [starttime, endtime) and
+    ``bidstatus`` in one of the three award statuses.
+    """
+    must = [
+        {"terms": {"area": shenfen_list}},
+        {"terms": {"subscopeclass": sub}},
+        {"term": {"district": area}},
+        {"range": {"jgtime": {"gte": starttime, "lt": endtime}}},
+        {"terms": {"bidstatus": ["中标", "成交", "合同"]}},
+    ]
+    return {"query": {"bool": {"must": must}}}
+#按省份区县及行业统计招标项目数量
+def query_sum_by_district_subscopeclass_zb(shenfen_list,sub,area,starttime, endtime):
+    """
+    Build a query with ``area`` in ``shenfen_list``, ``subscopeclass`` in ``sub``,
+    ``district`` equal to ``area`` and ``zbtime`` in [starttime, endtime).
+    """
+    must = [
+        {"terms": {"area": shenfen_list}},
+        {"terms": {"subscopeclass": sub}},
+        {"term": {"district": area}},
+        {"range": {"zbtime": {"gte": starttime, "lt": endtime}}},
+    ]
+    return {"query": {"bool": {"must": must}}}
+#按省份区县及行业统计中标金额
+def query_sum_by_district_subscopeclass_bidamount(shenfen_list,sub, area,starttime, endtime):
+    """
+    Build an aggregation-only request (``size`` 0, ``track_total_hits`` on) with
+    ``area`` in ``shenfen_list``, ``subscopeclass`` in ``sub``, ``district`` equal to
+    ``area``, ``jgtime`` in [starttime, endtime) and ``bidstatus`` in one of the three
+    award statuses; ``age_stats`` sums ``bidamount``.
+    """
+    must = [
+        {"terms": {"area": shenfen_list}},
+        {"terms": {"subscopeclass": sub}},
+        {"term": {"district": area}},
+        {"range": {"jgtime": {"gte": starttime, "lt": endtime}}},
+        {"terms": {"bidstatus": ["中标", "成交", "合同"]}},
+    ]
+    return {
+        "track_total_hits": True,
+        "size": 0,
+        "query": {"bool": {"must": must}},
+        "aggs":{"age_stats":{"sum":{"field":"bidamount"}}},
+    }

+ 9 - 0
Dataquality/BasicMethods/record

@@ -0,0 +1,9 @@
+# Connect to 187 with username and password. NOTE(review): credentials below are stored in plain text.
+upwd = parse.quote_plus('SJZY@O17t8herB3B')
+data_db = MongoClient("mongodb://SJZY_RWESBid_Other:{}@172.17.145.163:27083,172.17.4.187:27082/".format(upwd), unicode_decode_error_handler="ignore")
+data_col = data_db["qfw"]["bidding"]
+
+upwd = parse.quote_plus('SJZY@O17t8herB3B')
+data_db=MongoClient("127.0.0.1:27187",unicode_decode_error_handler="ignore")
+data_db.admin.authenticate("dataFx", "data@fenxi")
+data_col = data_db["qfw"]["bidding"]

+ 17 - 0
Dataquality/BasicMethods/time_change.py

@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+import time,datetime
+
+# Convert date strings to integer timestamps (time.mktime interprets them as local time)
+match_time_1 = '2018-05-22 08:30:00'
+ans_time_stamp_1 = int(time.mktime(time.strptime(match_time_1, "%Y-%m-%d %H:%M:%S")))
+
+match_time_2 = '2018年05月22日'
+ans_time_stamp_2 = int(time.mktime(time.strptime(match_time_2, "%Y年%m月%d日")))
+
+
+# Format the current local time like 2021-09-07 14:39:32
+struct_time = time.localtime(time.time())  # current local time as a struct_time
+now_time = time.strftime("%Y-%m-%d %H:%M:%S", struct_time)
+print(now_time)

+ 29 - 0
Dataquality/BasicMethods/time_tools.py

@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+#date : 2021/5/20
+import time
+import pandas as pd
+class TimeTools:
+    """Helpers for splitting a date span into timestamp windows and for formatting timestamps."""
+
+    @staticmethod
+    def get_time(start_time,end_time,type_time):
+        """
+        Split [start_time, end_time] into consecutive windows.
+
+        The boundaries come from ``pd.date_range(..., freq=type_time)``; each boundary
+        is converted to an integer local-time timestamp plus one second.
+
+        :return: list of ``[window_start, window_end]`` pairs, one per adjacent
+            pair of boundaries (empty when there are fewer than two boundaries).
+        """
+        def to_stamp(mark):
+            return int(time.mktime(time.strptime(str(mark), "%Y-%m-%d %H:%M:%S"))) + 1
+
+        marks = pd.date_range(start=start_time, end=end_time, freq=type_time)
+        windows = []
+        for left, right in zip(marks[:-1], marks[1:]):
+            windows.append([to_stamp(left), to_stamp(right)])
+        return windows
+
+    @staticmethod
+    def time_format(publishtime_start,publishtime_end,format_type):
+        """
+        Convert two timestamps to local time and format them.
+
+        :param publishtime_start: start timestamp (anything ``int()`` accepts).
+        :param publishtime_end: end timestamp (anything ``int()`` accepts).
+        :param format_type: ``time.strftime`` format, e.g. "%Y/%m/%d" or "%Y-%m-%d".
+        :return: tuple ``(start_text, end_text, start_struct, end_struct)`` where the
+            structs are ``time.struct_time`` values.
+        """
+        start_struct = time.localtime(int(publishtime_start))
+        end_struct = time.localtime(int(publishtime_end))
+        start_text = time.strftime(format_type, start_struct)
+        end_text = time.strftime(format_type, end_struct)
+        return (start_text, end_text, start_struct, end_struct)
+
+
+
+

+ 18 - 0
Dataquality/BasicMethods/tongji_tools.py

@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : zhaolongyue
+#date : 2021/5/20
+import time
+import numpy as np
+
+# Compute the box-plot statistics.
+# NOTE(review): time_count_list is neither defined nor imported in this file; it must exist before this runs.
+percentile = np.percentile(time_count_list, (25, 50, 75), interpolation='linear')
+# Quartiles and whisker limits of the box plot.
+Q1 = percentile[0]  # 25th percentile (first quartile)
+Q2 = percentile[1]  # 50th percentile (median)
+Q3 = percentile[2]  # 75th percentile (third quartile)
+IQR = Q3 - Q1  # interquartile range
+ulim = Q3 + 1.5 * IQR  # upper limit: largest value still treated as a non-outlier
+llim = Q1 - 1.5 * IQR  # lower limit: smallest value still treated as a non-outlier
+llim = 0 if llim < 0 else llim  # clamp the lower limit at zero
+out_list = [llim/86400,ulim/86400]  # presumably seconds converted to days (86400 s per day) — TODO confirm

+ 1 - 0
Dataquality/README.md

@@ -0,0 +1 @@
+数据质量检查

+ 41 - 0
Dataquality/dataquality/bidding_es.py

@@ -0,0 +1,41 @@
+from elasticsearch import Elasticsearch
+from pymongo import MongoClient
+
+# Elasticsearch endpoint and basic-auth credentials used by this script.
+# NOTE(review): the credentials are hard-coded in source.
+host = {"host": "127.0.0.1", "port": 9800,"http_auth":('test_dataQuality','t9s3Ed0gBBowf5')}
+es = Elasticsearch([host])
+# Disabled MongoDB target (left commented out):
+# db = MongoClient("192.168.3.206:27080").data_quality
+# collection1 = db.bidding_20230707
+
+
+def get_body():
+    """Return a request body selecting documents whose ``publishtime`` lies between the two fixed bounds."""
+    publish_window = {"range": {"publishtime": {"from": "1688659200", "to": "1688745600"}}}
+    return {"query": {"bool": {"must": [publish_window]}}}
+
+def run():
+    """
+    Search the "bidding" index (at most 10 hits), print every hit and collect each
+    hit's ``_source`` in ``saveBulk``.
+
+    The code that would store ``saveBulk`` in MongoDB is commented out, so nothing
+    is persisted.
+    """
+    # NOTE(review): the request body is an empty string and get_body() is never
+    # called — confirm whether get_body() was meant to be used here.
+    bodystr = ""
+    result_all = es.search(index="bidding", body=bodystr, size=10)
+    results = result_all['hits']['hits']
+    saveBulk = []
+    for res in results:
+        print(res)
+        saveBulk.append(res['_source'])
+    #     if len(saveBulk) % 1000 == 0:
+    #         try:
+    #             collection1.insert_many(saveBulk)
+    #         except:
+    #             for info in saveBulk:
+    #                 collection1.save(info)
+    # if saveBulk:
+    #     try:
+    #         collection1.insert_many(saveBulk)
+    #     except:
+    #         for info in saveBulk:
+    #             collection1.save(info)
+
+
+
+
+
+# Runs on import as well as on direct execution (there is no __main__ guard).
+run()

+ 37 - 0
Dataquality/dataquality/es.py

@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# author : liumiaomiao
+#从es库中导出数据到测试环境mongo库
+from BasicMethods.es_tools import esutil
+from pymongo import MongoClient
+def ES_bidding(es_query):
+    """
+    Example operation: pull the documents matching ``es_query`` from Elasticsearch
+    straight into the configured MongoDB collection.
+
+    :param es_query: Elasticsearch request body.
+    :return: None
+    """
+    # Elasticsearch side ('projectset' is the alternative index that was commented out).
+    es_settings = {
+        'es_host': '127.0.0.1',
+        'es_port': 9800,
+        'es_http_auth': ('test_dataQuality','t9s3Ed0gBBowf5'),  # to be re-applied for
+        'timeout': 10000,
+        'index': "bidding",
+        'size': 1000,
+    }
+    # MongoDB database and collection that receive the data.
+    mongo_settings = {
+        'mg_host': '192.168.3.206',
+        'mg_port': 27080,
+        'database': 'data_quality',
+        'collection': 'bidding_20230707',
+    }
+    # Hand the query and the merged configuration to the export helper.
+    esutil.es_query_save(es_query, **es_settings, **mongo_settings)
+
+def run():
+    """Export the bidding documents whose ``publishtime`` lies between the two fixed bounds below."""
+    # Look up bidding documents with this Elasticsearch query.
+    es_query = {"track_total_hits": True,
+                "query": {"bool": {"must": [{"range": {"publishtime": {"from": "1688659200", "to": "1688745600"}}}]}}}
+    ES_bidding(es_query)
+
+# Runs on import as well as on direct execution (there is no __main__ guard).
+run()

+ 16 - 0
Dataquality/main.py

@@ -0,0 +1,16 @@
+# 这是一个示例 Python 脚本。
+
+# 按 ⌃R 执行或将其替换为您的代码。
+# 按 双击 ⇧ 在所有地方搜索类、文件、工具窗口、操作和设置。
+
+
+def print_hi(name):
+    """Print a greeting for ``name``."""
+    # Use a breakpoint on the line below to debug the script.
+    print(f'Hi, {name}')  # Press ⌘F8 to toggle the breakpoint.
+
+
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助