
Merge branch 'master' of http://192.168.3.207:8080/data_processing/crawlab_feader

lizongze 2 years ago
parent commit 193312b55e
30 files changed, 0 additions and 7242 deletions
  1. A数据处理/site_monitor/.gitignore  +0 -141
  2. A数据处理/site_monitor/README.md  +0 -19
  3. A数据处理/site_monitor/db/__init__.py  +0 -9
  4. A数据处理/site_monitor/db/mongodb.py  +0 -422
  5. A数据处理/site_monitor/db/redisdb.py  +0 -924
  6. A数据处理/site_monitor/docker/Dockerfile  +0 -35
  7. A数据处理/site_monitor/docker/docker-compose.yml  +0 -17
  8. A数据处理/site_monitor/monitor.py  +0 -205
  9. A数据处理/site_monitor/network/__init__.py  +0 -8
  10. A数据处理/site_monitor/network/downloader/__init__.py  +0 -3
  11. A数据处理/site_monitor/network/downloader/_playwright.py  +0 -104
  12. A数据处理/site_monitor/network/downloader/_requests.py  +0 -46
  13. A数据处理/site_monitor/network/downloader/base.py  +0 -41
  14. A数据处理/site_monitor/network/proxy_file/de9f83d546a39eca6979d2a6dca3407a.txt  +0 -32
  15. A数据处理/site_monitor/network/proxy_pool.py  +0 -746
  16. A数据处理/site_monitor/network/request.py  +0 -524
  17. A数据处理/site_monitor/network/response.py  +0 -414
  18. A数据处理/site_monitor/network/user_agent.py  +0 -389
  19. A数据处理/site_monitor/requirements.txt  +0 -14
  20. A数据处理/site_monitor/setting.py  +0 -65
  21. A数据处理/site_monitor/utils/__init__.py  +0 -8
  22. A数据处理/site_monitor/utils/clean_html.py  +0 -147
  23. A数据处理/site_monitor/utils/js/intercept.js  +0 -0
  24. A数据处理/site_monitor/utils/js/stealth.min.js  +0 -6
  25. A数据处理/site_monitor/utils/log.py  +0 -14
  26. A数据处理/site_monitor/utils/tools.py  +0 -2401
  27. A数据处理/site_monitor/utils/webdriver/__init__.py  +0 -12
  28. A数据处理/site_monitor/utils/webdriver/playwright_driver.py  +0 -300
  29. A数据处理/site_monitor/utils/webdriver/webdirver.py  +0 -81
  30. A数据处理/site_monitor/utils/webdriver/webdriver_pool.py  +0 -115

+ 0 - 141
A数据处理/site_monitor/.gitignore

@@ -1,141 +0,0 @@
-### Python template
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-.idea

+ 0 - 19
A数据处理/site_monitor/README.md

@@ -1,19 +0,0 @@
-# Source website monitoring
-
-#### Build the image
-```shell
-$ cd site_monitor
-$ docker build -t site_monitor:v1.0 -f docker/Dockerfile .
-```
-
-#### Start the containers
-```shell
-$ cd site_monitor
-$ docker-compose -f docker/docker-compose.yml up -d
-```
-
-#### Stop the containers
-```shell
-$ cd site_monitor
-$ docker-compose -f docker/docker-compose.yml down
-```

+ 0 - 9
A数据处理/site_monitor/db/__init__.py

@@ -1,9 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2020/4/23 12:09 AM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""

+ 0 - 422
A数据处理/site_monitor/db/mongodb.py

@@ -1,422 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2021-04-18 14:12:21
----------
-@summary: MongoDB database operations
----------
-@author: Mkdir700
-@email:  mkdir700@gmail.com
-"""
-import re
-from typing import List, Dict, Optional
-from urllib import parse
-
-import pymongo
-from pymongo import MongoClient
-from pymongo.collection import Collection
-from pymongo.database import Database
-from pymongo.errors import DuplicateKeyError, BulkWriteError
-
-import setting as setting
-from utils.log import logger as log
-
-
-class MongoDB:
-    def __init__(
-        self,
-        ip=None,
-        port=None,
-        db=None,
-        user_name=None,
-        user_pass=None,
-        url=None,
-        **kwargs,
-    ):
-        if url:
-            self.client = MongoClient(url, **kwargs)
-        else:
-            if not ip:
-                ip = setting.MONGO_IP
-            if not port:
-                port = setting.MONGO_PORT
-            if not db:
-                db = setting.MONGO_DB
-            if not user_name:
-                user_name = setting.MONGO_USER_NAME
-            if not user_pass:
-                user_pass = setting.MONGO_USER_PASS
-            self.client = MongoClient(
-                host=ip, port=port, username=user_name, password=user_pass
-            )
-
-        self.db = self.get_database(db)
-
-        # Cache of index information
-        self.__index__cached = {}
-
-    @classmethod
-    def from_url(cls, url, **kwargs):
-        """
-        Args:
-            url: mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]]
-                 Reference: http://mongodb.github.io/mongo-java-driver/3.4/javadoc/com/mongodb/MongoClientURI.html
-            **kwargs:
-
-        Returns:
-
-        """
-        url_parsed = parse.urlparse(url)
-
-        db_type = url_parsed.scheme.strip()
-        if db_type != "mongodb":
-            raise Exception(
-                "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
-                    url
-                )
-            )
-
-        return cls(url=url, **kwargs)
-
-    def get_database(self, database, **kwargs) -> Database:
-        """
-        Get a database object
-        @param database: database name
-        @return:
-        """
-        return self.client.get_database(database, **kwargs)
-
-    def get_collection(self, coll_name, **kwargs) -> Collection:
-        """
-        Get a collection object by collection name
-        @param coll_name: collection name
-        @return:
-        """
-        return self.db.get_collection(coll_name, **kwargs)
-
-    def find(
-        self, coll_name: str, condition: Optional[Dict] = None, limit: int = 0, **kwargs
-    ) -> List[Dict]:
-        """
-        @summary:
-        No data: returns []
-        With data: [{'_id': 'xx', ...}, ...]
-        ---------
-        @param coll_name: collection name (table name)
-        @param condition: query condition
-        @param limit: number of results
-        @param kwargs:
-            more parameters: https://docs.mongodb.com/manual/reference/command/find/#command-fields
-
-        ---------
-        @result:
-        """
-        condition = {} if condition is None else condition
-        command = {"find": coll_name, "filter": condition, "limit": limit}
-        command.update(kwargs)
-        result = self.run_command(command)
-        cursor = result["cursor"]
-        cursor_id = cursor["id"]
-        dataset = cursor["firstBatch"]
-        while True:
-            if cursor_id == 0:
-                break
-            result = self.run_command(
-                {
-                    "getMore": cursor_id,
-                    "collection": coll_name,
-                    "batchSize": kwargs.get("batchSize", 100),
-                }
-            )
-            cursor = result["cursor"]
-            cursor_id = cursor["id"]
-            dataset.extend(cursor["nextBatch"])
-        return dataset
-
-    def add(
-        self,
-        coll_name,
-        data: Dict,
-        replace=False,
-        update_columns=(),
-        update_columns_value=(),
-        insert_ignore=False,
-    ):
-        """
-        Add a single record
-        Args:
-            coll_name: collection name
-            data: a single record
-            replace: overwrite the old record on a unique-index conflict, default False
-            update_columns: columns to update on a unique-index conflict, e.g. update_columns = ["name", "title"]
-            update_columns_value: values for the updated columns; if omitted, values from the record itself are used
-            insert_ignore: whether to ignore index conflicts, default False
-
-        Returns: number of rows inserted
-
-        """
-        affect_count = 1
-        collection = self.get_collection(coll_name)
-        try:
-            collection.insert_one(data)
-        except DuplicateKeyError as e:
-            # Record already exists, update it instead
-            if update_columns:
-                if not isinstance(update_columns, (tuple, list)):
-                    update_columns = [update_columns]
-
-                condition = self.__get_update_condition(
-                    coll_name, data, e.details.get("errmsg")
-                )
-
-                # Update the specified columns
-                if update_columns_value:
-                    # Update with the provided values
-                    doc = {
-                        key: value
-                        for key, value in zip(update_columns, update_columns_value)
-                    }
-                else:
-                    # Update with values taken from the record itself
-                    doc = {key: data[key] for key in update_columns}
-
-                collection.update_one(condition, {"$set": doc})
-
-            # Overwrite update
-            elif replace:
-                condition = self.__get_update_condition(
-                    coll_name, data, e.details.get("errmsg")
-                )
-                # Replace the existing record
-                collection.replace_one(condition, data)
-
-            elif not insert_ignore:
-                raise e
-
-        return affect_count
-
-    def add_batch(
-        self,
-        coll_name: str,
-        datas: List[Dict],
-        replace=False,
-        update_columns=(),
-        update_columns_value=(),
-        condition_fields: dict = None,
-    ):
-        """
-        批量添加数据
-        Args:
-            coll_name: 集合名
-            datas: 数据 [{'_id': 'xx'}, ... ]
-            replace:  唯一索引冲突时直接覆盖旧数据,默认为False
-            update_columns: 更新指定的列(如果数据的唯一索引存在,则更新指定字段,如 update_columns = ["name", "title"]
-            update_columns_value: 指定更新的字段对应的值, 不指定则用数据本身的值更新
-            condition_fields: 用于条件查找的字段,不指定则用索引冲突中的字段查找
-
-        Returns: 添加行数,不包含更新
-
-        """
-        add_count = 0
-
-        if not datas:
-            return add_count
-
-        collection = self.get_collection(coll_name)
-        if not isinstance(update_columns, (tuple, list)):
-            update_columns = [update_columns]
-
-        try:
-            add_count = len(datas)
-            collection.insert_many(datas, ordered=False)
-        except BulkWriteError as e:
-            write_errors = e.details.get("writeErrors")
-            for error in write_errors:
-                if error.get("code") == 11000:
-                    # Duplicate record
-                    # Fetch the duplicated record
-                    data = error.get("op")
-
-                    def get_condition():
-                        # Build the update condition
-                        if condition_fields:
-                            condition = {
-                                condition_field: data[condition_field]
-                                for condition_field in condition_fields
-                            }
-                        else:
-                            # Build the update condition from the duplicated values
-                            condition = self.__get_update_condition(
-                                coll_name, data, error.get("errmsg")
-                            )
-
-                        return condition
-
-                    if update_columns:
-                        # Update the specified columns
-                        if update_columns_value:
-                            # Update with the provided values
-                            doc = {
-                                key: value
-                                for key, value in zip(
-                                    update_columns, update_columns_value
-                                )
-                            }
-                        else:
-                            # Update with values taken from the record itself
-                            doc = {key: data.get(key) for key in update_columns}
-
-                        collection.update_one(get_condition(), {"$set": doc})
-                        add_count -= 1
-
-                    elif replace:
-                        # Overwrite update
-                        collection.replace_one(get_condition(), data)
-                        add_count -= 1
-
-                    else:
-                        # log.error(error)
-                        add_count -= 1
-
-        return add_count
-
-    def count(self, coll_name, condition: Optional[Dict], limit=0, **kwargs):
-        """
-        Count
-        @param coll_name: collection name
-        @param condition: query condition
-        @param limit: limit on the count
-        @param kwargs:
-        ----
-        command = {
-          count: <collection or view>,
-          query: <document>,
-          limit: <integer>,
-          skip: <integer>,
-          hint: <hint>,
-          readConcern: <document>,
-          collation: <document>,
-          comment: <any>
-        }
-        https://docs.mongodb.com/manual/reference/command/count/#mongodb-dbcommand-dbcmd.count
-        @return: number of records
-        """
-        command = {"count": coll_name, "query": condition, "limit": limit, **kwargs}
-        result = self.run_command(command)
-        return result["n"]
-
-    def update(self, coll_name, data: Dict, condition: Dict, upsert: bool = False):
-        """
-        Update
-        Args:
-            coll_name: collection name
-            data: a single record, e.g. {"xxx":"xxx"}
-            condition: update condition, e.g. {"_id": "xxxx"}
-            upsert: insert the record if it does not exist, default False
-
-        Returns: True / False
-        """
-        try:
-            collection = self.get_collection(coll_name)
-            collection.update_one(condition, {"$set": data}, upsert=upsert)
-        except Exception as e:
-            log.error(
-                """
-                error:{}
-                condition: {}
-            """.format(
-                    e, condition
-                )
-            )
-            return False
-        else:
-            return True
-
-    def delete(self, coll_name, condition: Dict) -> bool:
-        """
-        Delete
-        Args:
-            coll_name: collection name
-            condition: lookup condition
-        Returns: True / False
-
-        """
-        try:
-            collection = self.get_collection(coll_name)
-            collection.delete_one(condition)
-        except Exception as e:
-            log.error(
-                """
-                error:{}
-                condition: {}
-            """.format(
-                    e, condition
-                )
-            )
-            return False
-        else:
-            return True
-
-    def run_command(self, command: Dict):
-        """
-        Run a database command
-        Reference: https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
-        @param command:
-        @return:
-        """
-        return self.db.command(command)
-
-    def create_index(self, coll_name, keys, unique=True):
-        collection = self.get_collection(coll_name)
-        _keys = [(key, pymongo.ASCENDING) for key in keys]
-        collection.create_index(_keys, unique=unique)
-
-    def get_index(self, coll_name):
-        return self.get_collection(coll_name).index_information()
-
-    def drop_collection(self, coll_name):
-        return self.db.drop_collection(coll_name)
-
-    def get_index_key(self, coll_name, index_name):
-        """
-        Get the keys that participate in an index
-        Args:
-            index_name: index name
-
-        Returns:
-
-        """
-        cache_key = f"{coll_name}:{index_name}"
-
-        if cache_key in self.__index__cached:
-            return self.__index__cached.get(cache_key)
-
-        index = self.get_index(coll_name)
-        index_detail = index.get(index_name)
-        if not index_detail:
-            errmsg = f"not found index {index_name} in collection {coll_name}"
-            raise Exception(errmsg)
-
-        index_keys = [val[0] for val in index_detail.get("key")]
-        self.__index__cached[cache_key] = index_keys
-        return index_keys
-
-    def __get_update_condition(
-        self, coll_name: str, data: dict, duplicate_errmsg: str
-    ) -> dict:
-        """
-        Build the update condition from a duplicate-key error message
-        Args:
-            duplicate_errmsg: E11000 duplicate key error collection: feapder.test index: a_1_b_1 dup key: { : 1, : "你好" }
-            data: {"a": 1, "b": "你好", "c": "嘻嘻"}
-
-        Returns: {"a": 1, "b": "你好"}
-
-        """
-        index_name = re.search(r"index: (\w+)", duplicate_errmsg).group(1)
-        index_keys = self.get_index_key(coll_name, index_name)
-
-        condition = {key: data.get(key) for key in index_keys}
-        return condition
-
-    def __getattr__(self, name):
-        return getattr(self.db, name)
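
For reference, a minimal usage sketch of the MongoDB wrapper in the diff above (not part of the original repository; the connection URL, collection name and field names are hypothetical, and constructor arguments omitted here would otherwise fall back to the values in setting.py):

```python
# Hypothetical usage of the MongoDB wrapper; URL, collection and fields are examples only.
from db.mongodb import MongoDB

db = MongoDB.from_url("mongodb://user:pass@127.0.0.1:27017/site_monitor")

# add() wraps insert_one(); on a duplicate-key error it updates the listed columns instead of failing
db.add(
    "site_monitor",
    {"url": "http://example.com", "tags_count": 120},
    update_columns=["tags_count"],
)

# find() issues the "find" command and keeps calling "getMore" until the cursor is exhausted
rows = db.find("site_monitor", condition={"channel_ischange": True}, limit=10)
print(len(rows))
```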

+ 0 - 924
A数据处理/site_monitor/db/redisdb.py

@@ -1,924 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2016-11-16 16:25
----------
-@summary: Redis database operations
----------
-@author: Boris
-"""
-
-import time
-
-import redis
-from redis._compat import unicode, long, basestring
-from redis.connection import Encoder as _Encoder
-from redis.exceptions import ConnectionError, TimeoutError
-from redis.exceptions import DataError
-from redis.sentinel import Sentinel
-from rediscluster import RedisCluster
-
-import setting as setting
-from utils.log import logger as log
-
-
-class Encoder(_Encoder):
-    def encode(self, value):
-        "Return a bytestring or bytes-like representation of the value"
-        if isinstance(value, (bytes, memoryview)):
-            return value
-        # elif isinstance(value, bool):
-        #     # special case bool since it is a subclass of int
-        #     raise DataError(
-        #         "Invalid input of type: 'bool'. Convert to a "
-        #         "bytes, string, int or float first."
-        #     )
-        elif isinstance(value, float):
-            value = repr(value).encode()
-        elif isinstance(value, (int, long)):
-            # python 2 repr() on longs is '123L', so use str() instead
-            value = str(value).encode()
-        elif isinstance(value, (list, dict, tuple)):
-            value = unicode(value)
-        elif not isinstance(value, basestring):
-            # a value we don't know how to deal with. throw an error
-            typename = type(value).__name__
-            raise DataError(
-                "Invalid input of type: '%s'. Convert to a "
-                "bytes, string, int or float first." % typename
-            )
-        if isinstance(value, unicode):
-            value = value.encode(self.encoding, self.encoding_errors)
-        return value
-
-
-redis.connection.Encoder = Encoder
-
-
-class RedisDB:
-    def __init__(
-        self,
-        ip_ports=None,
-        db=None,
-        user_pass=None,
-        url=None,
-        decode_responses=True,
-        service_name=None,
-        max_connections=1000,
-        **kwargs,
-    ):
-        """
-        Wrapper around redis
-        Args:
-            ip_ports: ip:port; multiple entries may be written as a list or comma separated, e.g. ip1:port1,ip2:port2 or ["ip1:port1", "ip2:port2"]
-            db:
-            user_pass:
-            url:
-            decode_responses:
-            service_name: used for Redis sentinel mode
-            max_connections: concurrency for one redis object (maximum size of the connection pool); exceeding it raises redis.ConnectionError
-        """
-
-        # Values in setting may change at runtime, so defaults are loaded here rather than bound in the signature
-        if ip_ports is None:
-            ip_ports = setting.REDISDB_IP_PORTS
-        if db is None:
-            db = setting.REDISDB_DB
-        if user_pass is None:
-            user_pass = setting.REDISDB_USER_PASS
-        if service_name is None:
-            service_name = setting.REDISDB_SERVICE_NAME
-
-        self._is_redis_cluster = False
-
-        self.__redis = None
-        self._url = url
-        self._ip_ports = ip_ports
-        self._db = db
-        self._user_pass = user_pass
-        self._decode_responses = decode_responses
-        self._service_name = service_name
-        self._max_connections = max_connections
-        self._kwargs = kwargs
-        self.get_connect()
-
-    def __repr__(self):
-        if self._url:
-            return "<Redisdb url:{}>".format(self._url)
-
-        return "<Redisdb ip_ports: {} db:{} user_pass:{}>".format(
-            self._ip_ports, self._db, self._user_pass
-        )
-
-    @property
-    def _redis(self):
-        try:
-            if not self.__redis.ping():
-                raise ConnectionError("unable to connect to redis")
-        except:
-            self._reconnect()
-
-        return self.__redis
-
-    @_redis.setter
-    def _redis(self, val):
-        self.__redis = val
-
-    def get_connect(self):
-        # Get a database connection
-        try:
-            if not self._url:
-                if not self._ip_ports:
-                    raise ConnectionError("未设置 redis 连接信息")
-
-                ip_ports = (
-                    self._ip_ports
-                    if isinstance(self._ip_ports, list)
-                    else self._ip_ports.split(",")
-                )
-                if len(ip_ports) > 1:
-                    startup_nodes = []
-                    for ip_port in ip_ports:
-                        ip, port = ip_port.split(":")
-                        startup_nodes.append({"host": ip, "port": port})
-
-                    if self._service_name:
-                        # log.debug("使用redis哨兵模式")
-                        hosts = [(node["host"], node["port"]) for node in startup_nodes]
-                        sentinel = Sentinel(hosts, socket_timeout=3, **self._kwargs)
-                        self._redis = sentinel.master_for(
-                            self._service_name,
-                            password=self._user_pass,
-                            db=self._db,
-                            redis_class=redis.StrictRedis,
-                            decode_responses=self._decode_responses,
-                            max_connections=self._max_connections,
-                            **self._kwargs,
-                        )
-
-                    else:
-                        # log.debug("使用redis集群模式")
-                        self._redis = RedisCluster(
-                            startup_nodes=startup_nodes,
-                            decode_responses=self._decode_responses,
-                            password=self._user_pass,
-                            max_connections=self._max_connections,
-                            **self._kwargs,
-                        )
-
-                    self._is_redis_cluster = True
-                else:
-                    ip, port = ip_ports[0].split(":")
-                    self._redis = redis.StrictRedis(
-                        host=ip,
-                        port=port,
-                        db=self._db,
-                        password=self._user_pass,
-                        decode_responses=self._decode_responses,
-                        max_connections=self._max_connections,
-                        **self._kwargs,
-                    )
-                    self._is_redis_cluster = False
-            else:
-                self._redis = redis.StrictRedis.from_url(
-                    self._url, decode_responses=self._decode_responses
-                )
-                self._is_redis_cluster = False
-
-        except Exception as e:
-            raise e
-
-        # Do not write self._redis.ping() here, otherwise the property recurses
-        return self.__redis.ping()
-
-    @classmethod
-    def from_url(cls, url):
-        """
-
-        Args:
-            url: redis://[[username]:[password]]@[host]:[port]/[db]
-
-        Returns:
-
-        """
-        return cls(url=url)
-
-    def sadd(self, table, values):
-        """
-        @summary: Store data in an unordered set, deduplicated
-        ---------
-        @param table:
-        @param values: value(s); a list or a single value
-        ---------
-        @result: returns 0 if the value already exists, otherwise stores it and returns 1. Batch adds return None
-        """
-
-        if isinstance(values, list):
-            pipe = self._redis.pipeline()
-
-            if not self._is_redis_cluster:
-                pipe.multi()
-            for value in values:
-                pipe.sadd(table, value)
-            pipe.execute()
-
-        else:
-            return self._redis.sadd(table, values)
-
-    def sget(self, table, count=1, is_pop=True):
-        """
-        Returns a list, e.g. ['1'] or []
-        @param table:
-        @param count:
-        @param is_pop:
-        @return:
-        """
-
-        datas = []
-        if is_pop:
-            count = count if count <= self.sget_count(table) else self.sget_count(table)
-            if count:
-                if count > 1:
-                    pipe = self._redis.pipeline()
-
-                    if not self._is_redis_cluster:
-                        pipe.multi()
-                    while count:
-                        pipe.spop(table)
-                        count -= 1
-                    datas = pipe.execute()
-
-                else:
-                    datas.append(self._redis.spop(table))
-
-        else:
-            datas = self._redis.srandmember(table, count)
-
-        return datas
-
-    def srem(self, table, values):
-        """
-        @summary: Remove the given members from the set
-        ---------
-        @param table:
-        @param values: a single value or a list
-        ---------
-        @result:
-        """
-
-        if isinstance(values, list):
-            pipe = self._redis.pipeline()
-
-            if not self._is_redis_cluster:
-                pipe.multi()
-            for value in values:
-                pipe.srem(table, value)
-            pipe.execute()
-        else:
-            self._redis.srem(table, values)
-
-    def sget_count(self, table):
-        return self._redis.scard(table)
-
-    def sdelete(self, table):
-        """
-        @summary: Delete a large set key (a table holding a lot of data)
-        Large set keys are deleted with SSCAN, scanning 500 members at a time and removing each with SREM.
-        Deleting such a key directly with DELETE can block Redis and lead to failovers and application crashes.
-        ---------
-        @param table:
-        ---------
-        @result:
-        """
-
-        # When the SCAN cursor argument is 0 the server starts a new iteration; when the server returns a cursor of 0, the iteration has finished
-        cursor = "0"
-        while cursor != 0:
-            cursor, data = self._redis.sscan(table, cursor=cursor, count=500)
-            for item in data:
-                # pipe.srem(table, item)
-                self._redis.srem(table, item)
-
-            # pipe.execute()
-
-    def sismember(self, table, key):
-        "Return a boolean indicating if ``value`` is a member of set ``name``"
-        return self._redis.sismember(table, key)
-
-    def zadd(self, table, values, prioritys=0):
-        """
-        @summary: Store data in a sorted set, deduplicated (existing values are updated)
-        ---------
-        @param table:
-        @param values: value(s); a list or a single value
-        @param prioritys: priority; double, a list or a single value. Members are ordered by this score, lower is higher priority. Optional, defaults to 0
-        ---------
-        @result: returns 0 if the value already exists, otherwise stores it and returns 1. Batch adds return [0, 1 ...]
-        """
-        if isinstance(values, list):
-            if not isinstance(prioritys, list):
-                prioritys = [prioritys] * len(values)
-            else:
-                assert len(values) == len(prioritys), "values值要与prioritys值一一对应"
-
-            pipe = self._redis.pipeline()
-
-            if not self._is_redis_cluster:
-                pipe.multi()
-            for value, priority in zip(values, prioritys):
-                pipe.execute_command(
-                    "ZADD", table, priority, value
-                )  # 为了兼容2.x与3.x版本的redis
-            return pipe.execute()
-
-        else:
-            return self._redis.execute_command(
-                "ZADD", table, prioritys, values
-            )  # 为了兼容2.x与3.x版本的redis
-
-    def zget(self, table, count=1, is_pop=True):
-        """
-        @summary: Fetch data from the sorted set, lowest scores (highest priority) first
-        ---------
-        @param table:
-        @param count: number of items; -1 returns everything
-        @param is_pop: whether to delete the fetched items from the set, default True
-        ---------
-        @result: list
-        """
-
-        start_pos = 0  # 包含
-        end_pos = count - 1 if count > 0 else count
-
-        pipe = self._redis.pipeline()
-
-        if not self._is_redis_cluster:
-            pipe.multi()  # 标记事务的开始 参考 http://www.runoob.com/redis/redis-transactions.html
-        pipe.zrange(table, start_pos, end_pos)  # 取值
-        if is_pop:
-            pipe.zremrangebyrank(table, start_pos, end_pos)  # 删除
-        results, *count = pipe.execute()
-        return results
-
-    def zremrangebyscore(self, table, priority_min, priority_max):
-        """
-        根据分数移除成员 闭区间
-        @param table:
-        @param priority_min:
-        @param priority_max:
-        @return: 被移除的成员个数
-        """
-        return self._redis.zremrangebyscore(table, priority_min, priority_max)
-
-    def zrangebyscore(self, table, priority_min, priority_max, count=None, is_pop=True):
-        """
-        @summary: Return data within the given score range (inclusive on both ends)
-        ---------
-        @param table:
-        @param priority_min: lower scores have higher priority
-        @param priority_max:
-        @param count: number of items to fetch; empty means everything within the score range
-        @param is_pop: whether to delete the fetched items
-        ---------
-        @result:
-        """
-
-        # Use a Lua script to keep the operation atomic
-        lua = """
-            -- local key = KEYS[1]
-            local min_score = ARGV[2]
-            local max_score = ARGV[3]
-            local is_pop = ARGV[4]
-            local count = ARGV[5]
-
-            -- 取值
-            local datas = nil
-            if count then
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'limit', 0, count)
-            else
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score)
-            end
-
-            -- 删除redis中刚取到的值
-            if (is_pop=='True' or is_pop=='1') then
-                for i=1, #datas do
-                    redis.call('zrem', KEYS[1], datas[i])
-                end
-            end
-
-
-            return datas
-
-        """
-        cmd = self._redis.register_script(lua)
-        if count:
-            res = cmd(
-                keys=[table], args=[table, priority_min, priority_max, is_pop, count]
-            )
-        else:
-            res = cmd(keys=[table], args=[table, priority_min, priority_max, is_pop])
-
-        return res
-
-    def zrangebyscore_increase_score(
-        self, table, priority_min, priority_max, increase_score, count=None
-    ):
-        """
-        @summary: 返回指定分数区间的数据 闭区间, 同时修改分数
-        ---------
-        @param table:
-        @param priority_min: 最小分数
-        @param priority_max: 最大分数
-        @param increase_score: 分数值增量 正数则在原有的分数上叠加,负数则相减
-        @param count: 获取的数量,为空则表示分数区间内的全部数据
-        ---------
-        @result:
-        """
-
-        # 使用lua脚本, 保证操作的原子性
-        lua = """
-            -- local key = KEYS[1]
-            local min_score = ARGV[1]
-            local max_score = ARGV[2]
-            local increase_score = ARGV[3]
-            local count = ARGV[4]
-
-            -- 取值
-            local datas = nil
-            if count then
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'limit', 0, count)
-            else
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score)
-            end
-
-            --修改优先级
-            for i=1, #datas do
-                redis.call('zincrby', KEYS[1], increase_score, datas[i])
-            end
-
-            return datas
-
-        """
-        cmd = self._redis.register_script(lua)
-        if count:
-            res = cmd(
-                keys=[table], args=[priority_min, priority_max, increase_score, count]
-            )
-        else:
-            res = cmd(keys=[table], args=[priority_min, priority_max, increase_score])
-
-        return res
-
-    def zrangebyscore_set_score(
-        self, table, priority_min, priority_max, score, count=None
-    ):
-        """
-        @summary: 返回指定分数区间的数据 闭区间, 同时修改分数
-        ---------
-        @param table:
-        @param priority_min: 最小分数
-        @param priority_max: 最大分数
-        @param score: 分数值
-        @param count: 获取的数量,为空则表示分数区间内的全部数据
-        ---------
-        @result:
-        """
-
-        # 使用lua脚本, 保证操作的原子性
-        lua = """
-            -- local key = KEYS[1]
-            local min_score = ARGV[1]
-            local max_score = ARGV[2]
-            local set_score = ARGV[3]
-            local count = ARGV[4]
-
-            -- 取值
-            local datas = nil
-            if count then
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'withscores','limit', 0, count)
-            else
-                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'withscores')
-            end
-
-            local real_datas = {} -- 数据
-            --修改优先级
-            for i=1, #datas, 2 do
-               local data = datas[i]
-               local score = datas[i+1]
-
-               table.insert(real_datas, data) -- 添加数据
-
-               redis.call('zincrby', KEYS[1], set_score - score, datas[i])
-            end
-
-            return real_datas
-
-        """
-        cmd = self._redis.register_script(lua)
-        if count:
-            res = cmd(keys=[table], args=[priority_min, priority_max, score, count])
-        else:
-            res = cmd(keys=[table], args=[priority_min, priority_max, score])
-
-        return res
-
-    def zincrby(self, table, amount, value):
-        return self._redis.zincrby(table, amount, value)
-
-    def zget_count(self, table, priority_min=None, priority_max=None):
-        """
-        @summary: 获取表数据的数量
-        ---------
-        @param table:
-        @param priority_min:优先级范围 最小值(包含)
-        @param priority_max:优先级范围 最大值(包含)
-        ---------
-        @result:
-        """
-
-        if priority_min != None and priority_max != None:
-            return self._redis.zcount(table, priority_min, priority_max)
-        else:
-            return self._redis.zcard(table)
-
-    def zrem(self, table, values):
-        """
-        @summary: 移除集合中的指定元素
-        ---------
-        @param table:
-        @param values: 一个或者列表
-        ---------
-        @result:
-        """
-
-        if isinstance(values, list):
-            self._redis.zrem(table, *values)
-        else:
-            self._redis.zrem(table, values)
-
-    def zexists(self, table, values):
-        """
-        利用zscore判断某元素是否存在
-        @param values:
-        @return:
-        """
-
-        is_exists = []
-
-        if isinstance(values, list):
-            pipe = self._redis.pipeline()
-            pipe.multi()
-            for value in values:
-                pipe.zscore(table, value)
-            is_exists_temp = pipe.execute()
-            for is_exist in is_exists_temp:
-                if is_exist != None:
-                    is_exists.append(1)
-                else:
-                    is_exists.append(0)
-
-        else:
-            is_exists = self._redis.zscore(table, values)
-            is_exists = 1 if is_exists != None else 0
-
-        return is_exists
-
-    def lpush(self, table, values):
-
-        if isinstance(values, list):
-            pipe = self._redis.pipeline()
-
-            if not self._is_redis_cluster:
-                pipe.multi()
-            for value in values:
-                pipe.rpush(table, value)
-            pipe.execute()
-
-        else:
-            return self._redis.rpush(table, values)
-
-    def lpop(self, table, count=1):
-        """
-        @summary:
-        ---------
-        @param table:
-        @param count:
-        ---------
-        @result: count>1时返回列表
-        """
-
-        datas = None
-        lcount = self.lget_count(table)
-        count = count if count <= lcount else lcount
-
-        if count:
-            if count > 1:
-                pipe = self._redis.pipeline()
-
-                if not self._is_redis_cluster:
-                    pipe.multi()
-                while count:
-                    pipe.lpop(table)
-                    count -= 1
-                datas = pipe.execute()
-
-            else:
-                datas = self._redis.lpop(table)
-
-        return datas
-
-    def rpoplpush(self, from_table, to_table=None):
-        """
-        Pop the last element (tail) of the list from_table and return it to the client.
-        The popped element is inserted at the head of the list to_table.
-        If from_table and to_table are the same, the tail element is moved to the head and returned; this special case can be seen as a rotation of the list.
-        @param from_table:
-        @param to_table:
-        @return:
-        """
-
-        if not to_table:
-            to_table = from_table
-
-        return self._redis.rpoplpush(from_table, to_table)
-
-    def lget_count(self, table):
-        return self._redis.llen(table)
-
-    def lrem(self, table, value, num=0):
-        """
-        @summary:
-        删除value
-        ---------
-        @param table:
-        @param value:
-        @param num:
-        ---------
-        @result: 删除的条数
-        """
-        return self._redis.lrem(table, num, value)
-
-    def lrange(self, table, start=0, end=-1):
-        return self._redis.lrange(table, start, end)
-
-    def hset(self, table, key, value):
-        """
-        @summary:
-        If the key does not exist, a new hash is created and the HSET operation is performed.
-        If the field already exists in the hash, the old value is overwritten.
-        ---------
-        @param table:
-        @param key:
-        @param value:
-        ---------
-        @result: 1 newly inserted; 0 overwritten
-        """
-        return self._redis.hset(table, key, value)
-
-    def hset_batch(self, table, datas):
-        """
-        批量插入
-        Args:
-            datas:
-                [[key, value]]
-        Returns:
-
-        """
-        pipe = self._redis.pipeline()
-
-        if not self._is_redis_cluster:
-            pipe.multi()
-        for key, value in datas:
-            pipe.hset(table, key, value)
-        return pipe.execute()
-
-    def hincrby(self, table, key, increment):
-        return self._redis.hincrby(table, key, increment)
-
-    def hget(self, table, key, is_pop=False):
-        if not is_pop:
-            return self._redis.hget(table, key)
-        else:
-            lua = """
-                -- local key = KEYS[1]
-                local field = ARGV[1]
-
-                -- 取值
-                local datas = redis.call('hget', KEYS[1], field)
-                -- 删除值
-                redis.call('hdel', KEYS[1], field)
-
-                return datas
-
-                    """
-            cmd = self._redis.register_script(lua)
-            res = cmd(keys=[table], args=[key])
-
-            return res
-
-    def hgetall(self, table):
-        return self._redis.hgetall(table)
-
-    def hexists(self, table, key):
-        return self._redis.hexists(table, key)
-
-    def hdel(self, table, *keys):
-        """
-        @summary: 删除对应的key 可传多个
-        ---------
-        @param table:
-        @param *keys:
-        ---------
-        @result:
-        """
-        self._redis.hdel(table, *keys)
-
-    def hget_count(self, table):
-        return self._redis.hlen(table)
-
-    def hkeys(self, table):
-        return self._redis.hkeys(table)
-
-    def setbit(self, table, offsets, values):
-        """
-        Set the bit at the given offset(s) of the string and return the previous value
-        @param table:
-        @param offsets: a list or a single value
-        @param values: a list or a single value
-        @return: list / single value
-        """
-        if isinstance(offsets, list):
-            if not isinstance(values, list):
-                values = [values] * len(offsets)
-            else:
-                assert len(offsets) == len(values), "offsets值要与values值一一对应"
-
-            pipe = self._redis.pipeline()
-            pipe.multi()
-
-            for offset, value in zip(offsets, values):
-                pipe.setbit(table, offset, value)
-
-            return pipe.execute()
-
-        else:
-            return self._redis.setbit(table, offsets, values)
-
-    def getbit(self, table, offsets):
-        """
-        取字符串数组某一位的值
-        @param table:
-        @param offsets: 支持列表
-        @return: list / 单个值
-        """
-        if isinstance(offsets, list):
-            pipe = self._redis.pipeline()
-            pipe.multi()
-            for offset in offsets:
-                pipe.getbit(table, offset)
-
-            return pipe.execute()
-
-        else:
-            return self._redis.getbit(table, offsets)
-
-    def bitcount(self, table):
-        return self._redis.bitcount(table)
-
-    def strset(self, table, value, **kwargs):
-        return self._redis.set(table, value, **kwargs)
-
-    def str_incrby(self, table, value):
-        return self._redis.incrby(table, value)
-
-    def strget(self, table):
-        return self._redis.get(table)
-
-    def strlen(self, table):
-        return self._redis.strlen(table)
-
-    def getkeys(self, regex):
-        return self._redis.keys(regex)
-
-    def exists_key(self, key):
-        return self._redis.exists(key)
-
-    def set_expire(self, key, seconds):
-        """
-        @summary: 设置过期时间
-        ---------
-        @param key:
-        @param seconds: 秒
-        ---------
-        @result:
-        """
-        self._redis.expire(key, seconds)
-
-    def get_expire(self, key):
-        """
-        @summary: 查询过期时间
-        ---------
-        @param key:
-        @param seconds: 秒
-        ---------
-        @result:
-        """
-        return self._redis.ttl(key)
-
-    def clear(self, table):
-        try:
-            self._redis.delete(table)
-        except Exception as e:
-            log.error(e)
-
-    def get_redis_obj(self):
-        return self._redis
-
-    def _reconnect(self):
-        # Check the connection state and reconnect automatically when the server restarts or a timeout drops the connection
-        retry_count = 0
-        while True:
-            try:
-                retry_count += 1
-                log.error(f"redis 连接断开, 重新连接 {retry_count}")
-                if self.get_connect():
-                    log.info(f"redis 连接成功")
-                    return True
-            except (ConnectionError, TimeoutError) as e:
-                log.error(f"连接失败 e: {e}")
-
-            time.sleep(2)
-
-    def __getattr__(self, name):
-        return getattr(self._redis, name)
-
-    def current_status(self, show_key=True, filter_key_by_used_memory=10 * 1024 * 1024):
-        """
-        Report current Redis usage
-        Args:
-            show_key: whether to report memory usage per key
-            filter_key_by_used_memory: filter keys by memory usage, showing only keys above the given size
-
-        Returns:
-
-        """
-        from prettytable import PrettyTable
-        from tqdm import tqdm
-
-        status_msg = ""
-
-        print("正在查询最大连接数...")
-        clients_count = self._redis.execute_command("info clients")
-        max_clients_count = self._redis.execute_command("config get maxclients")
-        status_msg += ": ".join(max_clients_count) + "\n"
-        status_msg += clients_count + "\n"
-
-        print("正在查询整体内存使用情况...")
-        total_status = self._redis.execute_command("info memory")
-        status_msg += total_status + "\n"
-
-        if show_key:
-            print("正在查询每个key占用内存情况等信息...")
-            table = PrettyTable(
-                field_names=[
-                    "type",
-                    "key",
-                    "value_count",
-                    "used_memory_human",
-                    "used_memory",
-                ],
-                sortby="used_memory",
-                reversesort=True,
-                header_style="title",
-            )
-
-            keys = self._redis.execute_command("keys *")
-            for key in tqdm(keys):
-                key_type = self._redis.execute_command("type {}".format(key))
-                if key_type == "set":
-                    value_count = self._redis.scard(key)
-                elif key_type == "zset":
-                    value_count = self._redis.zcard(key)
-                elif key_type == "list":
-                    value_count = self._redis.llen(key)
-                elif key_type == "hash":
-                    value_count = self._redis.hlen(key)
-                elif key_type == "string":
-                    value_count = self._redis.strlen(key)
-                elif key_type == "none":
-                    continue
-                else:
-                    raise TypeError("尚不支持 {} 类型的key".format(key_type))
-
-                used_memory = self._redis.execute_command("memory usage {}".format(key))
-                if used_memory >= filter_key_by_used_memory:
-                    used_memory_human = (
-                        "%0.2fMB" % (used_memory / 1024 / 1024) if used_memory else 0
-                    )
-
-                    table.add_row(
-                        [key_type, key, value_count, used_memory_human, used_memory]
-                    )
-
-            status_msg += str(table)
-
-        return status_msg
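
A small sketch of how the RedisDB wrapper in the diff above can act as a priority queue through its sorted-set helpers (not from the original repository; the key name, URL and member values are hypothetical):

```python
# Hypothetical use of RedisDB as a priority queue; key and URL are examples only.
from db.redisdb import RedisDB

rdb = RedisDB.from_url("redis://:password@127.0.0.1:6379/0")

# zadd() stores members in a sorted set; lower scores mean higher priority
rdb.zadd("site_monitor:tasks", ["task_a", "task_b"], prioritys=[10, 1])

# zget() returns members ordered by score and, with is_pop=True, removes them in the same pipeline
print(rdb.zget("site_monitor:tasks", count=2))  # expected: ['task_b', 'task_a']
```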

+ 0 - 35
A数据处理/site_monitor/docker/Dockerfile

@@ -1,35 +0,0 @@
-# Pull the base image
-FROM ubuntu:22.04
-
-# Configure the container timezone
-RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
-
-# Switch apt sources to the Aliyun mirror
-RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
-RUN sed -i s@/security.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
-RUN apt-get clean && apt-get update
-RUN apt-get install -y wget unzip curl vim
-
-# Install gcc and build dependencies for Python 3.8.10
-WORKDIR /opt
-RUN apt-get install -y gcc build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libbz2-dev liblzma-dev sqlite3 libsqlite3-dev tk-dev uuid-dev libgdbm-compat-dev libncurses-dev libnspr4-dev
-
-# Download and extract Python 3.8.10
-RUN curl -o python3.8.10.tgz https://mirrors.huaweicloud.com/python/3.8.10/Python-3.8.10.tgz && tar -zxvf python3.8.10.tgz
-# Create the install prefix and configure the build location
-RUN mkdir /usr/local/python38
-WORKDIR /opt/Python-3.8.10
-RUN ./configure --prefix=/usr/local/python38 && make && make install
-# Add symlinks for python3 and pip3
-RUN rm -rf /usr/bin/python3 /usr/bin/pip3 && ln -s /usr/local/python38/bin/python3 /usr/bin/python3 && ln -s /usr/local/python38/bin/pip3.8 /usr/bin/pip3
-# Switch the pip index and upgrade pip
-RUN pip3 config set global.index-url https://mirrors.bfsu.edu.cn/pypi/web/simple && pip3 install --upgrade pip
-
-# Install project dependencies
-COPY requirements.txt requirements.txt
-RUN pip3 install -r requirements.txt
-# Install the Playwright WebKit browser and its dependencies
-RUN python3 -m playwright install --with-deps webkit
-
-# Set the working directory
-WORKDIR /mnt

+ 0 - 17
A数据处理/site_monitor/docker/docker-compose.yml

@@ -1,17 +0,0 @@
-version: "3"
-services: # a group of containers
-  worker01:
-    container_name: site_monitor
-    image: site_monitor:v1.0
-    volumes: # mapped directories
-      - /mnt/site_monitor:/mnt
-    network_mode: "host" # use the host network
-    restart: always
-    privileged: true
-    shm_size: 2GB
-    logging:
-      driver: "json-file"
-      options:
-        max-size: "200k"
-        max-file: "10"
-    command: 'python3 /mnt/monitor.py'

+ 0 - 205
A数据处理/site_monitor/monitor.py

@@ -1,205 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2023-05-10 
----------
-@summary:  Website monitoring
----------
-@author: Dzr
-"""
-import copy
-import threading
-
-import bson
-import numpy as np
-import requests
-import requests.exceptions as requests_exceptions
-from playwright._impl._api_types import Error
-
-import utils.tools as tools
-from db.mongodb import MongoDB
-from network.request import Request
-from network.response import Response
-from utils.log import logger
-
-
-class MonitorParser(threading.Thread):
-
-    def __init__(self, mongo_db, coll_name):
-        threading.Thread.__init__(self)
-        self.mgo_db = mongo_db
-        self.coll_name = coll_name
-
-        self.monitor_api = 'http://cc.spdata.jianyu360.com/crawl/site_monitor/task/fetch'
-
-    def get_task(self):
-        items = {}
-        try:
-            response = requests.get(self.monitor_api, timeout=5)
-            items = response.json()['data']
-            if '_id' in items:
-                items['_id'] = bson.ObjectId(items['_id'])
-        finally:
-            return items
-
-    def get_response(self, url, render=False, **kwargs):
-        response = Response.from_dict({
-            "url": url,
-            "_content": b"",
-            "cookies": {},
-            "status_code": -1,
-            "elapsed": 666,
-            "headers": {}
-        })
-        request = Request(url=url, render=render, **kwargs)
-        for i in range(3):
-            try:
-                response = request.get_response()
-                if response.status_code != 200:
-                    if any([
-                        response.text is None,
-                        len(response.plain_text) == 0,
-                        response.tags()['tags_count'] == 0
-                    ]):
-                        continue
-                break
-            except Error as e:
-                if 'The certificate for this server is invalid.' in e.message:
-                    url = url.replace('https', 'http')
-                    request = Request(url=url, render=render, **kwargs)
-            except requests_exceptions.SSLError:
-                url = url.replace('https', 'http')
-                request = Request(url=url, render=True, **kwargs)
-            except requests_exceptions.ConnectionError:
-                kw = copy.deepcopy(kwargs)
-                kw.pop('proxies', '')
-                request = Request(url=url, render=True, **kw)
-            except requests_exceptions.RequestException as e:
-                logger.exception(e)
-                break
-
-        # Release the browser
-        response.close_browser(request)
-
-        logger.debug(
-            """
-                -------------- %s response for ----------------
-                url = %s
-                title = %s
-                response = %s
-            """
-            % (
-                self.getName(),
-                url,
-                response.title(),
-                response
-            )
-        )
-
-        if response.status_code != -1:
-            response = Response(response)
-        # Set the encoding
-        response.encoding = response.encoding or "utf-8"
-        return response
-
-    def __add_items_to_db(self, task, items):
-        result = self.mgo_db.update(
-            coll_name=self.coll_name,
-            condition={'_id': task['_id']},
-            data=items
-        )
-        # print({'_id': task['_id']})
-        return result
-
-    def deal_task(self, task):
-        # Channel page
-        url = task['url']
-        response = self.get_response(url, render=True, proxies=False)
-        status_code = response.status_code
-
-        # Tag count of the channel page
-        tags_count = response.tags()['tags_count']
-        tags_count_diff = abs(tags_count - task['tags_count'])
-        tags_count_diff_lst = list(task['tags_count_diff_lst'])
-
-        # Has the channel been redesigned?
-        channel_ischange = task['channel_ischange']
-        if len(tags_count_diff_lst) >= 3 and not channel_ischange:
-            mean = np.mean(tags_count_diff_lst)  # mean
-            std = np.std(tags_count_diff_lst, ddof=1)  # sample standard deviation
-            std_range = [mean - (2 * std), mean + (2 * std)]
-            if not (std_range[0] <= tags_count_diff <= std_range[1]):
-                channel_ischange = True
-
-        if len(tags_count_diff_lst) > 3 and sum(tags_count_diff_lst) == 0:
-            channel_ischange = True
-            status_code = 500
-
-        # Visit frequency
-        update_dt = tools.timestamp_to_date(task['update_at'], '%Y-%m-%d')
-        is_first_monitor = tools.get_current_date('%Y-%m-%d') != update_dt
-        if is_first_monitor:
-            visit_count, failure_count = 1, 0
-            if status_code != 200:
-                failure_count = 1
-
-            tags_count_diff_lst = []
-            tags_count_diff_lst.insert(0, tags_count_diff)
-        else:
-            visit_count = task['visit_count'] + 1
-            failure_count = task['failure_count']
-            if status_code != 200:
-                failure_count += 1
-
-            tags_count_diff_lst.insert(0, tags_count_diff)
-
-        items = {
-            'title': response.title(),  # page title
-            'tags_count': tags_count,
-            'tags_count_diff': tags_count_diff,
-            'tags_count_diff_lst': tags_count_diff_lst,
-            'channel_ischange': channel_ischange,
-            'status_code': status_code,
-            'visit_count': visit_count,
-            'failure_count': failure_count,
-            'update_at': tools.ensure_int64(tools.get_current_timestamp())
-        }
-        self.__add_items_to_db(task, items)
-
-    def run(self):
-        while True:
-            task = self.get_task()
-            if not task:
-                logger.debug(f"[{self.getName()}]暂无监控任务")
-                tools.delay_time(2)
-                continue
-
-            try:
-                self.deal_task(task)
-            except Exception as e:
-                logger.exception(e)
-
-
-class MonitorServer(threading.Thread):
-
-    def __init__(self, thread_nums=1):
-        threading.Thread.__init__(self)
-        self.mongo_db = MongoDB()
-        self.coll_name = 'site_monitor'
-
-        self.thread_nums = thread_nums
-
-        self.parser_control_obj = MonitorParser
-        self.parser_controls = []
-
-    def run(self):
-        for _ in range(self.thread_nums):
-            parser_control = self.parser_control_obj(
-                mongo_db=self.mongo_db,
-                coll_name=self.coll_name
-            )
-            parser_control.start()
-            self.parser_controls.append(parser_control)
-
-
-if __name__ == '__main__':
-    MonitorServer(thread_nums=5).start()
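
The redesign check in MonitorParser.deal_task flags a channel when the newest tag-count delta falls outside a band of two sample standard deviations around the recent deltas. A standalone sketch of that rule, as read from the code above (the function name and sample numbers are illustrative, not from the repository):

```python
# Standalone sketch of the 2-sigma change-detection rule used in deal_task.
import numpy as np

def is_channel_changed(tags_count_diff, tags_count_diff_lst):
    """Return True when the latest delta is outside mean +/- 2 * sample std of past deltas."""
    if len(tags_count_diff_lst) < 3:
        return False  # not enough history yet
    mean = np.mean(tags_count_diff_lst)
    std = np.std(tags_count_diff_lst, ddof=1)  # sample standard deviation, as in deal_task
    lower, upper = mean - 2 * std, mean + 2 * std
    return not (lower <= tags_count_diff <= upper)

print(is_channel_changed(40, [1, 2, 0, 3]))  # True: far outside the usual range
print(is_channel_changed(2, [1, 2, 0, 3]))   # False: within the band
```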

+ 0 - 8
A数据处理/site_monitor/network/__init__.py

@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2023-05-10 
----------
-@summary:  
----------
-@author: Dzr
-"""

+ 0 - 3
A数据处理/site_monitor/network/downloader/__init__.py

@@ -1,3 +0,0 @@
-from ._requests import RequestsDownloader
-from ._requests import RequestsSessionDownloader
-from ._playwright import PlaywrightDownloader

+ 0 - 104
A数据处理/site_monitor/network/downloader/_playwright.py

@@ -1,104 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2022/9/7 4:05 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import setting as setting
-import utils.tools as tools
-from network.downloader.base import RenderDownloader
-from network.response import Response
-from utils.webdriver import WebDriverPool, PlaywrightDriver
-
-
-class PlaywrightDownloader(RenderDownloader):
-    webdriver_pool: WebDriverPool = None
-
-    @property
-    def _webdriver_pool(self):
-        if not self.__class__.webdriver_pool:
-            self.__class__.webdriver_pool = WebDriverPool(
-                **setting.PLAYWRIGHT, driver_cls=PlaywrightDriver, thread_safe=True
-            )
-
-        return self.__class__.webdriver_pool
-
-    def download(self, request) -> Response:
-        # Proxy priority: custom > config file > random
-        if request.custom_proxies:
-            proxy = request.get_proxy()
-        elif setting.PLAYWRIGHT.get("proxy"):
-            proxy = setting.PLAYWRIGHT.get("proxy")
-        else:
-            proxy = request.get_proxy()
-
-        # User-Agent priority: custom > config file > random
-        if request.custom_ua:
-            user_agent = request.get_user_agent()
-        elif setting.PLAYWRIGHT.get("user_agent"):
-            user_agent = setting.PLAYWRIGHT.get("user_agent")
-        else:
-            user_agent = request.get_user_agent()
-
-        cookies = request.get_cookies()
-        url = request.url
-        render_time = request.render_time or setting.PLAYWRIGHT.get("render_time")
-        wait_until = setting.PLAYWRIGHT.get("wait_until") or "domcontentloaded"
-        if request.get_params():
-            url = tools.joint_url(url, request.get_params())
-
-        driver: PlaywrightDriver = self._webdriver_pool.get(
-            user_agent=user_agent, proxy=proxy
-        )
-        try:
-            if cookies:
-                driver.url = url
-                driver.cookies = cookies
-            driver.page.goto(url, wait_until=wait_until)
-
-            if render_time:
-                tools.delay_time(render_time)
-
-            html = driver.page.content()
-            response = Response.from_dict(
-                {
-                    "url": driver.page.url,
-                    "cookies": driver.cookies,
-                    "_content": html.encode(),
-                    "status_code": 200,
-                    "elapsed": 666,
-                    "headers": {
-                        "User-Agent": driver.user_agent,
-                        "Cookie": tools.cookies2str(driver.cookies),
-                    },
-                }
-            )
-
-            response.driver = driver
-            response.browser = driver
-            return response
-        except Exception as e:
-            self._webdriver_pool.remove(driver)
-            raise e
-
-    def close(self, driver):
-        if driver:
-            self._webdriver_pool.remove(driver)
-
-    def put_back(self, driver):
-        """
-        Release the browser object back to the pool
-        """
-        self._webdriver_pool.put(driver)
-
-    def close_all(self):
-        """
-        Close all browsers
-        """
-        # Not supported
-        # self._webdriver_pool.close()
-        pass

+ 0 - 46
A数据处理/site_monitor/network/downloader/_requests.py

@@ -1,46 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2022/4/10 5:57 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import requests
-from requests.adapters import HTTPAdapter
-
-from network.downloader.base import Downloader
-from network.response import Response
-
-
-class RequestsDownloader(Downloader):
-    def download(self, request) -> Response:
-        response = requests.request(
-            request.method, request.url, **request.requests_kwargs
-        )
-        response = Response(response)
-        return response
-
-
-class RequestsSessionDownloader(Downloader):
-    session = None
-
-    @property
-    def _session(self):
-        if not self.__class__.session:
-            self.__class__.session = requests.Session()
-            # pool_connections: number of urllib3 connection pools to cache; pool_maxsize: max connections kept per pool
-            http_adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
-            # Any HTTP request made through this session whose URL starts with the given prefix uses this adapter.
-            self.__class__.session.mount("http", http_adapter)
-
-        return self.__class__.session
-
-    def download(self, request) -> Response:
-        response = self._session.request(
-            request.method, request.url, **request.requests_kwargs
-        )
-        response = Response(response)
-        return response

+ 0 - 41
A数据处理/site_monitor/network/downloader/base.py

@@ -1,41 +0,0 @@
-import abc
-from abc import ABC
-
-from network.response import Response
-
-
-class Downloader:
-    @abc.abstractmethod
-    def download(self, request) -> Response:
-        """
-
-        Args:
-            request: Request
-
-        Returns: Response
-
-        """
-        raise NotImplementedError
-
-    def close(self, response: Response):
-        pass
-
-
-class RenderDownloader(Downloader, ABC):
-    def put_back(self, driver):
-        """
-        Release the browser object back to the pool
-        """
-        pass
-
-    def close(self, driver):
-        """
-        Close the browser
-        """
-        pass
-
-    def close_all(self):
-        """
-        Close all browsers
-        """
-        pass
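
The two abstract bases above define the downloader contract: `Downloader.download` turns a framework `Request` into a framework `Response`, and `RenderDownloader` adds browser lifecycle hooks. A minimal sketch of a custom implementation, assuming this package is importable; `HeadDownloader` is a hypothetical name used only for illustration:

    import requests

    from network.downloader.base import Downloader
    from network.response import Response


    class HeadDownloader(Downloader):
        """Hypothetical downloader that issues HEAD requests, reusing the framework Response wrapper."""

        def download(self, request) -> Response:
            raw = requests.request("HEAD", request.url, **request.requests_kwargs)
            return Response(raw)

Plugging such a class in would follow the same pattern as RequestsDownloader, which request.py selects through setting.DOWNLOADER.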

+ 0 - 32
A数据处理/site_monitor/network/proxy_file/de9f83d546a39eca6979d2a6dca3407a.txt

@@ -1,32 +0,0 @@
-180.105.104.247:8860&&1684743244
-115.208.199.134:8860&&1684742848
-42.84.93.124:8861&&1684742999
-180.127.72.88:8860&&1684743979
-144.255.48.89:8860&&1684744166
-180.106.242.48:8860&&1684743307
-121.207.84.107:8860&&1684742787
-180.127.72.79:8860&&1684743262
-182.107.181.130:8860&&1684742689
-218.67.90.253:8860&&1684743824
-59.61.165.88:8860&&1684742786
-114.233.0.176:8860&&1684742924
-113.93.224.26:8860&&1684743064
-123.169.34.24:8860&&1684743176
-182.34.27.242:8860&&1684744210
-125.69.91.209:8860&&1684743202
-36.27.184.4:8860&&1684743545
-49.69.209.246:8860&&1684742763
-123.146.150.68:8860&&1684742715
-114.235.254.245:8860&&1684742840
-106.32.10.20:8860&&1684743120
-140.250.148.156:8860&&1684742873
-180.111.177.16:8860&&1684743024
-180.108.151.90:8860&&1684743675
-121.238.107.47:8860&&1684742780
-123.160.96.180:8860&&1684742820
-223.215.119.152:8860&&1684742729
-182.34.102.138:8860&&1684743505
-59.58.211.240:8860&&1684744113
-180.140.47.156:8860&&1684743073
-125.123.136.247:8861&&1684743189
-49.86.182.103:8860&&1684742719
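
Each line of this cached file is `host:port&&expiry-timestamp`, the layout written by `get_proxy_from_http` and read back by `get_proxy_from_file` in `network/proxy_pool.py` below. A minimal parsing sketch under that assumption, using the first line above as sample data:

    import time

    line = "180.105.104.247:8860&&1684743244"
    addr, expiry = line.split("&&")
    ip, port = addr.split(":")
    if time.time() < int(expiry):
        # Still valid: expose it in the socks5 form the pool expects
        proxies = {"http": f"socks5://{ip}:{port}", "https": f"socks5://{ip}:{port}"}
    else:
        proxies = None  # expired, skip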

+ 0 - 746
A数据处理/site_monitor/network/proxy_pool.py

@@ -1,746 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Proxy pool
-"""
-import datetime
-import json
-import os
-import random
-import socket
-import time
-from urllib import parse
-
-import redis
-import requests
-
-import setting
-from utils import tools
-from utils.log import logger as log
-
-
-def decrypt(input_str: str) -> str:
-    """
-    Rewrite: newly added.
-    Custom base64-style decode function (uses a shuffled alphabet).
-
-    :param input_str:
-    :return:
-    """
-    key = "ABNOPqrceQRSTklmUDEFGXYZabnopfghHVWdijstuvwCIJKLMxyz0123456789+/"
-    ascii_list = ['{:0>6}'.format(str(bin(key.index(i))).replace('0b', '')) for i in input_str if i != '=']
-    output_str = ''
-    # The comprehension above looks up the index of each non-'=' character and converts it to 6-bit binary
-    # Count the '=' padding characters
-    equal_num = input_str.count('=')
-    while ascii_list:
-        temp_list = ascii_list[:4]
-        # Join the 6-bit groups into one binary string
-        temp_str = ''.join(temp_list)
-        # Drop the bits contributed by '=' padding when the length is not a multiple of 8
-        if len(temp_str) % 8 != 0:
-            temp_str = temp_str[0:-1 * equal_num * 2]
-        # Convert four 6-bit groups into three 8-bit groups
-        temp_str_list = [temp_str[x:x + 8] for x in [0, 8, 16]]
-        # Binary to decimal
-        temp_str_list = [int(x, 2) for x in temp_str_list if x]
-        # Join the decoded characters onto the output string
-        output_str += ''.join([chr(x) for x in temp_str_list])
-        ascii_list = ascii_list[4:]
-    return output_str
-
-
-# Create the local proxy cache directory
-proxy_path = os.path.join(os.path.dirname(__file__), "proxy_file")
-if not os.path.exists(proxy_path):
-    os.mkdir(proxy_path)
-
-
-def get_proxy_from_url(**kwargs):
-    """
-    Fetch proxies from the configured source URL(s)
-    :param kwargs:
-    :return:
-    """
-    proxy_source_url = kwargs.get("proxy_source_url", [])
-    # proxy_source_url = "http://socks.spdata.jianyu360.com/socks/getips?limit=100"
-
-    if not isinstance(proxy_source_url, list):
-        proxy_source_url = [proxy_source_url]
-        proxy_source_url = [x for x in proxy_source_url if x]
-    if not proxy_source_url:
-        raise ValueError("no specify proxy_source_url: {}".format(proxy_source_url))
-    kwargs = kwargs.copy()
-    kwargs.pop("proxy_source_url")
-    proxies_list = []
-    for url in proxy_source_url:
-        if url.startswith("http"):
-            proxies_list.extend(get_proxy_from_http(url, **kwargs))
-        elif url.startswith("redis"):
-            proxies_list.extend(get_proxy_from_redis(url, **kwargs))
-
-    if proxies_list:
-        # Shuffle the order
-        random.shuffle(proxies_list)
-    return proxies_list
-
-
-def get_proxy_from_http(proxy_source_url, **kwargs):
-    """
-    Fetch proxies from the given HTTP endpoint
-    :param proxy_source_url:
-    :param kwargs:
-    :return:
-    """
-    filename = tools.get_md5(proxy_source_url) + ".txt"
-    abs_filename = os.path.join(proxy_path, filename)
-    update_interval = kwargs.get("local_proxy_file_cache_timeout", 30)
-    update_flag = 0
-    if not update_interval:
-        # Force an update
-        update_flag = 1
-    elif not os.path.exists(abs_filename):
-        # Update when the cache file does not exist
-        update_flag = 1
-    elif time.time() - os.stat(abs_filename).st_mtime > update_interval:
-        # Update interval exceeded
-        update_flag = 1
-    if update_flag:
-        pool = []
-        response = requests.get(proxy_source_url, timeout=20)
-        # Rewrite: handle the response returned by the socks proxy API
-        for proxy in response.json():
-            host = decrypt(proxy['ip'])
-            port = proxy['ports'][0]
-            endTime = proxy['lifetime']
-            pool.append(f"{host}:{port}&&{endTime}")
-
-        with open(os.path.join(proxy_path, filename), "w") as f:
-            f.write('\n'.join(pool))
-    return get_proxy_from_file(filename)
-
-
-def get_proxy_from_file(filename, **kwargs):
-    """
-    Load proxies from the given local cache file
-        File format:
-        ip:port:https
-        ip:port:http
-        ip:port
-    :param filename:
-    :param kwargs:
-    :return:
-    """
-    proxies_list = []
-    with open(os.path.join(proxy_path, filename), "r") as f:
-        lines = f.readlines()
-
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        # Parse
-        auth = ""
-        if "@" in line:
-            auth, line = line.split("@")
-        # Rewrite: parse the proxy expiry timestamp
-        line, end = line.split("&&")
-
-        items = line.split(":")
-        if len(items) < 2:
-            continue
-
-        ip, port, *protocol = items
-        if not all([port, ip]):
-            continue
-        if auth:
-            ip = "{}@{}".format(auth, ip)
-        if not protocol:
-            # Rewrite: keep only proxies still within their validity window and convert the http format to socks5 format
-            if time.time() < int(end):
-                proxies = {
-                    "https": "socks5://%s:%s" % (ip, port),
-                    "http": "socks5://%s:%s" % (ip, port),
-                    # "end":end
-                }
-            else:
-                continue
-        else:
-            proxies = {protocol[0]: "%s://%s:%s" % (protocol[0], ip, port)}
-        proxies_list.append(proxies)
-
-    return proxies_list
-
-
-def get_proxy_from_redis(proxy_source_url, **kwargs):
-    """
-    Fetch proxies from the given redis address
-    @param proxy_source_url: redis://:passwd@host:ip/db
-        redis storage structure: zset
-        ip:port ts
-    @param kwargs:
-        {"redis_proxies_key": "xxx"}
-    @return: [{'http':'http://xxx.xxx.xxx:xxx', 'https':'https://xxx.xxx.xxx.xxx:xxx'}]
-    """
-
-    redis_conn = redis.StrictRedis.from_url(proxy_source_url)
-    key = kwargs.get("redis_proxies_key")
-    assert key, "redis_proxies_key must be specified when fetching proxies from redis"
-    proxies = redis_conn.zrange(key, 0, -1)
-    proxies_list = []
-    for proxy in proxies:
-        proxy = proxy.decode()
-        proxies_list.append(
-            {"https": "https://%s" % proxy, "http": "http://%s" % proxy}
-        )
-    return proxies_list
-
-
-def check_proxy(
-        ip="",
-        port="",
-        proxies=None,
-        type=0,
-        timeout=5,
-        logger=None,
-        show_error_log=True,
-        **kwargs,
-):
-    """
-    Check whether a proxy is usable
-    :param ip:
-    :param port:
-    :param type: 0:socket  1:requests
-    :param timeout:
-    :param logger:
-    :return:
-    """
-    if not logger:
-        logger = log
-    ok = 0
-    if type == 0 and ip and port:
-        # A successful socket connection does not guarantee the proxy works, e.g. "Connection closed by foreign host."
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
-            sk.settimeout(timeout)
-            try:
-                # Must actually connect, otherwise the proxy list is never refreshed
-                sk.connect((ip, int(port)))
-                ok = 1
-            except Exception as e:
-                if show_error_log:
-                    logger.debug("check proxy failed: {} {}:{}".format(e, ip, port))
-            sk.close()
-    else:
-        if not proxies:
-            proxies = {
-                "http": "socks5://{}:{}".format(ip, port),
-                "https": "socks5//{}:{}".format(ip, port),
-            }
-        try:
-            # Rewrite: URL used for the proxy check
-            r = requests.get(
-                "https://myip.ipip.net", proxies=proxies, timeout=timeout, stream=True
-            )
-            ok = 1
-            r.close()
-        except Exception as e:
-            if show_error_log:
-                logger.debug(
-                    "check proxy failed: {} {}:{} {}".format(e, ip, port, proxies)
-                )
-    return ok
-
-
-class ProxyItem(object):
-    """单个代理对象"""
-
-    # 代理标记
-    proxy_tag_list = (-1, 0, 1)
-
-    def __init__(
-            self,
-            proxies=None,
-            valid_timeout=20,
-            check_interval=180,
-            max_proxy_use_num=10000,
-            delay=30,
-            use_interval=None,
-            **kwargs,
-    ):
-        """
-        :param proxies:
-        :param valid_timeout: proxy check timeout in seconds (since 20181008 validity is no longer checked by default)
-        :param check_interval:
-        :param max_proxy_use_num:
-        :param delay:
-        :param use_interval: minimum interval between uses, in seconds; unlimited by default
-        :param logger: log handler, defaults to log.get_logger()
-        :param kwargs:
-        """
-        # {"http": ..., "https": ...}
-        self.proxies = proxies
-        # Check timeout, seconds
-        self.valid_timeout = valid_timeout
-        # Check interval, seconds
-        self.check_interval = check_interval
-
-        # Flag  0: normal  -1: discard  1: use later ...
-        self.flag = 0
-        # Timestamp of the last flag change
-        self.flag_ts = 0
-        # Timestamp of the last validity check
-        self.update_ts = 0
-        # Maximum number of times the proxy may be used
-        self.max_proxy_use_num = max_proxy_use_num
-        # Number of times the proxy has been used
-        self.use_num = 0
-        # Delay before reuse, seconds
-        self.delay = delay
-        # Minimum interval between uses, seconds
-        self.use_interval = use_interval
-        # Timestamp of last use
-        self.use_ts = 0
-
-        self.proxy_args = self.parse_proxies(self.proxies)
-        self.proxy_ip = self.proxy_args["ip"]
-        self.proxy_port = self.proxy_args["port"]
-        self.proxy_ip_port = "{}:{}".format(self.proxy_ip, self.proxy_port)
-        if self.proxy_args["user"]:
-            self.proxy_id = "{user}:{password}@{ip}:{port}".format(**self.proxy_args)
-        else:
-            self.proxy_id = self.proxy_ip_port
-
-        # Log handler
-        self.logger = log
-
-    def get_proxies(self):
-        self.use_num += 1
-        return self.proxies
-
-    def is_delay(self):
-        return self.flag == 1
-
-    def is_valid(self, force=0, type=0):
-        """
-        Check whether the proxy is valid
-            1: valid
-            2: delayed, use later
-            0: invalid, removed from the pool immediately
-        :param force:
-        :param type:
-        :return:
-        """
-        if self.use_num > self.max_proxy_use_num > 0:
-            self.logger.debug("代理达到最大使用次数: {} {}".format(self.use_num, self.proxies))
-            return 0
-        if self.flag == -1:
-            self.logger.debug("代理被标记 -1 丢弃 %s" % self.proxies)
-            return 0
-        if self.delay > 0 and self.flag == 1:
-            if time.time() - self.flag_ts < self.delay:
-                self.logger.debug("代理被标记 1 延迟 %s" % self.proxies)
-                return 2
-            else:
-                self.flag = 0
-                self.logger.debug("延迟代理释放: {}".format(self.proxies))
-        if self.use_interval:
-            if time.time() - self.use_ts < self.use_interval:
-                return 2
-        if not force:
-            if time.time() - self.update_ts < self.check_interval:
-                return 1
-        if self.valid_timeout > 0:
-            ok = check_proxy(
-                proxies=self.proxies,
-                type=type,
-                timeout=self.valid_timeout,
-                logger=self.logger,
-            )
-        else:
-            ok = 1
-        self.update_ts = time.time()
-        return ok
-
-    @classmethod
-    def parse_proxies(cls, proxies):
-        """
-        Split a proxies dict into its component parts
-        :param proxies:
-        :return:
-        """
-        if not proxies:
-            return {}
-        if isinstance(proxies, (str, bytes)):
-            proxies = json.loads(proxies)
-        protocol = list(proxies.keys())
-        if not protocol:
-            return {}
-        _url = proxies.get(protocol[0])
-        # Rewrite: the http:// prefixing below is commented out so socks5 proxies are parsed correctly
-        # if not _url.startswith("http"):
-        #     _url = "http://" + _url
-        _url_parse = parse.urlparse(_url)
-        netloc = _url_parse.netloc
-        if "@" in netloc:
-            netloc_auth, netloc_host = netloc.split("@")
-        else:
-            netloc_auth, netloc_host = "", netloc
-        ip, *port = netloc_host.split(":")
-        port = port[0] if port else "80"
-        user, *password = netloc_auth.split(":")
-        password = password[0] if password else ""
-        return {
-            "protocol": protocol,
-            "ip": ip,
-            "port": port,
-            "user": user,
-            "password": password,
-            "ip_port": "{}:{}".format(ip, port),
-        }
-
-
-class ProxyPoolBase(object):
-    def __init__(self, *args, **kwargs):
-        pass
-
-    def get(self, *args, **kwargs):
-        raise NotImplementedError
-
-
-class ProxyPool(ProxyPoolBase):
-    """代理池"""
-
-    def __init__(self, **kwargs):
-        """
-        :param size: pool size, -1 for unlimited
-        :param proxy_source_url: proxy source address, a single URL or a list
-        :param proxy_instance: instance that provides proxies
-        :param reset_interval: minimum interval between pool resets
-        :param reset_interval_max: maximum interval between pool resets, defaults to 180 s
-        :param check_valid: whether to check validity when fetching a proxy
-        :param local_proxy_file_cache_timeout: timeout of the locally cached proxy file
-        :param logger: log handler, defaults to log.get_logger()
-        :param kwargs: other parameters
-        """
-        kwargs.setdefault("size", -1)
-        kwargs.setdefault("proxy_source_url", setting.PROXY_EXTRACT_API)
-
-        super(ProxyPool, self).__init__(**kwargs)
-        # Maximum queue length
-        self.max_queue_size = kwargs.get("size", -1)
-        # Actual number of proxies available
-        self.real_max_proxy_count = 1000
-        # Maximum number of times a proxy may be used
-        # Proxy source address, e.g. http://localhost/proxy.txt
-        self.proxy_source_url = kwargs.get("proxy_source_url", [])
-        if not isinstance(self.proxy_source_url, list):
-            self.proxy_source_url = [self.proxy_source_url]
-            self.proxy_source_url = [x for x in self.proxy_source_url if x]
-            self.proxy_source_url = list(set(self.proxy_source_url))
-            kwargs.update({"proxy_source_url": self.proxy_source_url})
-        # Log handler
-        self.logger = kwargs.get("logger") or log
-        kwargs["logger"] = self.logger
-        if not self.proxy_source_url:
-            self.logger.warn("need set proxy_source_url or proxy_instance")
-
-        # Minimum interval between pool resets
-        self.reset_interval = kwargs.get("reset_interval", 5)
-        # Force a reset after this long to pull in fresh proxies instead of reusing old banned ones
-        self.reset_interval_max = kwargs.get("reset_interval_max", 180)
-        # Whether to check proxy validity
-        self.check_valid = kwargs.get("check_valid", True)
-
-        # Proxy queue
-        self.proxy_queue = None
-        # {proxy_id: ProxyItem, ...}
-        self.proxy_dict = {}
-        # Invalid proxies
-        self.invalid_proxy_dict = {}
-
-        self.kwargs = kwargs
-
-        # Lock used when resetting the pool
-        self.reset_lock = None
-        # Last reset time
-        self.last_reset_time = 0
-        # Counter for resets happening too fast
-        self.reset_fast_count = 0
-        # Number of times fetching a proxy still failed after 3 retries
-        self.no_valid_proxy_times = 0
-
-        # Last time a proxy was fetched
-        self.last_get_ts = time.time()
-
-        # Record each ProxyItem's update_ts to avoid re-checking validity when resets happen too fast
-        self.proxy_item_update_ts_dict = {}
-
-        # Warning flag
-        self.warn_flag = False
-
-    def warn(self):
-        if not self.warn_flag:
-            for url in self.proxy_source_url:
-                if "zhima" in url:
-                    continue
-            self.warn_flag = True
-        return
-
-    @property
-    def queue_size(self):
-        """
-        Number of proxies currently in the pool
-        :return:
-        """
-        return self.proxy_queue.qsize() if self.proxy_queue is not None else 0
-
-    def clear(self):
-        """
-        Clear internal state
-        :return:
-        """
-        self.proxy_queue = None
-        # {proxy_ip: ProxyItem, ...}
-        self.proxy_dict = {}
-        # Purge invalid proxies older than 10 minutes
-        _limit = datetime.datetime.now() - datetime.timedelta(minutes=10)
-        self.invalid_proxy_dict = {
-            k: v for k, v in self.invalid_proxy_dict.items() if v > _limit
-        }
-        # Purge stale update_ts records
-        _limit = time.time() - 600
-        self.proxy_item_update_ts_dict = {
-            k: v for k, v in self.proxy_item_update_ts_dict.items() if v > _limit
-        }
-        return
-
-    def get(self, retry: int = 0) -> dict:
-        """
-        Fetch a proxy from the pool
-        :param retry:
-        :return:
-        """
-        retry += 1
-        if retry > 3:
-            self.no_valid_proxy_times += 1
-            return None
-        # if time.time() - self.last_get_ts > 3 * 60:
-        #     # No fetch for 3 minutes, reset the pool
-        #     try:
-        #         self.reset_proxy_pool()
-        #     except Exception as e:
-        #         self.logger.exception(e)
-        # Record fetch time
-        self.last_get_ts = time.time()
-        #
-        self.warn()
-        proxy_item = self.get_random_proxy()
-        if proxy_item:
-            # Skip validity check
-            if not self.check_valid:  #
-                # Put it back into the queue
-                proxies = proxy_item.get_proxies()
-                self.put_proxy_item(proxy_item)
-                return proxies
-            else:
-                is_valid = proxy_item.is_valid()
-                if is_valid:
-                    # Record update_ts
-                    self.proxy_item_update_ts_dict[
-                        proxy_item.proxy_id
-                    ] = proxy_item.update_ts
-                    # Put it back into the queue
-                    proxies = proxy_item.get_proxies()
-                    self.put_proxy_item(proxy_item)
-                    if is_valid == 1:
-                        if proxy_item.use_interval:
-                            proxy_item.use_ts = time.time()
-                        return proxies
-                else:
-                    # Handle the invalid proxy
-                    self.proxy_dict.pop(proxy_item.proxy_id, "")
-                    self.invalid_proxy_dict[
-                        proxy_item.proxy_id
-                    ] = datetime.datetime.now()
-        else:
-            try:
-                time.sleep(3)
-                self.reset_proxy_pool()
-            except Exception as e:
-                self.logger.exception(e)
-        if self.no_valid_proxy_times >= 5:
-            # Bug fix: when only one task remains, a single thread checks proxies while many proxies may be unusable
-            # (more over time), so the crawler could fail to get a proxy indefinitely and stall
-            try:
-                time.sleep(3)
-                self.reset_proxy_pool()
-            except Exception as e:
-                self.logger.exception(e)
-        return self.get(retry)
-
-    get_proxy = get
-
-    def get_random_proxy(self) -> ProxyItem:
-        """
-        Get a proxy at random
-        :return:
-        """
-        if self.proxy_queue is not None:
-            if random.random() < 0.5:
-                # Check with 50% probability; this is a hot path, so keep it cheap
-                if time.time() - self.last_reset_time > self.reset_interval_max:
-                    time.sleep(3)
-                    self.reset_proxy_pool(force=True)
-                else:
-                    min_q_size = (
-                        min(self.max_queue_size / 2, self.real_max_proxy_count / 2)
-                        if self.max_queue_size > 0
-                        else self.real_max_proxy_count / 2
-                    )
-                    if self.proxy_queue.qsize() < min_q_size:
-                        time.sleep(3)
-                        self.reset_proxy_pool()
-            try:
-                return self.proxy_queue.get_nowait()
-            except Exception:
-                pass
-        return None
-
-    def append_proxies(self, proxies_list: list) -> int:
-        """
-        Add proxies to the pool
-        :param proxies_list:
-        :return:
-        """
-        count = 0
-        if not isinstance(proxies_list, list):
-            proxies_list = [proxies_list]
-        for proxies in proxies_list:
-            if proxies:
-                proxy_item = ProxyItem(proxies=proxies, **self.kwargs)
-                # Added invalid-proxy check 2018/12/18
-                if proxy_item.proxy_id in self.invalid_proxy_dict:
-                    continue
-                if proxy_item.proxy_id not in self.proxy_dict:
-                    # Backfill update_ts
-                    if not proxy_item.update_ts:
-                        proxy_item.update_ts = self.proxy_item_update_ts_dict.get(
-                            proxy_item.proxy_id, 0
-                        )
-                    self.put_proxy_item(proxy_item)
-                    self.proxy_dict[proxy_item.proxy_id] = proxy_item
-                    count += 1
-        return count
-
-    def put_proxy_item(self, proxy_item: ProxyItem):
-        """
-        Add a ProxyItem to the pool queue
-        :param proxy_item:
-        :return:
-        """
-        return self.proxy_queue.put_nowait(proxy_item)
-
-    def reset_proxy_pool(self, force: bool = False):
-        """
-        Reset the proxy pool
-        :param force: whether to force the reset
-        :return:
-        """
-        if not self.reset_lock:
-            # Import lazily; otherwise threading may be imported before the gevent patch, leaving RLock unpatched
-            import threading
-
-            self.reset_lock = threading.RLock()
-        with self.reset_lock:
-            if (
-                    force
-                    or self.proxy_queue is None
-                    or (
-                    self.max_queue_size > 0
-                    and self.proxy_queue.qsize() < self.max_queue_size / 2
-            )
-                    or (
-                    self.max_queue_size < 0
-                    and self.proxy_queue.qsize() < self.real_max_proxy_count / 2
-            )
-                    or self.no_valid_proxy_times >= 5
-            ):
-                if time.time() - self.last_reset_time < self.reset_interval:
-                    self.reset_fast_count += 1
-                    if self.reset_fast_count % 10 == 0:
-                        self.logger.debug(
-                            "代理池重置的太快了:) {}".format(self.reset_fast_count)
-                        )
-                        time.sleep(1)
-                else:
-                    self.clear()
-                    if self.proxy_queue is None:
-                        import queue
-
-                        self.proxy_queue = queue.Queue()
-                    # TODO the proxies fetched here may contain duplicates
-                    proxies_list = get_proxy_from_url(**self.kwargs)
-                    self.real_max_proxy_count = len(proxies_list)
-                    if 0 < self.max_queue_size < self.real_max_proxy_count:
-                        proxies_list = random.sample(proxies_list, self.max_queue_size)
-                    _valid_count = self.append_proxies(proxies_list)
-                    self.last_reset_time = time.time()
-                    self.no_valid_proxy_times = 0
-                    self.logger.debug(
-                        "重置代理池成功: 获取{}, 成功添加{}, 失效{},  当前代理数{},".format(
-                            len(proxies_list),
-                            _valid_count,
-                            len(self.invalid_proxy_dict),
-                            len(self.proxy_dict),
-                        )
-                    )
-        return
-
-    def tag_proxy(self, proxies_list: list, flag: int, *, delay=30) -> bool:
-        """
-        Tag proxies with a flag
-        :param proxies_list:
-        :param flag:
-                    -1  discard
-                    1   delay before reuse
-        :param delay: delay time in seconds
-        :return:
-        """
-        if int(flag) not in ProxyItem.proxy_tag_list or not proxies_list:
-            return False
-        if not isinstance(proxies_list, list):
-            proxies_list = [proxies_list]
-        for proxies in proxies_list:
-            if not proxies:
-                continue
-            proxy_id = ProxyItem(proxies).proxy_id
-            if proxy_id not in self.proxy_dict:
-                continue
-            self.proxy_dict[proxy_id].flag = flag
-            self.proxy_dict[proxy_id].flag_ts = time.time()
-            self.proxy_dict[proxy_id].delay = delay
-
-        return True
-
-    def get_proxy_item(self, proxy_id="", proxies=None):
-        """
-        Get the ProxyItem object
-        :param proxy_id:
-        :param proxies:
-        :return:
-        """
-        if proxy_id:
-            return self.proxy_dict.get(proxy_id)
-        if proxies:
-            proxy_id = ProxyItem(proxies).proxy_id
-            return self.proxy_dict.get(proxy_id)
-        return
-
-    def copy(self):
-        return ProxyPool(**self.kwargs)
-
-    def all(self) -> list:
-        """
-        Get all proxies available from the configured source
-        :return:
-        """
-        return get_proxy_from_url(**self.kwargs)
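
A minimal usage sketch of the pool above, assuming the module is importable as `network.proxy_pool` and that `setting.PROXY_EXTRACT_API` points at a reachable proxy source (neither is verified here):

    from network.proxy_pool import ProxyPool

    pool = ProxyPool(size=50, check_valid=False)   # small pool, skip per-fetch validity checks
    proxies = pool.get()                           # e.g. {"http": "socks5://ip:port", "https": "socks5://ip:port"}
    if proxies:
        # Flag a misbehaving proxy so it is delayed for 60 seconds before reuse
        pool.tag_proxy([proxies], flag=1, delay=60)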

+ 0 - 524
A数据处理/site_monitor/network/request.py

@@ -1,524 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2018-07-25 11:49:08
----------
-@summary: Request structure
----------
-@author: Boris
-@email:  boris_liu@foxmail.com
-"""
-
-import copy
-import re
-
-import requests
-from requests.cookies import RequestsCookieJar
-from requests.packages.urllib3.exceptions import InsecureRequestWarning
-
-import setting as setting
-import utils.tools as tools
-from db.redisdb import RedisDB
-from network import user_agent
-from network.downloader.base import Downloader, RenderDownloader
-from network.proxy_pool import ProxyPool
-from network.response import Response
-from utils.log import logger as log
-
-# Suppress warning messages
-requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
-
-
-class Request:
-    user_agent_pool = user_agent
-    proxies_pool: ProxyPool = None
-
-    cache_db = None  # redis / pika
-    cached_redis_key = None  # redis key prefix for cached responses: response_cached:cached_redis_key:md5
-    cached_expire_time = 1200  # cache expiry time, seconds
-
-    # Downloaders
-    downloader: Downloader = None
-    session_downloader: Downloader = None
-    render_downloader: RenderDownloader = None
-
-    __REQUEST_ATTRS__ = {
-        # "method",
-        # "url",
-        "params",
-        "data",
-        "headers",
-        "cookies",
-        "files",
-        "auth",
-        "timeout",
-        "allow_redirects",
-        "proxies",
-        "hooks",
-        "stream",
-        "verify",
-        "cert",
-        "json",
-    }
-
-    _DEFAULT_KEY_VALUE_ = dict(
-        url="",
-        method=None,
-        retry_times=0,
-        priority=300,
-        parser_name=None,
-        callback=None,
-        filter_repeat=True,
-        auto_request=True,
-        request_sync=False,
-        use_session=None,
-        random_user_agent=True,
-        download_midware=None,
-        is_abandoned=False,
-        render=False,
-        render_time=0,
-        make_absolute_links=None,
-    )
-
-    _CUSTOM_PROPERTIES_ = {
-        "requests_kwargs",
-        "custom_ua",
-        "custom_proxies",
-    }
-
-    def __init__(
-        self,
-        url="",
-        retry_times=0,
-        priority=300,
-        parser_name=None,
-        callback=None,
-        filter_repeat=True,
-        auto_request=True,
-        request_sync=False,
-        use_session=None,
-        random_user_agent=True,
-        download_midware=None,
-        is_abandoned=False,
-        render=False,
-        render_time=0,
-        make_absolute_links=None,
-        **kwargs,
-    ):
-        """
-        @summary: Request parameters
-        ---------
-        Framework parameters
-        @param url: url to crawl
-        @param retry_times: current retry count
-        @param priority: priority, lower values run first, default 300
-        @param parser_name: name of the class that holds the callback, defaults to the current class
-        @param callback: callback, either a function or a function name (for cross-class callbacks set parser_name to the target class and callback to the method name)
-        @param filter_repeat: whether to deduplicate (True/False); only takes effect when REQUEST_FILTER_ENABLE is True in setting, default True
-        @param auto_request: whether to download the page automatically, default True. If False the response is empty and you must fetch the page yourself
-        @param request_sync: whether to download synchronously, default asynchronous. Set True for short-lived urls so the yielded request is handled immediately instead of being queued
-        @param use_session: whether to use a session
-        @param random_user_agent: whether to use a random User-Agent (True/False); only takes effect when RANDOM_HEADERS is True in setting, default True
-        @param download_midware: download middleware, defaults to the parser's download_midware
-        @param is_abandoned: whether to give up retrying when an exception occurs, True/False, default False
-        @param render: whether to render with a browser
-        @param render_time: render time, i.e. how long to wait after opening the page before grabbing the source
-        @param make_absolute_links: whether to convert links to absolute links, default True
-        --
-        The following parameters are used the same way as in requests
-        @param method: request method such as POST or GET; by default decided by whether data is empty
-        @param params: query parameters
-        @param data: request body
-        @param json: request json string, equivalent to json.dumps(data)
-        @param headers:
-        @param cookies: dict or CookieJar object
-        @param files:
-        @param auth:
-        @param timeout: (float or tuple) timeout waiting for server data, either a float or a (connect timeout, read timeout) tuple
-        @param allow_redirects: Boolean. True allows following redirects for POST/PUT/DELETE
-        @param proxies: proxies {"http":"http://xxx", "https":"https://xxx"}
-        @param verify: verify the SSL certificate when True
-        @param stream: if False, the response content is downloaded immediately
-        @param cert:
-        --
-        @param **kwargs: other values, e.g. Request(item=item) makes item accessible as request.item
-        ---------
-        @result:
-        """
-
-        self.url = url
-        self.method = None
-        self.retry_times = retry_times
-        self.priority = priority
-        self.parser_name = parser_name
-        self.callback = callback
-        self.filter_repeat = filter_repeat
-        self.auto_request = auto_request
-        self.request_sync = request_sync
-        self.use_session = use_session
-        self.random_user_agent = random_user_agent
-        self.download_midware = download_midware
-        self.is_abandoned = is_abandoned
-        self.render = render
-        self.render_time = render_time
-        self.make_absolute_links = (
-            make_absolute_links
-            if make_absolute_links is not None
-            else setting.MAKE_ABSOLUTE_LINKS
-        )
-
-        # Custom attributes, excluded from serialization
-        self.requests_kwargs = {}
-        for key, value in kwargs.items():
-            if key in self.__class__.__REQUEST_ATTRS__:  # collect requests parameters
-                self.requests_kwargs[key] = value
-
-            self.__dict__[key] = value
-
-        self.custom_ua = False
-        self.custom_proxies = False
-
-    def __repr__(self):
-        try:
-            return "<Request {}>".format(self.url)
-        except:
-            return "<Request {}>".format(str(self.to_dict)[:40])
-
-    def __setattr__(self, key, value):
-        """
-        For request.xxx = xxx assignments, update the request and its internal parameters
-        @param key:
-        @param value:
-        @return:
-        """
-        self.__dict__[key] = value
-
-        if key in self.__class__.__REQUEST_ATTRS__:
-            self.requests_kwargs[key] = value
-
-    def __lt__(self, other):
-        return self.priority < other.priority
-
-    @property
-    def _proxies_pool(self):
-        if not self.__class__.proxies_pool:
-            self.__class__.proxies_pool = ProxyPool()
-
-        return self.__class__.proxies_pool
-
-    @property
-    def _downloader(self):
-        if not self.__class__.downloader:
-            self.__class__.downloader = tools.import_cls(setting.DOWNLOADER)()
-
-        return self.__class__.downloader
-
-    @property
-    def _session_downloader(self):
-        if not self.__class__.session_downloader:
-            self.__class__.session_downloader = tools.import_cls(
-                setting.SESSION_DOWNLOADER
-            )()
-
-        return self.__class__.session_downloader
-
-    @property
-    def _render_downloader(self):
-        if not self.__class__.render_downloader:
-            self.__class__.render_downloader = tools.import_cls(
-                setting.RENDER_DOWNLOADER
-            )()
-
-        return self.__class__.render_downloader
-
-    @property
-    def to_dict(self):
-        request_dict = {}
-
-        self.callback = (
-            getattr(self.callback, "__name__")
-            if callable(self.callback)
-            else self.callback
-        )
-
-        if isinstance(self.download_midware, (tuple, list)):
-            self.download_midware = [
-                getattr(download_midware, "__name__")
-                if callable(download_midware)
-                else download_midware
-                for download_midware in self.download_midware
-            ]
-        else:
-            self.download_midware = (
-                getattr(self.download_midware, "__name__")
-                if callable(self.download_midware)
-                else self.download_midware
-            )
-
-        for key, value in self.__dict__.items():
-            if (
-                key in self.__class__._DEFAULT_KEY_VALUE_
-                and self.__class__._DEFAULT_KEY_VALUE_.get(key) == value
-                or key in self.__class__._CUSTOM_PROPERTIES_
-            ):
-                continue
-
-            if value is not None:
-                if key in self.__class__.__REQUEST_ATTRS__:
-                    if not isinstance(
-                        value, (bytes, bool, float, int, str, tuple, list, dict)
-                    ):
-                        value = tools.dumps_obj(value)
-                else:
-                    if not isinstance(value, (bytes, bool, float, int, str)):
-                        value = tools.dumps_obj(value)
-
-            request_dict[key] = value
-
-        return request_dict
-
-    @property
-    def callback_name(self):
-        return (
-            getattr(self.callback, "__name__")
-            if callable(self.callback)
-            else self.callback
-        )
-
-    def make_requests_kwargs(self):
-        """
-        Prepare request keyword arguments
-        """
-        # Default timeout
-        self.requests_kwargs.setdefault(
-            "timeout", setting.REQUEST_TIMEOUT
-        )  # connect=22 read=22
-
-        # stream setting
-        # By default the response body is downloaded immediately after the request.
-        # With stream=True the body is only downloaded when Response.content is accessed; at first only the headers are returned.
-        # Downside: with stream=True, requests cannot release the connection back to the pool until all data is consumed or Response.close is called, which hurts connection efficiency.
-        self.requests_kwargs.setdefault("stream", True)
-
-        # Disable certificate verification
-        self.requests_kwargs.setdefault("verify", False)
-
-        # Determine the request method
-        method = self.__dict__.get("method")
-        if not method:
-            if "data" in self.requests_kwargs or "json" in self.requests_kwargs:
-                method = "POST"
-            else:
-                method = "GET"
-        self.method = method
-
-        # Set the User-Agent
-        headers = self.requests_kwargs.get("headers", {})
-        if "user-agent" not in headers and "User-Agent" not in headers:
-            if self.random_user_agent and setting.RANDOM_HEADERS:
-                # Random User-Agent
-                ua = self.__class__.user_agent_pool.get(setting.USER_AGENT_TYPE)
-                headers.update({"User-Agent": ua})
-                self.requests_kwargs.update(headers=headers)
-            else:
-                # Use the default User-Agent
-                self.requests_kwargs.setdefault(
-                    "headers", {"User-Agent": setting.DEFAULT_USERAGENT}
-                )
-        else:
-            self.custom_ua = True
-
-        # Proxies
-        proxies = self.requests_kwargs.get("proxies", -1)
-        if proxies == -1 and setting.PROXY_ENABLE and setting.PROXY_EXTRACT_API:
-            while True:
-                proxies = self._proxies_pool.get()
-                if proxies:
-                    self.requests_kwargs.update(proxies=proxies)
-                    break
-                else:
-                    log.debug("暂无可用代理 ...")
-        else:
-            self.custom_proxies = True
-
-    def get_response(self, save_cached=False):
-        """
-        Get a response with selector support
-        @param save_cached: save to cache so the page does not have to be re-downloaded every time while debugging
-        @return:
-        """
-        self.make_requests_kwargs()
-
-        log.debug(
-            """
-                -------------- %srequest for ----------------
-                url  = %s
-                method = %s
-                args = %s
-                """
-            % (
-                ""
-                if not self.parser_name
-                else "%s.%s "
-                % (
-                    self.parser_name,
-                    (
-                        self.callback
-                        and callable(self.callback)
-                        and getattr(self.callback, "__name__")
-                        or self.callback
-                    )
-                    or "parse",
-                ),
-                self.url,
-                self.method,
-                self.requests_kwargs,
-            )
-        )
-
-        # def hooks(response, *args, **kwargs):
-        #     print(response.url)
-        #
-        # self.requests_kwargs.update(hooks={'response': hooks})
-
-        # self.use_session takes precedence
-        use_session = (
-            setting.USE_SESSION if self.use_session is None else self.use_session
-        )
-
-        if self.render:
-            response = self._render_downloader.download(self)
-        elif use_session:
-            response = self._session_downloader.download(self)
-        else:
-            response = self._downloader.download(self)
-
-        response.make_absolute_links = self.make_absolute_links
-
-        if save_cached:
-            self.save_cached(response, expire_time=self.__class__.cached_expire_time)
-
-        return response
-
-    def get_params(self):
-        return self.requests_kwargs.get("params")
-
-    def get_proxies(self) -> dict:
-        """
-
-        Returns: {"https": "https://ip:port", "http": "http://ip:port"}
-
-        """
-        return self.requests_kwargs.get("proxies")
-
-    def get_proxy(self) -> str:
-        """
-
-        Returns: ip:port
-
-        """
-        proxies = self.get_proxies()
-        if proxies:
-            return re.sub(
-                "http.*?//", "", proxies.get("http", "") or proxies.get("https", "")
-            )
-
-    def get_headers(self) -> dict:
-        return self.requests_kwargs.get("headers", {})
-
-    def get_user_agent(self) -> str:
-        return self.get_headers().get("user_agent") or self.get_headers().get(
-            "User-Agent"
-        )
-
-    def get_cookies(self) -> dict:
-        cookies = self.requests_kwargs.get("cookies")
-        if cookies and isinstance(cookies, RequestsCookieJar):
-            cookies = cookies.get_dict()
-
-        if not cookies:
-            cookie_str = self.get_headers().get("Cookie") or self.get_headers().get(
-                "cookie"
-            )
-            if cookie_str:
-                cookies = tools.get_cookies_from_str(cookie_str)
-        return cookies
-
-    @property
-    def fingerprint(self):
-        """
-        Unique identifier of the request
-        @return:
-        """
-        url = self.__dict__.get("url", "")
-        # Canonicalize the url
-        url = tools.canonicalize_url(url)
-        args = [url]
-
-        for arg in ["params", "data", "files", "auth", "cert", "json"]:
-            if self.requests_kwargs.get(arg):
-                args.append(self.requests_kwargs.get(arg))
-
-        return tools.get_md5(*args)
-
-    @property
-    def _cache_db(self):
-        if not self.__class__.cache_db:
-            self.__class__.cache_db = RedisDB()  # .from_url(setting.pika_spider_1_uri)
-
-        return self.__class__.cache_db
-
-    @property
-    def _cached_redis_key(self):
-        if self.__class__.cached_redis_key:
-            return (
-                f"response_cached:{self.__class__.cached_redis_key}:{self.fingerprint}"
-            )
-        else:
-            return f"response_cached:test:{self.fingerprint}"
-
-    def save_cached(self, response, expire_time=1200):
-        """
-        Save the response in redis for debugging so it does not have to be downloaded every time
-        @param response:
-        @param expire_time: expiry time
-        @return:
-        """
-
-        self._cache_db.strset(self._cached_redis_key, response.to_dict, ex=expire_time)
-
-    def get_response_from_cached(self, save_cached=True):
-        """
-        Get the response from the cache
-        Note:
-            Attributes that will be empty:
-                -raw : urllib3.response.HTTPResponse
-                -connection: requests.adapters.HTTPAdapter
-                -history
-
-            Attributes whose meaning changes:
-                - request changes from requests' type to Request
-        @param: save_cached when there is no cache, download directly; whether to save the cache after downloading
-        @return:
-        """
-        response_dict = self._cache_db.strget(self._cached_redis_key)
-        if not response_dict:
-            log.info("无response缓存  重新下载")
-            response_obj = self.get_response(save_cached=save_cached)
-        else:
-            response_dict = eval(response_dict)
-            response_obj = Response.from_dict(response_dict)
-        return response_obj
-
-    def del_response_cached(self):
-        self._cache_db.clear(self._cached_redis_key)
-
-    @classmethod
-    def from_dict(cls, request_dict):
-        for key, value in request_dict.items():
-            if isinstance(value, bytes):  # deserialize, e.g. item
-                request_dict[key] = tools.loads_obj(value)
-
-        return cls(**request_dict)
-
-    def copy(self):
-        return self.__class__.from_dict(copy.deepcopy(self.to_dict))
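
A minimal usage sketch of the Request class above, assuming the package layout is importable and `setting.py` defines the constants it references (REQUEST_TIMEOUT, DOWNLOADER, PROXY_ENABLE, etc.); the URL is an illustrative placeholder:

    from network.request import Request

    request = Request(
        url="https://example.com",
        render=False,                              # True would route through PlaywrightDownloader
        use_session=False,
        headers={"User-Agent": "Mozilla/5.0"},     # custom UA, so the random UA pool is skipped
    )
    response = request.get_response()
    print(response.status_code, response.title())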

+ 0 - 414
A数据处理/site_monitor/network/response.py

@@ -1,414 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2023-05-10 
----------
-@summary:  
----------
-@author: Dzr
-"""
-import copy
-import datetime
-import os
-import re
-import time
-from urllib.parse import urlparse, urlunparse, urljoin
-
-from bs4 import BeautifulSoup
-from bs4.dammit import UnicodeDammit
-from lxml.html import fromstring, HtmlElement
-from lxml.html.clean import Cleaner
-from parsel import Selector
-from requests.cookies import RequestsCookieJar
-from requests.models import Response as res
-from w3lib.encoding import (
-    http_content_type_encoding,
-    html_body_declared_encoding
-)
-import utils.tools as tools
-from utils.log import logger as log
-
-FAIL_ENCODING = "ISO-8859-1"
-
-# Special characters in html source that must be removed, otherwise they break etree construction
-SPECIAL_CHARACTERS = [
-    # Remove control characters; full list: https://zh.wikipedia.org/wiki/%E6%8E%A7%E5%88%B6%E5%AD%97%E7%AC%A6
-    "[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]"
-]
-
-SPECIAL_CHARACTER_PATTERNS = [
-    re.compile(special_character) for special_character in SPECIAL_CHARACTERS
-]
-
-
-def iter_node(element: HtmlElement):
-    yield element
-    for sub_element in element:
-        if isinstance(sub_element, HtmlElement):
-            yield from iter_node(sub_element)
-
-
-class Response(res):
-    def __init__(self, response):
-        super(Response, self).__init__()
-        self.__dict__.update(response.__dict__)
-
-        self._cached_selector = None
-        self._cached_text = None
-        self._cached_json = None
-
-        self._encoding = None
-
-        self.encoding_errors = "strict"  # strict / replace / ignore
-
-    @classmethod
-    def from_dict(cls, response_dict):
-        """
-        Build a Response object from a dict
-        @param response_dict: the raw response.__dict__
-        @return:
-        """
-        cookie_jar = RequestsCookieJar()
-        cookie_jar.update(other=response_dict["cookies"])
-        response_dict["cookies"] = cookie_jar
-
-        response_dict["elapsed"] = datetime.timedelta(
-            0, 0, response_dict["elapsed"]
-        )  # elapsed time
-        response_dict["connection"] = None
-        response_dict["_content_consumed"] = True
-
-        response = res()
-        response.__dict__.update(response_dict)
-        return cls(response)
-
-    @property
-    def to_dict(self):
-        response_dict = {
-            "_content": self.content,
-            "cookies": self.cookies.get_dict(),
-            "encoding": self.encoding,
-            "headers": self.headers,
-            "status_code": self.status_code,
-            "elapsed": self.elapsed.microseconds,  # 耗时
-            "url": self.url,
-        }
-
-        return response_dict
-
-    def __clear_cache(self):
-        self.__dict__["_cached_selector"] = None
-        self.__dict__["_cached_text"] = None
-        self.__dict__["_cached_json"] = None
-
-    @property
-    def encoding(self):
-        """
-        Encoding priority: custom encoding > header encoding > page-declared encoding > encoding guessed from content
-        """
-        self._encoding = (
-            self._encoding
-            or self._headers_encoding()
-            or self._body_declared_encoding()
-            or self.apparent_encoding
-        )
-        return self._encoding
-
-    @encoding.setter
-    def encoding(self, val):
-        self.__clear_cache()
-        self._encoding = val
-
-    code = encoding
-
-    def _headers_encoding(self):
-        """
-        Get the charset encoding from the headers
-        """
-        content_type = self.headers.get("Content-Type") or self.headers.get(
-            "content-type"
-        )
-        if content_type:
-            return (
-                http_content_type_encoding(content_type) or "utf-8"
-                if "application/json" in content_type
-                else None
-            )
-
-    def _body_declared_encoding(self):
-        """
-        Get <meta charset="..."> from html, xml, etc.
-        """
-
-        return html_body_declared_encoding(self.content)
-
-    def _get_unicode_html(self, html):
-        if not html or not isinstance(html, bytes):
-            return html
-
-        converted = UnicodeDammit(html, is_html=True)
-        if not converted.unicode_markup:
-            raise Exception(
-                "Failed to detect encoding of article HTML, tried: %s"
-                % ", ".join(converted.tried_encodings)
-            )
-
-        html = converted.unicode_markup
-        return html
-
-    def _make_absolute(self, link):
-        """Makes a given link absolute."""
-        try:
-
-            link = link.strip()
-
-            # Parse the link with stdlib.
-            parsed = urlparse(link)._asdict()
-
-            # If link is relative, then join it with base_url.
-            if not parsed["netloc"]:
-                return urljoin(self.url, link)
-
-            # Link is absolute; if it lacks a scheme, add one from base_url.
-            if not parsed["scheme"]:
-                parsed["scheme"] = urlparse(self.url).scheme
-
-                # Reconstruct the URL to incorporate the new scheme.
-                parsed = (v for v in parsed.values())
-                return urlunparse(parsed)
-
-        except Exception as e:
-            log.error(
-                "Invalid URL <{}> can't make absolute_link. exception: {}".format(
-                    link, e
-                )
-            )
-
-        # Link is absolute and complete with scheme; nothing to be done here.
-        return link
-
-    def _absolute_links(self, text):
-        regexs = [
-            r'(<(?i)a.*?href\s*?=\s*?["\'])(.+?)(["\'])',  # a
-            r'(<(?i)img.*?src\s*?=\s*?["\'])(.+?)(["\'])',  # img
-            r'(<(?i)link.*?href\s*?=\s*?["\'])(.+?)(["\'])',  # css
-            r'(<(?i)script.*?src\s*?=\s*?["\'])(.+?)(["\'])',  # js
-        ]
-
-        for regex in regexs:
-
-            def replace_href(text):
-                # html = text.group(0)
-                link = text.group(2)
-                absolute_link = self._make_absolute(link)
-
-                # return re.sub(regex, r'\1{}\3'.format(absolute_link), html) # regex replacement fails on some characters, e.g. the source of http://permit.mep.gov.cn/permitExt/syssb/xxgk/xxgk!showImage.action?dataid=0b092f8115ff45c5a50947cdea537726
-                return text.group(1) + absolute_link + text.group(3)
-
-            text = re.sub(regex, replace_href, text, flags=re.S)
-
-        return text
-
-    def _del_special_character(self, text):
-        """
-        Remove special characters
-        """
-        for special_character_pattern in SPECIAL_CHARACTER_PATTERNS:
-            text = special_character_pattern.sub("", text)
-
-        return text
-
-    @property
-    def __text(self):
-        """Content of the response, in unicode.
-
-        If Response.encoding is None, encoding will be guessed using
-        ``chardet``.
-
-        The encoding of the response content is determined based solely on HTTP
-        headers, following RFC 2616 to the letter. If you can take advantage of
-        non-HTTP knowledge to make a better guess at the encoding, you should
-        set ``r.encoding`` appropriately before accessing this property.
-        """
-
-        if not self.content:
-            return ""
-
-        # Decode unicode from given encoding.
-        try:
-            content = str(self.content, self.encoding, errors=self.encoding_errors)
-        except (LookupError, TypeError):
-            # A LookupError is raised if the encoding was not found which could
-            # indicate a misspelling or similar mistake.
-            #
-            # A TypeError can be raised if encoding is None
-            #
-            # So we try blindly encoding.
-            content = str(self.content, errors=self.encoding_errors)
-
-        return content
-
-    @property
-    def text(self):
-        if self._cached_text is None:
-            if self.encoding and self.encoding.upper() != FAIL_ENCODING:
-                try:
-                    self._cached_text = self.__text
-                except UnicodeDecodeError:
-                    self._cached_text = self._get_unicode_html(self.content)
-            else:
-                self._cached_text = self._get_unicode_html(self.content)
-
-            if self._cached_text:
-                self._cached_text = self._absolute_links(self._cached_text)
-                self._cached_text = self._del_special_character(self._cached_text)
-
-        return self._cached_text
-
-    @text.setter
-    def text(self, html):
-        self._cached_text = html
-        self._cached_text = self._absolute_links(self._cached_text)
-        self._cached_text = self._del_special_character(self._cached_text)
-        self._cached_selector = Selector(self.text)
-
-    @property
-    def json(self, **kwargs):
-        if self._cached_json is None:
-            self.encoding = self.encoding or "utf-8"
-            self._cached_json = super(Response, self).json(**kwargs)
-
-        return self._cached_json
-
-    @property
-    def content(self):
-        content = super(Response, self).content
-        return content
-
-    @property
-    def is_html(self):
-        content_type = self.headers.get("Content-Type", "")
-        if "text/html" in content_type:
-            return True
-        else:
-            return False
-
-    @property
-    def selector(self):
-        if self._cached_selector is None:
-            self._cached_selector = Selector(self.text)
-        return self._cached_selector
-
-    def bs4(self, features="html.parser"):
-        soup = BeautifulSoup(self.text, features)
-        return soup
-
-    def extract(self):
-        return self.selector.get()
-
-    def xpath(self, query, **kwargs):
-        return self.selector.xpath(query, **kwargs)
-
-    def css(self, query):
-        return self.selector.css(query)
-
-    def re(self, regex, replace_entities=False):
-        """
-        @summary: regex matching
-        Note: page source like <a class='page-numbers'... is normalized to <a class="page-numbers"; write the regex as <a class="(.*?)". Quote style in non-html text is not changed.
-        For convenience the regex is automatically made insensitive to single vs double quotes
-        ---------
-        @param regex: regex string or re.compile object
-        @param replace_entities: when True, strips characters like &nbsp; and unescapes &quot; to ", which changes the page structure. When extracting json from page source, set it to False
-        ---------
-        @result: list
-        """
-
-        # Make single and double quotes interchangeable in the regex
-        if isinstance(regex, str):
-            regex = re.sub("['\"]", "['\"]", regex)
-
-        return self.selector.re(regex, replace_entities)
-
-    def re_first(self, regex, default=None, replace_entities=False):
-        """
-        @summary: regex matching
-        Note: page source like <a class='page-numbers'... is normalized to <a class="page-numbers"; write the regex as <a class="(.*?)". Quote style in non-html text is not changed.
-        For convenience the regex is automatically made insensitive to single vs double quotes
-        ---------
-        @param regex: regex string or re.compile object
-        @param default: default value when nothing matches
-        @param replace_entities: when True, strips characters like &nbsp; and unescapes &quot; to ", which changes the page structure. When extracting json from page source, set it to False
-        ---------
-        @result: first match or the default value
-        """
-
-        # Make single and double quotes interchangeable in the regex
-        if isinstance(regex, str):
-            regex = re.sub("['\"]", "['\"]", regex)
-
-        return self.selector.re_first(regex, default, replace_entities)
-
-    def close_browser(self, request):
-        if hasattr(self, "browser"):
-            request._render_downloader.webdriver_pool.remove(self.browser)
-            del self.browser
-
-    def __del__(self):
-        self.close()
-
-    def open(self, delete_temp_file=False):
-        with open("temp.html", "w", encoding=self.encoding, errors="replace") as html:
-            self.encoding_errors = "replace"
-            html.write(self.text)
-
-        os.system("open temp.html")
-
-        if delete_temp_file:
-            time.sleep(1)
-            os.remove("temp.html")
-
-    @property
-    def plain_text(self):
-        return re.findall('[\u4e00-\u9fa5]', self.text, re.S)
-
-    def tags(self):
-        tags_dict = {}
-
-        html = copy.deepcopy(self.text)
-        if len(html) == 0:
-            tags_dict['tags_count'] = 0
-            return tags_dict
-
-        cleaner = Cleaner()
-        html = cleaner.clean_html(html)
-
-        count = 0
-        node = fromstring(html)
-        for elem in iter_node(node.xpath('/html')[0]):
-            count += 1
-            tag = elem.tag
-            if not tags_dict.get(tag):
-                tags_dict[tag] = 1
-            else:
-                tags_dict[tag] += 1
-
-        tags_dict['tags_count'] = count
-        return tags_dict
-
-    def title(self):
-        """Best-effort page title: the longest common substring of <title> and the <h1>-<h4> texts (kept only if longer than 8 characters), falling back to <title> or the first heading."""
-        title_text = self.xpath('//title/text()').extract_first("")
-
-        htag = '//h1//text() | //h2//text() | //h3//text() | //h4//text()'
-        h_tag_texts_list = self.xpath(htag).extract()
-        htag_text = h_tag_texts_list[0] if len(h_tag_texts_list) > 0 else ''
-
-        news_title = ''
-        for h_tag_text in h_tag_texts_list:
-            lcs = tools.get_longest_common_sub_string(title_text, h_tag_text)
-            if len(lcs) > len(news_title):
-                news_title = lcs
-
-        news_title = news_title if len(news_title) > 8 else ''
-
-        title = (news_title or title_text or htag_text)
-        return title.strip()
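
For reference, a standalone sketch of the quote rewrite performed by Response.re()/re_first() above, using only the standard library (the sample HTML is made up):

    import re

    # The same substitution used by Response.re()/re_first(): every literal quote in the
    # pattern becomes the character class ['"], so one pattern matches both quoting styles.
    pattern = '<a class="(.*?)"'
    quote_insensitive = re.sub("['\"]", "['\"]", pattern)
    print(quote_insensitive)  # <a class=['"](.*?)['"]

    html = "<a class='page-numbers' href='/p/2'>2</a>"
    print(re.findall(quote_insensitive, html))  # ['page-numbers']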

+ 0 - 389
A数据处理/site_monitor/network/user_agent.py

@@ -1,389 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2016-12-28 17:55
----------
-@summary: pools of User-Agent strings grouped by browser type
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import random
-
-USER_AGENTS = {
-    "chrome": [
-        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
-        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
-        "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
-        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
-        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
-        "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
-        "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
-        "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
-    ],
-    "opera": [
-        "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
-        "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
-        "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
-        "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
-        "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
-        "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
-        "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
-        "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
-        "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
-        "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
-        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
-        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
-        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
-        "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
-        "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
-        "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
-        "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
-        "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
-        "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
-        "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
-        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
-        "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
-        "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
-        "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
-        "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
-        "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
-        "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
-        "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
-        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
-    ],
-    "firefox": [
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
-        "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
-        "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
-        "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
-        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
-        "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
-        "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101  Firefox/28.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
-        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
-        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
-        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
-        "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
-        "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
-        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
-        "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
-        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
-        "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
-        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
-        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
-        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
-        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
-        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
-        "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
-        "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
-        "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0)  Gecko/20100101 Firefox/18.0",
-        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
-    ],
-    "internetexplorer": [
-        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
-        "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0;  rv:11.0) like Gecko",
-        "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
-        "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
-        "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
-        "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
-        "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
-        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; Tablet PC 2.0; InfoPath.3; .NET4.0C; .NET4.0E)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; yie8)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; .NET4.0C; Tablet PC 2.0)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; FunWebProducts)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/13.0.782.215)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/11.0.696.57)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) chromeframe/10.0.648.205",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.1; SV1; .NET CLR 2.8.52393; WOW64; en-US)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; chromeframe/11.0.696.57)",
-        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/4.0; GTB7.4; InfoPath.3; SV1; .NET CLR 3.1.76908; WOW64; en-US)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.8.36217; WOW64; en-US)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; .NET CLR 2.7.58687; SLCC2; Media Center PC 5.0; Zune 3.4; Tablet PC 3.6; InfoPath.3)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; SLCC1; .NET CLR 1.1.4322)",
-        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 3.0.04506.30)",
-        "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.0; Trident/4.0; FBSMTWB; .NET CLR 2.0.34861; .NET CLR 3.0.3746.3218; .NET CLR 3.5.33652; msn OptimizedIE8;ENUS)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.2; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.2)",
-        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 3.0)",
-    ],
-    "safari": [
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; ko-KR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; fr-FR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; cs-CZ) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; sv-se) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ko-kr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; it-it) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-fr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; es-es) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-gb) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; de-de) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; sv-SE) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; hu-HU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; it-IT) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-us) AppleWebKit/534.16+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-ch) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; ar) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Android 2.2; Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-HK) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; tr-TR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; nb-NO) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-TW) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; zh-cn) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
-    ],
-    "mobile": [
-        "Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/14.2 Safari/536.2+",
-        "Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/14.2 Safari/536.2+",
-        "Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/14.2 Mobile Safari/537.10+",
-        "Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/14.2 Mobile Safari/537.10+",
-        "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
-        "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/14.2 Mobile/14E304 Safari/602.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/14.2 Mobile/14E304 Safari/602.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (Mobile; LYF/F300B/LYF-F300B-001-01-15-130718-i;Android; rv:89.0 Gecko/48.0 Firefox/90.0 KAIOS/2.5",
-        "Mozilla/5.0 (Mobile; LYF/F300B/LYF-F300B-001-01-15-130718-i;Android; rv:89.0 Gecko/48.0 Firefox/90.0 KAIOS/2.5",
-        "Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true",
-        "Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true",
-        "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
-        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
-        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
-        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
-        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)",
-        "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)",
-        "Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13",
-        "Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13",
-        "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 11; Pixel 4a (5G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 11; Pixel 4a (5G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36 Edg/93.0.4576.0",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0 Gecko/20100101 Firefox/90.0",
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Safari/605.1.15",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36 Edg/93.0.4576.0",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0 Gecko/20100101 Firefox/90.0",
-    ],
-}
-
-
-def get(ua_type: str = None):
-    if not ua_type:
-        ua_type = random.choice(list(USER_AGENTS.keys()))
-    elif ua_type not in USER_AGENTS:
-        raise ValueError(
-            "ua_type error, expect one of {}".format(list(USER_AGENTS.keys()))
-        )
-
-    return random.choice(USER_AGENTS[ua_type])
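
For reference, a short usage sketch of the get() helper above (assuming the project root is on sys.path, as the module paths in setting.py imply, so that network.user_agent is importable):

    from network import user_agent

    headers = {"User-Agent": user_agent.get("chrome")}  # random Chrome UA
    any_ua = user_agent.get()  # random browser type, then a random UA of that type
    # user_agent.get("edge") would raise ValueError: only the keys of USER_AGENTS are accepted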

+ 0 - 14
A数据处理/site_monitor/requirements.txt

@@ -1,14 +0,0 @@
-beautifulsoup4==4.9.3
-bs4==0.0.1
-loguru==0.5.3
-lxml==4.9.1
-numpy==1.24.1
-parsel==1.7.0
-playwright==1.24.1
-pymongo==3.12.0
-redis==3.5.3
-requests==2.30.0
-six==1.16.0
-w3lib==2.1.1
-PyExecJS>=1.5.1
-redis-py-cluster>=2.1.0

+ 0 - 65
A数据处理/site_monitor/setting.py

@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""爬虫配置文件"""
-import os
-
-# MONGODB
-MONGO_IP = "172.17.4.87"
-MONGO_PORT = 27080
-MONGO_DB = "py_spider"
-MONGO_USER_NAME = os.getenv("MONGO_USER_NAME")
-MONGO_USER_PASS = os.getenv("MONGO_USER_PASS")
-
-# REDIS
-# ip:port; multiple entries may be a list or a comma-separated string, e.g. ip1:port1,ip2:port2 or ["ip1:port1", "ip2:port2"]
-REDISDB_IP_PORTS = "172.17.4.232:7361"
-REDISDB_USER_PASS = "k5ZJR5KV4q7DRZ92DQ"
-REDISDB_DB = 4
-# used in Redis sentinel mode
-REDISDB_SERVICE_NAME = os.getenv("REDISDB_SERVICE_NAME")
-
-# Browser rendering (see the sketch at the end of this file's diff for how these options map onto Playwright's API)
-PLAYWRIGHT = dict(
-    user_agent=None,  # string, or a no-argument function that returns the user agent
-    proxy=None,  # xxx.xxx.xxx.xxx:xxxx, or a no-argument function that returns the proxy address
-    headless=True,  # run the browser headless
-    driver_type="webkit",  # chromium, firefox or webkit
-    timeout=60,  # request timeout
-    window_size=(1024, 800),  # window size
-    executable_path=None,  # browser executable path; None uses the default
-    download_path=None,  # directory for downloaded files
-    render_time=0,  # render time: wait this long after opening the page before grabbing the source
-    wait_until="networkidle",  # page-load event to wait for; one of "commit", "domcontentloaded", "load", "networkidle"
-    use_stealth_js=False,  # use stealth.min.js to hide browser automation fingerprints
-    page_on_event_callback=None,  # callbacks for page.on() events, e.g. page_on_event_callback={"dialog": lambda dialog: dialog.accept()}
-    storage_state_path=None,  # path for persisted browser storage state
-    url_regexes=None,  # API URLs to intercept; a list of regexes
-    save_all=False,  # save all intercepted responses (used with url_regexes); when False only the last interception is kept
-)
-
-# requests network timeout
-REQUEST_TIMEOUT = 30  # time to wait for the server response: a float, or a (connect timeout, read timeout) tuple
-
-# Proxy settings
-PROXY_EXTRACT_API = "http://proxy.spdata.jianyu360.com/proxy/getallip"  # proxy extraction API; returned proxies are separated by \r\n
-PROXY_ENABLE = True
-
-# Random headers
-RANDOM_HEADERS = True
-# UserAgent type: one of 'chrome', 'opera', 'firefox', 'internetexplorer', 'safari', 'mobile'; a random type is used if unset
-USER_AGENT_TYPE = "chrome"
-# default User-Agent
-DEFAULT_USERAGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
-# use a requests session
-USE_SESSION = False
-
-# Downloaders
-DOWNLOADER = "network.downloader.RequestsDownloader"
-SESSION_DOWNLOADER = "network.downloader.RequestsSessionDownloader"
-RENDER_DOWNLOADER = "network.downloader.PlaywrightDownloader"
-MAKE_ABSOLUTE_LINKS = True  # automatically convert links to absolute URLs
-
-# WeChat Work alerting
-WECHAT_WARNING_URL = ""  # WeChat Work bot webhook API
-WECHAT_WARNING_PHONE = ""  # person to @-mention in the group; a list of multiple people is supported
-WECHAT_WARNING_ALL = False  # whether to notify everyone; default False
-WARNING_INTERVAL = 3600  # minimum interval between identical alerts, to avoid flooding; 0 disables deduplication
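
The PLAYWRIGHT block above only declares options; how they are consumed is up to the project's PlaywrightDownloader (not shown here). A minimal sketch of how a few of the keys map onto Playwright's own sync API (the target URL is illustrative, and setting.py is assumed to be importable):

    from playwright.sync_api import sync_playwright

    import setting

    cfg = setting.PLAYWRIGHT
    with sync_playwright() as p:
        # driver_type selects the engine; headless is passed straight to launch()
        browser = getattr(p, cfg["driver_type"]).launch(headless=cfg["headless"])
        width, height = cfg["window_size"]
        context = browser.new_context(viewport={"width": width, "height": height})
        page = context.new_page()
        # wait_until/timeout correspond to page.goto(); Playwright timeouts are in milliseconds
        page.goto("https://example.com", wait_until=cfg["wait_until"], timeout=cfg["timeout"] * 1000)
        html = page.content()
        browser.close()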

+ 0 - 8
A数据处理/site_monitor/utils/__init__.py

@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2023-05-10 
----------
-@summary:  
----------
-@author: Dzr
-"""

+ 0 - 147
A数据处理/site_monitor/utils/clean_html.py

@@ -1,147 +0,0 @@
-import re
-__all__ = ['cleaner']
-
-# Standalone elements
-INDEPENDENT_TAGS = {
-    '<head>[\s\S]*?</head>': '',
-    '<html>|<html [^>]*>|</html>': '',
-    '<body>|<body [^>]*>|</body>': '',
-    '<meta[^<>]*>|<meta [^<>]*>|<meta[^<>]*>[\s\S]*?</meta>|</meta>': '',  # metadata
-    '&(nbsp|e[mn]sp|thinsp|zwn?j|#13);': '',  # whitespace entities
-    '\\xa0|\\u3000': '',  # whitespace characters
-    '<!--[\s\S]*?-->': '',  # comments
-    '<style[^<>]*>[\s\S]*?</style>': '',  # styles
-    '<script[^<>]*>[\s\S]*?</script>': '',  # JavaScript
-    '<input>': '',  # input boxes
-    '<img[^>]*>': '<br>',  # images
-}
-# Inline elements
-INLINE_TAGS = {
-    '<a>|<a [^>]*>|</a>': '',  # hyperlinks
-    '<link>|<link [^>]*>|</link>': '',  # link tags
-    '<span>|<span [^>]*>|</span>': '',  # span
-    '<label>|<label [^>]*>|</label>': '<br>',  # label
-    '<font>|<font [^>]*>|</font>': '',  # font
-    'data:image(.*?) ': '',            # base64 image data
-}
-# Block-level elements
-BLOCK_TAGS = {
-    '<div>\s*?</div>': '',
-    '<h[1-6][^>]*>|</h[1-6]>': '',  # headings
-    '<p>|<p [^>]*>': '<br>',  # paragraphs
-    '</p>': '',  # paragraphs
-    '<div>|<div [^>]*>': '<br>',  # division
-    '</div>': '',  # division
-    '<o:p>|<o:p [^>]*>|</o:p>': ''  # Microsoft Word (Office) paragraphs
-}
-# Miscellaneous
-OTHER = {
-    '<?xml[^>]*>|<?xml [^>]*>|<?xml:.*?>': '',
-    '<epointform>': '',
-    '<!doctype html>|<!doctype html [^>]*>': '',
-    '【关闭】|关闭': '',
-    '【打印】|打印本页': '',
-    '【字体:[\s\S]*】': '',
-    '文章来源:[\u4e00-\u9fa5]+': '',
-    '浏览次数:.*[<]+': '',
-    '(责任编辑:.*?)': '',
-    '分享到[:]': '',
-
-}
-# Styles
-CSS_STYLE = {
-    'style="[\s\S]*?"|style ="[\s\S]*?"': '',
-    'bgcolor="[\s\S]*?"|bgcolor ="[\s\S]*?"': '',
-    'bordercolor="[\s\S]*?"|bordercolor ="[\s\S]*?"': '',
-    'class="[\s\S]*?"|class ="[\s\S]*?"': '',
-    'align="[\s\S]*?"|align ="[\s\S]*?"': '',
-    'cellpadding="(\d+)"|cellspacing="(\d+)"': '',
-
-}
-# Whitespace
-BLANKS = {
-    '\n\s*\n': '\n',
-    '\s*\n\s*': '\n',
-    '[^\S\n]': ' ',
-    '\s+': ' ',
-}
-# CSS tag set
-TAGS = {'table', 'tr', 'td', 'div', 'span', 'p'}
-# CSS attribute set
-ATTRS = {'id', 'class', 'style', 'width'}
-
-
-def _repair_tag():
-    """异常的标签组合,用来替换非标准页面的标签"""
-    _repairs = {}
-    for tag in TAGS:
-        for attr in ATTRS:
-            key = '{}{}'.format(tag, attr)
-            val = '{} {}'.format(tag, attr)
-            _repairs[key] = val
-    return _repairs
-
-
-def _escape_character(html):
-    """转义字符"""
-    html = html.replace('&lt;', '<')
-    html = html.replace('&gt;', '>')
-    html = html.replace('&quot;', '"')
-    html = html.replace('&amp;', '&')
-    # hide the borders of input boxes
-    html = html.replace('<input', '<input style="border-color: transparent;"')
-    return html
-
-
-def _lowercase_tag(html):
-    """标签归一化处理(全部小写 + 标签修复)"""
-    tags = re.findall("<[^>]+>", html)
-    tag_sets = set(tags)
-
-    if len(tag_sets) > 10000:
-        from bs4 import BeautifulSoup
-        soup = BeautifulSoup(html, "lxml")
-        html = str(soup.body.next_element)
-    else:
-        for tag in tag_sets:
-            html = html.replace(tag, str(tag).lower())
-
-    repair_tags = _repair_tag()
-    for err, right in repair_tags.items():
-        html = html.replace(err, right)
-
-    return html
-
-
-def cleaner(html, special=None, completely=False):
-    """
-    Clean page HTML.
-
-    :param html: the page to clean
-    :param special: extra, page-specific cleaning rules
-    :param completely: whether to clean the page aggressively
-    :return: cleaned page source
-    """
-    if special is None:
-        special = {}
-
-    OTHER.update(special)
-    remove_tags = {
-        **INDEPENDENT_TAGS,
-        **INLINE_TAGS,
-        **BLOCK_TAGS,
-        **OTHER,
-        **CSS_STYLE,
-        **BLANKS,
-    }
-    html = _lowercase_tag(html)
-    for tag, repl in remove_tags.items():
-        html = re.sub(tag, repl, html)
-
-    if completely:
-        html = re.sub(r'<canvas[^<>]*>[\s\S]*?</canvas>', '', html)  # canvas
-        html = re.sub(r'<iframe[^<>]*>[\s\S]*?</iframe>', '', html)  # iframe
-        html = re.sub('<([^<>\u4e00-\u9fa5]|微软雅黑|宋体|仿宋)+>', '', html)
-
-    html = _escape_character(html)
-    return html
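
A short usage sketch of cleaner() (assuming the module is importable as utils.clean_html, per the file path; the sample markup is made up):

    from utils.clean_html import cleaner

    raw = '<div class="body"><p>公告内容</p><script>alert(1)</script></div>'
    print(cleaner(raw))
    # -> the script block is stripped, <p>/<div> collapse to <br>, class attributes are removed
    # page-specific rules can be passed via special, and completely=True also drops canvas/iframe blocks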

File diff suppressed because it is too large
+ 0 - 0
A数据处理/site_monitor/utils/js/intercept.js


File diff suppressed because it is too large
+ 0 - 6
A数据处理/site_monitor/utils/js/stealth.min.js


+ 0 - 14
A数据处理/site_monitor/utils/log.py

@@ -1,14 +0,0 @@
-from pathlib import Path
-
-from loguru import logger
-
-_absolute = Path(__file__).absolute().parent.parent
-_log_path = (_absolute / 'logs/log_{time:YYYY-MM-DD}.log').resolve()
-logger.add(
-    _log_path,
-    format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {message}',
-    level='INFO',
-    rotation='00:00',
-    retention='1 week',
-    encoding='utf-8',
-)
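
Other modules import this pre-configured logger directly (tools.py below does "from utils.log import logger as log"); a minimal usage sketch:

    from utils.log import logger

    logger.info("site monitor started")
    logger.error("request failed: {}", "timeout")  # loguru formats positional args with {}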

+ 0 - 2401
A数据处理/site_monitor/utils/tools.py

@@ -1,2401 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2018-09-06 14:21
----------
-@summary: utilities
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import asyncio
-import calendar
-import codecs
-import configparser  # for reading config files
-import datetime
-import functools
-import hashlib
-import html
-import importlib
-import json
-import os
-import pickle
-import random
-import re
-import socket
-import ssl
-import string
-import sys
-import time
-import traceback
-import urllib
-import urllib.parse
-import uuid
-import weakref
-from functools import partial, wraps
-from hashlib import md5
-from pprint import pformat
-from pprint import pprint
-from urllib import request
-from urllib.parse import urljoin
-
-import bson
-import execjs  # pip install PyExecJS
-import redis
-import requests
-import six
-from requests.cookies import RequestsCookieJar
-from w3lib.url import canonicalize_url as _canonicalize_url
-
-import setting as setting
-from db.redisdb import RedisDB
-from utils.log import logger as log
-
-os.environ["EXECJS_RUNTIME"] = "Node"  # 设置使用node执行js
-
-# globally disable SSL certificate verification
-ssl._create_default_https_context = ssl._create_unverified_context
-
-TIME_OUT = 30
-TIMER_TIME = 5
-
-redisdb = None
-
-
-def get_redisdb():
-    global redisdb
-    if not redisdb:
-        redisdb = RedisDB()
-    return redisdb
-
-
-# Decorators
-class Singleton(object):
-    def __init__(self, cls):
-        self._cls = cls
-        self._instance = {}
-
-    def __call__(self, *args, **kwargs):
-        if self._cls not in self._instance:
-            self._instance[self._cls] = self._cls(*args, **kwargs)
-        return self._instance[self._cls]
-
-
-def log_function_time(func):
-    try:
-
-        @functools.wraps(func)  # copy the original function's metadata onto the wrapper
-        def calculate_time(*args, **kw):
-            began_time = time.time()
-            callfunc = func(*args, **kw)
-            end_time = time.time()
-            log.debug(func.__name__ + " run time  = " + str(end_time - began_time))
-            return callfunc
-
-        return calculate_time
-    except:
-        log.debug("求取时间无效 因为函数参数不符")
-        return func
-
-
-def run_safe_model(module_name):
-    def inner_run_safe_model(func):
-        try:
-
-            @functools.wraps(func)  # copy the original function's metadata onto the wrapper
-            def run_func(*args, **kw):
-                callfunc = None
-                try:
-                    callfunc = func(*args, **kw)
-                except Exception as e:
-                    log.error(module_name + ": " + func.__name__ + " - " + str(e))
-                    traceback.print_exc()
-                return callfunc
-
-            return run_func
-        except Exception as e:
-            log.error(module_name + ": " + func.__name__ + " - " + str(e))
-            traceback.print_exc()
-            return func
-
-    return inner_run_safe_model
-
-
-def memoizemethod_noargs(method):
-    """Decorator to cache the result of a method (without arguments) using a
-    weak reference to its object
-    """
-    cache = weakref.WeakKeyDictionary()
-
-    @functools.wraps(method)
-    def new_method(self, *args, **kwargs):
-        if self not in cache:
-            cache[self] = method(self, *args, **kwargs)
-        return cache[self]
-
-    return new_method
-
-
-######################## web page parsing helpers ###############################
-def get_longest_common_sub_string(str1: str, str2: str) -> str:
-    """
-    Get the longest common substring of two strings.
-
-    Build a matrix with string 1 along the horizontal axis and string 2 along the vertical axis, for example:
-
-      青南是天才!?
-    听0 0 0 0 00 0
-    说0 0 0 0 00 0
-    青1 0 0 0 00 0
-    南0 1 0 0 00 0
-    是0 0 1 0 00 0
-    天0 0 0 1 00 0
-    才0 0 0 0 10 0
-    !0 0 0 0 01 0
-
-    Clearly, the longest diagonal run of 1s marks the longest common substring.
-
-    :param str1:
-    :param str2:
-    :return:
-    """
-    if not all([str1, str2]):
-        return ''
-    matrix = [[0] * (len(str2) + 1) for _ in range(len(str1) + 1)]
-    max_length = 0
-    start_position = 0
-    for index_of_str1 in range(1, len(str1) + 1):
-        for index_of_str2 in range(1, len(str2) + 1):
-            if str1[index_of_str1 - 1] == str2[index_of_str2 - 1]:
-                matrix[index_of_str1][index_of_str2] = matrix[index_of_str1 - 1][index_of_str2 - 1] + 1
-                if matrix[index_of_str1][index_of_str2] > max_length:
-                    max_length = matrix[index_of_str1][index_of_str2]
-                    start_position = index_of_str1 - max_length
-            else:
-                matrix[index_of_str1][index_of_str2] = 0
-    return str1[start_position: start_position + max_length]
-
-
-def get_cookies(response):
-    cookies = requests.utils.dict_from_cookiejar(response.cookies)
-    return cookies
-
-
-def get_cookies_from_str(cookie_str):
-    """
-    >>> get_cookies_from_str("key=value; key2=value2; key3=; key4=; ")
-    {'key': 'value', 'key2': 'value2', 'key3': '', 'key4': ''}
-
-    Args:
-        cookie_str: key=value; key2=value2; key3=; key4=
-
-    Returns:
-
-    """
-    cookies = {}
-    for cookie in cookie_str.split(";"):
-        cookie = cookie.strip()
-        if not cookie:
-            continue
-        key, value = cookie.split("=", 1)
-        key = key.strip()
-        value = value.strip()
-        cookies[key] = value
-
-    return cookies
-
-
-def get_cookies_jar(cookies):
-    """
-    @summary: convert cookies produced by selenium into requests cookies
-    requests.get(xxx, cookies=jar)
-    Reference: https://www.cnblogs.com/small-bud/p/9064674.html
-
-    ---------
-    @param cookies: [{},{}]
-    ---------
-    @result: cookie jar
-    """
-
-    cookie_jar = RequestsCookieJar()
-    for cookie in cookies:
-        cookie_jar.set(cookie["name"], cookie["value"])
-
-    return cookie_jar
-
-
-def get_cookies_from_selenium_cookie(cookies):
-    """
-    @summary: convert selenium-style cookies to a plain requests cookie dict
-    requests.get(xxx, cookies=cookie_dict)
-    reference: https://www.cnblogs.com/small-bud/p/9064674.html
-
-    ---------
-    @param cookies: [{},{}]
-    ---------
-    @result: cookie dict
-    """
-
-    cookie_dict = {}
-    for cookie in cookies:
-        if cookie.get("name"):
-            cookie_dict[cookie["name"]] = cookie["value"]
-
-    return cookie_dict
-
-
-def cookiesjar2str(cookies):
-    str_cookie = ""
-    for k, v in requests.utils.dict_from_cookiejar(cookies).items():
-        str_cookie += k
-        str_cookie += "="
-        str_cookie += v
-        str_cookie += "; "
-    return str_cookie
-
-
-def cookies2str(cookies):
-    str_cookie = ""
-    for k, v in cookies.items():
-        str_cookie += k
-        str_cookie += "="
-        str_cookie += v
-        str_cookie += "; "
-    return str_cookie
-
-
-def get_urls(
-    html,
-    stop_urls=(
-        "javascript",
-        "+",
-        ".css",
-        ".js",
-        ".rar",
-        ".xls",
-        ".exe",
-        ".apk",
-        ".doc",
-        ".jpg",
-        ".png",
-        ".flv",
-        ".mp4",
-    ),
-):
-    # skip urls such as javascript:, + and pure anchors
-    regex = r'<a.*?href.*?=.*?["|\'](.*?)["|\']'
-
-    urls = get_info(html, regex)
-    urls = sorted(set(urls), key=urls.index)
-    if stop_urls:
-        stop_urls = isinstance(stop_urls, str) and [stop_urls] or stop_urls
-        use_urls = []
-        for url in urls:
-            for stop_url in stop_urls:
-                if stop_url in url:
-                    break
-            else:
-                use_urls.append(url)
-
-        urls = use_urls
-    return urls
-
-
-def get_full_url(root_url, sub_url):
-    """
-    @summary: build the full url
-    ---------
-    @param root_url: base url (the page url)
-    @param sub_url:  relative url, joined onto the base to form a full url
-    ---------
-    @result: the full url
-    """
-
-    return urljoin(root_url, sub_url)
-
-
-def joint_url(url, params):
-    # param_str = "?"
-    # for key, value in params.items():
-    #     value = isinstance(value, str) and value or str(value)
-    #     param_str += key + "=" + value + "&"
-    #
-    # return url + param_str[:-1]
-
-    if not params:
-        return url
-
-    params = urlencode(params)
-    separator = "?" if "?" not in url else "&"
-    return url + separator + params
-
-
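-# Illustrative usage sketch (not part of the original module): joint_url chooses "?" or
-# "&" depending on whether the url already carries a query string.
-def _example_joint_url():
-    assert joint_url("http://a.com/list", {"page": 1}) == "http://a.com/list?page=1"
-    assert (
-        joint_url("http://a.com/list?page=1", {"size": 20})
-        == "http://a.com/list?page=1&size=20"
-    )
-
-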
-def canonicalize_url(url):
-    """
-    canonicalize the url: sorts the query parameters and strips the fragment
-    """
-    return _canonicalize_url(url)
-
-
-def get_url_md5(url):
-    url = canonicalize_url(url)
-    url = re.sub("^http://", "https://", url)
-    return get_md5(url)
-
-
-def fit_url(urls, identis):
-    identis = isinstance(identis, str) and [identis] or identis
-    fit_urls = []
-    for link in urls:
-        for identi in identis:
-            if identi in link:
-                fit_urls.append(link)
-    return list(set(fit_urls))
-
-
-def get_param(url, key):
-    params = url.split("?")[-1].split("&")
-    for param in params:
-        key_value = param.split("=", 1)
-        if key == key_value[0]:
-            return key_value[1]
-    return None
-
-
-def urlencode(params):
-    """
-    字典类型的参数转为字符串
-    @param params:
-    {
-        'a': 1,
-        'b': 2
-    }
-    @return: a=1&b=2
-    """
-    return urllib.parse.urlencode(params)
-
-
-def urldecode(url):
-    """
-    将字符串类型的参数转为json
-    @param url: xxx?a=1&b=2
-    @return:
-    {
-        'a': 1,
-        'b': 2
-    }
-    """
-    params_json = {}
-    params = url.split("?")[-1].split("&")
-    for param in params:
-        key, value = param.split("=")
-        params_json[key] = unquote_url(value)
-
-    return params_json
-
-
-def unquote_url(url, encoding="utf-8"):
-    """
-    @summary: 将url解码
-    ---------
-    @param url:
-    ---------
-    @result:
-    """
-
-    return urllib.parse.unquote(url, encoding=encoding)
-
-
-def quote_url(url, encoding="utf-8"):
-    """
-    @summary: 将url编码 编码意思http://www.w3school.com.cn/tags/html_ref_urlencode.html
-    ---------
-    @param url:
-    ---------
-    @result:
-    """
-
-    return urllib.parse.quote(url, safe="%;/?:@&=+$,", encoding=encoding)
-
-
-def quote_chinese_word(text, encoding="utf-8"):
-    def quote_chinese_word_func(text):
-        chinese_word = text.group(0)
-        return urllib.parse.quote(chinese_word, encoding=encoding)
-
-    return re.sub("([\u4e00-\u9fa5]+)", quote_chinese_word_func, text, flags=re.S)
-
-
-def unescape(text):
-    """
-    unescape html entities
-    """
-    return html.unescape(text)
-
-
-def excape(text):
-    """
-    escape html special characters (function name kept as-is for compatibility)
-    """
-    return html.escape(text)
-
-
-_regexs = {}
-
-
-# @log_function_time
-def get_info(html, regexs, allow_repeat=True, fetch_one=False, split=None):
-    regexs = isinstance(regexs, str) and [regexs] or regexs
-
-    infos = []
-    for regex in regexs:
-        if regex == "":
-            continue
-
-        if regex not in _regexs.keys():
-            _regexs[regex] = re.compile(regex, re.S)
-
-        if fetch_one:
-            infos = _regexs[regex].search(html)
-            if infos:
-                infos = infos.groups()
-            else:
-                continue
-        else:
-            infos = _regexs[regex].findall(str(html))
-
-        if len(infos) > 0:
-            # print(regex)
-            break
-
-    if fetch_one:
-        infos = infos if infos else ("",)
-        return infos if len(infos) > 1 else infos[0]
-    else:
-        infos = allow_repeat and infos or sorted(set(infos), key=infos.index)
-        infos = split.join(infos) if split else infos
-        return infos
-
-
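-# Illustrative usage sketch (not part of the original module): get_info compiles and
-# caches each regex; it can return all matches, only the first one, or a joined string.
-def _example_get_info():
-    html_snippet = '<a href="/a.html">A</a> <a href="/b.html">B</a>'
-    assert get_info(html_snippet, 'href="(.*?)"') == ["/a.html", "/b.html"]
-    assert get_info(html_snippet, 'href="(.*?)"', fetch_one=True) == "/a.html"
-    assert get_info(html_snippet, 'href="(.*?)"', split=",") == "/a.html,/b.html"
-
-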
-def table_json(table, save_one_blank=True):
-    """
-    convert a table to json; suited to tables where key:value pairs sit in one row
-    @param table: a selector object that supports xpath
-    @param save_one_blank: keep a single blank character
-    @return:
-    """
-    data = {}
-
-    trs = table.xpath(".//tr")
-    for tr in trs:
-        tds = tr.xpath("./td|./th")
-
-        for i in range(0, len(tds), 2):
-            if i + 1 > len(tds) - 1:
-                break
-
-            key = tds[i].xpath("string(.)").extract_first(default="").strip()
-            value = tds[i + 1].xpath("string(.)").extract_first(default="").strip()
-            value = replace_str(value, "[\f\n\r\t\v]", "")
-            value = replace_str(value, " +", " " if save_one_blank else "")
-
-            if key:
-                data[key] = value
-
-    return data
-
-
-def get_table_row_data(table):
-    """
-    get the data of every row in a table
-    @param table: a selector object that supports xpath
-    @return: [[],[]..]
-    """
-
-    datas = []
-    rows = table.xpath(".//tr")
-    for row in rows:
-        cols = row.xpath("./td|./th")
-        row_datas = []
-        for col in cols:
-            data = col.xpath("string(.)").extract_first(default="").strip()
-            row_datas.append(data)
-        datas.append(row_datas)
-
-    return datas
-
-
-def rows2json(rows, keys=None):
-    """
-    convert row data to json
-    @param rows: the data of each row
-    @param keys: json keys; when empty, the first row of rows is used as the keys
-    @return:
-    """
-    data_start_pos = 0 if keys else 1
-    datas = []
-    keys = keys or rows[0]
-    for values in rows[data_start_pos:]:
-        datas.append(dict(zip(keys, values)))
-
-    return datas
-
-
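-# Illustrative usage sketch (not part of the original module): rows2json uses the first
-# row as the header when no keys are supplied.
-def _example_rows2json():
-    rows = [["name", "price"], ["apple", "3"], ["pear", "2"]]
-    expected = [{"name": "apple", "price": "3"}, {"name": "pear", "price": "2"}]
-    assert rows2json(rows) == expected
-    assert rows2json(rows[1:], keys=["name", "price"]) == expected
-
-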
-def get_form_data(form):
-    """
-    extract the data submitted by a form
-    :param form: a selector object that supports xpath
-    :return:
-    """
-    data = {}
-    inputs = form.xpath(".//input")
-    for input in inputs:
-        name = input.xpath("./@name").extract_first()
-        value = input.xpath("./@value").extract_first()
-        if name:
-            data[name] = value
-
-    return data
-
-
-def get_domain(url):
-    return urllib.parse.urlparse(url).netloc
-
-
-def get_index_url(url):
-    return "/".join(url.split("/")[:3])
-
-
-def get_ip(domain):
-    ip = socket.getaddrinfo(domain, "http")[0][4][0]
-    return ip
-
-
-def get_localhost_ip():
-    """
-    implemented with UDP: a UDP socket is "connected" to an external address so the OS
-    picks a local IP, which is then read back from the socket. no packet is actually
-    sent, so a packet sniffer will not see anything
-    :return:
-    """
-    s = None
-    try:
-        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-        s.connect(("8.8.8.8", 80))
-        ip = s.getsockname()[0]
-    finally:
-        if s:
-            s.close()
-
-    return ip
-
-
-def ip_to_num(ip):
-    import struct
-
-    ip_num = socket.ntohl(struct.unpack("I", socket.inet_aton(str(ip)))[0])
-    return ip_num
-
-
-def is_valid_proxy(proxy, check_url=None):
-    """
-    check whether a proxy is usable
-    @param proxy: xxx.xxx.xxx:xxx
-    @param check_url: target site url used for the check. defaults to None, in which case only the proxy's socket is probed, which cannot rule out "Connection closed by foreign host"
-    @return: True / False
-    """
-    is_valid = False
-
-    if check_url:
-        proxies = {"http": f"http://{proxy}", "https": f"https://{proxy}"}
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
-        }
-        response = None
-        try:
-            response = requests.get(
-                check_url, headers=headers, proxies=proxies, stream=True, timeout=20
-            )
-            is_valid = True
-
-        except Exception as e:
-            log.error("check proxy failed: {} {}".format(e, proxy))
-
-        finally:
-            if response:
-                response.close()
-
-    else:
-        ip, port = proxy.split(":")
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
-            sk.settimeout(7)
-            try:
-                sk.connect((ip, int(port)))  # check whether the proxy server is up
-                is_valid = True
-
-            except Exception as e:
-                log.error("check proxy failed: {} {}:{}".format(e, ip, port))
-
-    return is_valid
-
-
-def is_valid_url(url):
-    """
-    验证url是否合法
-    :param url:
-    :return:
-    """
-    if re.match(r"(^https?:/{2}\w.+$)|(ftp://)", url):
-        return True
-    else:
-        return False
-
-
-def get_text(soup, *args):
-    try:
-        return soup.get_text()
-    except Exception as e:
-        log.error(e)
-        return ""
-
-
-def del_html_tag(content, except_line_break=False, save_img=False, white_replaced=""):
-    """
-    strip html tags
-    @param content: html content
-    @param except_line_break: keep <p> tags (preserve line breaks)
-    @param save_img: keep <img> tags
-    @param white_replaced: replacement for whitespace characters
-    @return:
-    """
-    content = replace_str(content, "(?i)<script(.|\n)*?</script>")  # (?)忽略大小写
-    content = replace_str(content, "(?i)<style(.|\n)*?</style>")
-    content = replace_str(content, "<!--(.|\n)*?-->")
-    content = replace_str(
-        content, "(?!&[a-z]+=)&[a-z]+;?"
-    )  # drop useless entities such as &nbsp, but keep &xxx= style parameters
-    if except_line_break:
-        content = content.replace("</p>", "/p")
-        content = replace_str(content, "<[^p].*?>")
-        content = content.replace("/p", "</p>")
-        content = replace_str(content, "[ \f\r\t\v]")
-
-    elif save_img:
-        content = replace_str(content, "(?!<img.+?>)<.+?>")  # 替换掉除图片外的其他标签
-        content = replace_str(content, "(?! +)\s+", "\n")  # 保留空格
-        content = content.strip()
-
-    else:
-        content = replace_str(content, "<(.|\n)*?>")
-        content = replace_str(content, "\s", white_replaced)
-        content = content.strip()
-
-    return content
-
-
-def del_html_js_css(content):
-    content = replace_str(content, "(?i)<script(.|\n)*?</script>")  # (?)忽略大小写
-    content = replace_str(content, "(?i)<style(.|\n)*?</style>")
-    content = replace_str(content, "<!--(.|\n)*?-->")
-
-    return content
-
-
-def is_have_chinese(content):
-    regex = "[\u4e00-\u9fa5]+"
-    chinese_word = get_info(content, regex)
-    return chinese_word and True or False
-
-
-def is_have_english(content):
-    regex = "[a-zA-Z]+"
-    english_words = get_info(content, regex)
-    return english_words and True or False
-
-
-def get_chinese_word(content):
-    regex = "[\u4e00-\u9fa5]+"
-    chinese_word = get_info(content, regex)
-    return chinese_word
-
-
-def get_english_words(content):
-    regex = "[a-zA-Z]+"
-    english_words = get_info(content, regex)
-    return english_words or ""
-
-
-##################################################
-def get_json(json_str):
-    """
-    @summary: parse a json object
-    ---------
-    @param json_str: a json formatted string
-    ---------
-    @result: the parsed json object (empty dict on failure)
-    """
-
-    try:
-        return json.loads(json_str) if json_str else {}
-    except Exception as e1:
-        try:
-            json_str = json_str.strip()
-            json_str = json_str.replace("'", '"')
-            keys = get_info(json_str, "(\w+):")
-            for key in keys:
-                json_str = json_str.replace(key, '"%s"' % key)
-
-            return json.loads(json_str) if json_str else {}
-
-        except Exception as e2:
-            pass
-
-        return {}
-
-
-def jsonp2json(jsonp):
-    """
-    convert jsonp to json
-    @param jsonp: jQuery172013600082560040794_1553230569815({})
-    @return:
-    """
-    try:
-        return json.loads(re.match(".*?({.*}).*", jsonp, re.S).group(1))
-    except:
-        raise ValueError("Invalid Input")
-
-
-def dumps_json(data, indent=4, sort_keys=False):
-    """
-    @summary: pretty-format json for printing
-    ---------
-    @param data: a json formatted string or a json object
-    ---------
-    @result: the formatted string
-    """
-    try:
-        if isinstance(data, str):
-            data = get_json(data)
-
-        data = json.dumps(
-            data,
-            ensure_ascii=False,
-            indent=indent,
-            skipkeys=True,
-            sort_keys=sort_keys,
-            default=str,
-        )
-
-    except Exception as e:
-        data = pformat(data)
-
-    return data
-
-
-def get_json_value(json_object, key):
-    """
-    @summary:
-    ---------
-    @param json_object: a json object or a json formatted string
-    @param key: the key; nested levels can be written as key1.key2, e.g. {'key1':{'key2':3}}
-    ---------
-    @result: the matching value, or '' when not found
-    """
-    current_key = ""
-    value = ""
-    try:
-        json_object = (
-            isinstance(json_object, str) and get_json(json_object) or json_object
-        )
-
-        current_key = key.split(".")[0]
-        value = json_object[current_key]
-
-        key = key[key.find(".") + 1 :]
-    except Exception as e:
-        return value
-
-    if key == current_key:
-        return value
-    else:
-        return get_json_value(value, key)
-
-
-def get_all_keys(datas, depth=None, current_depth=0):
-    """
-    @summary: get all keys in a json structure
-    ---------
-    @param datas: dict / list
-    @param depth: key depth limit; unlimited by default, depth starts at 1
-    @param current_depth: current key depth, not meant to be passed by callers
-    ---------
-    @result: all keys of the json structure
-    """
-
-    keys = []
-    if depth and current_depth >= depth:
-        return keys
-
-    if isinstance(datas, list):
-        for data in datas:
-            keys.extend(get_all_keys(data, depth, current_depth=current_depth + 1))
-    elif isinstance(datas, dict):
-        for key, value in datas.items():
-            keys.append(key)
-            if isinstance(value, dict):
-                keys.extend(get_all_keys(value, depth, current_depth=current_depth + 1))
-
-    return keys
-
-
-def to_chinese(unicode_str):
-    format_str = json.loads('{"chinese":"%s"}' % unicode_str)
-    return format_str["chinese"]
-
-
-##################################################
-def replace_str(source_str, regex, replace_str=""):
-    """
-    @summary: replace substrings
-    ---------
-    @param source_str: the source string
-    @param regex: regular expression
-    @param replace_str: the replacement, '' by default
-    ---------
-    @result: the string after replacement
-    """
-    str_info = re.compile(regex)
-    return str_info.sub(replace_str, source_str)
-
-
-def del_redundant_blank_character(text):
-    """
-    删除冗余的空白符, 只保留一个
-    :param text:
-    :return:
-    """
-    return re.sub("\s+", " ", text)
-
-
-##################################################
-def get_conf_value(config_file, section, key):
-    cp = configparser.ConfigParser(allow_no_value=True)
-    with codecs.open(config_file, "r", encoding="utf-8") as f:
-        cp.read_file(f)
-    return cp.get(section, key)
-
-
-def mkdir(path):
-    try:
-        if not os.path.exists(path):
-            os.makedirs(path)
-    except OSError as exc:  # Python >2.5
-        pass
-
-
-def write_file(filename, content, mode="w", encoding="utf-8"):
-    """
-    @summary: write a file
-    ---------
-    @param filename: file name (with path)
-    @param content: content
-    @param mode: mode, w (overwrite) / a (append)
-    ---------
-    @result:
-    """
-
-    directory = os.path.dirname(filename)
-    mkdir(directory)
-    with open(filename, mode, encoding=encoding) as file:
-        file.writelines(content)
-
-
-def read_file(filename, readlines=False, encoding="utf-8"):
-    """
-    @summary: read a file
-    ---------
-    @param filename: file name (with path)
-    @param readlines: read line by line (False by default)
-    ---------
-    @result: a list when reading line by line, otherwise a string
-    """
-
-    content = None
-    try:
-        with open(filename, "r", encoding=encoding) as file:
-            content = file.readlines() if readlines else file.read()
-    except Exception as e:
-        log.error(e)
-
-    return content
-
-
-def get_oss_file_list(oss_handler, prefix, date_range_min, date_range_max=None):
-    """
-    list files on oss
-    @param prefix: path prefix, e.g. data/car_service_line/yiche/yiche_serial_zongshu_info
-    @param date_range_min: lower bound of the time range, '/'-separated, e.g. 2019/03/01 or 2019/03/01/00/00/00
-    @param date_range_max: upper bound of the time range, '/'-separated, e.g. 2019/03/01 or 2019/03/01/00/00/00
-    @return: each file path, e.g. html/e_commerce_service_line/alibaba/alibaba_shop_info/2019/03/22/15/53/15/8ca8b9e4-4c77-11e9-9dee-acde48001122.json.snappy
-    """
-
-    # work out the time range
-    date_range_max = date_range_max or date_range_min
-    date_format = "/".join(
-        ["%Y", "%m", "%d", "%H", "%M", "%S"][: date_range_min.count("/") + 1]
-    )
-    time_interval = [
-        {"days": 365},
-        {"days": 31},
-        {"days": 1},
-        {"hours": 1},
-        {"minutes": 1},
-        {"seconds": 1},
-    ][date_range_min.count("/")]
-    date_range = get_between_date(
-        date_range_min, date_range_max, date_format=date_format, **time_interval
-    )
-
-    for date in date_range:
-        file_folder_path = os.path.join(prefix, date)
-        objs = oss_handler.list(prefix=file_folder_path)
-        for obj in objs:
-            filename = obj.key
-            yield filename
-
-
-def is_html(url):
-    if not url:
-        return False
-
-    try:
-        content_type = request.urlopen(url).info().get("Content-Type", "")
-
-        if "text/html" in content_type:
-            return True
-        else:
-            return False
-    except Exception as e:
-        log.error(e)
-        return False
-
-
-def is_exist(file_path):
-    """
-    @summary: 文件是否存在
-    ---------
-    @param file_path:
-    ---------
-    @result:
-    """
-
-    return os.path.exists(file_path)
-
-
-def download_file(url, file_path, *, call_func=None, proxies=None, data=None):
-    """
-    download a file; the destination directory is created automatically
-    Args:
-        url: the url
-        file_path: destination path
-        call_func: callback invoked after a successful download
-        proxies: proxies
-        data: request body
-
-    Returns:
-
-    """
-    directory = os.path.dirname(file_path)
-    mkdir(directory)
-
-    # progress bar
-    def progress_callfunc(blocknum, blocksize, totalsize):
-        """progress callback
-        @blocknum : number of blocks downloaded so far
-        @blocksize : size of each block
-        @totalsize: size of the remote file
-        """
-        percent = 100.0 * blocknum * blocksize / totalsize
-        if percent > 100:
-            percent = 100
-        # print ('progress %.2f%%' % percent, end = '\r')
-        sys.stdout.write("progress %.2f%%" % percent + "\r")
-        sys.stdout.flush()
-
-    if url:
-        try:
-            if proxies:
-                # create the object, assign it to a variable
-                proxy = request.ProxyHandler(proxies)
-                # construct a new opener using your proxy settings
-                opener = request.build_opener(proxy)
-                # install the openen on the module-level
-                request.install_opener(opener)
-
-            request.urlretrieve(url, file_path, progress_callfunc, data)
-
-            if callable(call_func):
-                call_func()
-            return 1
-        except Exception as e:
-            log.error(e)
-            return 0
-    else:
-        return 0
-
-
-def get_file_list(path, ignore=[]):
-    templist = path.split("*")
-    path = templist[0]
-    file_type = templist[1] if len(templist) >= 2 else ""
-
-    # 递归遍历文件
-    def get_file_list_(path, file_type, ignore, all_file=[]):
-        file_list = os.listdir(path)
-
-        for file_name in file_list:
-            if file_name in ignore:
-                continue
-
-            file_path = os.path.join(path, file_name)
-            if os.path.isdir(file_path):
-                get_file_list_(file_path, file_type, ignore, all_file)
-            else:
-                if not file_type or file_name.endswith(file_type):
-                    all_file.append(file_path)
-
-        return all_file
-
-    return get_file_list_(path, file_type, ignore) if os.path.isdir(path) else [path]
-
-
-def rename_file(old_name, new_name):
-    os.rename(old_name, new_name)
-
-
-def del_file(path, ignore=()):
-    files = get_file_list(path, ignore)
-    for file in files:
-        try:
-            os.remove(file)
-        except Exception as e:
-            log.error(
-                """
-                failed to delete: %s
-                Exception : %s
-                """
-                % (file, str(e))
-            )
-        finally:
-            pass
-
-
-def get_file_type(file_name):
-    """
-    @summary: 取文件后缀名
-    ---------
-    @param file_name:
-    ---------
-    @result:
-    """
-    try:
-        return os.path.splitext(file_name)[1]
-    except Exception as e:
-        log.exception(e)
-
-
-def get_file_path(file_path):
-    """
-    @summary: 取文件路径
-    ---------
-    @param file_path: /root/a.py
-    ---------
-    @result: /root
-    """
-    try:
-        return os.path.split(file_path)[0]
-    except Exception as e:
-        log.exception(e)
-
-
-#############################################
-
-
-def exec_js(js_code):
-    """
-    @summary: execute js code
-    ---------
-    @param js_code: js code
-    ---------
-    @result: the execution result
-    """
-
-    return execjs.eval(js_code)
-
-
-def compile_js(js_func):
-    """
-    @summary: compile a js function
-    ---------
-    @param js_func: js function source
-    ---------
-    @result: a callable; invoke as fun('js_funName', param1, param2)
-    """
-
-    ctx = execjs.compile(js_func)
-    return ctx.call
-
-
-###############################################
-
-#############################################
-
-
-def date_to_timestamp(date, time_format="%Y-%m-%d %H:%M:%S"):
-    """
-    @summary:
-    ---------
-    @param date: a date string such as "2011-09-28 10:00:00" to convert to a timestamp
-    @param time_format: the date format
-    ---------
-    @result: the timestamp
-    """
-
-    timestamp = time.mktime(time.strptime(date, time_format))
-    return int(timestamp)
-
-
-def timestamp_to_date(timestamp, time_format="%Y-%m-%d %H:%M:%S"):
-    """
-    @summary:
-    ---------
-    @param timestamp: the timestamp to convert to a date string
-    @param time_format: the date format
-    ---------
-    @result: the date string
-    """
-    if timestamp is None:
-        raise ValueError("timestamp is null")
-
-    date = time.localtime(timestamp)
-    return time.strftime(time_format, date)
-
-
-def get_current_timestamp():
-    return int(time.time())
-
-
-def get_current_date(date_format="%Y-%m-%d %H:%M:%S"):
-    return datetime.datetime.now().strftime(date_format)
-    # return time.strftime(date_format, time.localtime(time.time()))
-
-
-def get_date_number(year=None, month=None, day=None):
-    """
-    @summary: get the iso calendar numbers for a given date
-    defaults to the current date
-    ---------
-    @param year: 2010
-    @param month: 6
-    @param day: 16
-    ---------
-    @result: (year, week number, weekday), e.g. (2010, 24, 3)
-    """
-    if year and month and day:
-        return datetime.date(year, month, day).isocalendar()
-    elif not any([year, month, day]):
-        return datetime.datetime.now().isocalendar()
-    else:
-        assert year, "year 不能为空"
-        assert month, "month 不能为空"
-        assert day, "day 不能为空"
-
-
-def get_between_date(
-    begin_date, end_date=None, date_format="%Y-%m-%d", **time_interval
-):
-    """
-    @summary: get the dates within a time span, one per day by default
-    ---------
-    @param begin_date: start date, str, e.g. 2018-10-01
-    @param end_date: defaults to today
-    @param date_format: date format, must match the format of begin_date
-    @param time_interval: step size, one day by default; supports days, seconds, microseconds, milliseconds, minutes, hours, weeks
-    ---------
-    @result: list of strings
-    """
-
-    date_list = []
-
-    begin_date = datetime.datetime.strptime(begin_date, date_format)
-    end_date = (
-        datetime.datetime.strptime(end_date, date_format)
-        if end_date
-        else datetime.datetime.strptime(
-            time.strftime(date_format, time.localtime(time.time())), date_format
-        )
-    )
-    time_interval = time_interval or dict(days=1)
-
-    while begin_date <= end_date:
-        date_str = begin_date.strftime(date_format)
-        date_list.append(date_str)
-
-        begin_date += datetime.timedelta(**time_interval)
-
-    if end_date.strftime(date_format) not in date_list:
-        date_list.append(end_date.strftime(date_format))
-
-    return date_list
-
-
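-# Illustrative usage sketch (not part of the original module): stepping day by day and
-# hour by hour between two fixed dates.
-def _example_get_between_date():
-    assert get_between_date("2021-01-30", "2021-02-01") == [
-        "2021-01-30",
-        "2021-01-31",
-        "2021-02-01",
-    ]
-    assert get_between_date(
-        "2021-01-01 22", "2021-01-02 01", date_format="%Y-%m-%d %H", hours=1
-    ) == ["2021-01-01 22", "2021-01-01 23", "2021-01-02 00", "2021-01-02 01"]
-
-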
-def get_between_months(begin_date, end_date=None):
-    """
-    @summary: get the months within a time span
-    whole months only
-    ---------
-    @param begin_date: start date, e.g. 2018-01-01
-    @param end_date: defaults to now
-    ---------
-    @result: list, e.g. ['2018-01', '2018-02']
-    """
-
-    def add_months(dt, months):
-        month = dt.month - 1 + months
-        year = dt.year + month // 12
-        month = month % 12 + 1
-        day = min(dt.day, calendar.monthrange(year, month)[1])
-        return dt.replace(year=year, month=month, day=day)
-
-    date_list = []
-    begin_date = datetime.datetime.strptime(begin_date, "%Y-%m-%d")
-    end_date = (
-        datetime.datetime.strptime(end_date, "%Y-%m-%d")
-        if end_date
-        else datetime.datetime.strptime(
-            time.strftime("%Y-%m-%d", time.localtime(time.time())), "%Y-%m-%d"
-        )
-    )
-    while begin_date <= end_date:
-        date_str = begin_date.strftime("%Y-%m")
-        date_list.append(date_str)
-        begin_date = add_months(begin_date, 1)
-    return date_list
-
-
-def get_today_of_day(day_offset=0):
-    return str(datetime.date.today() + datetime.timedelta(days=day_offset))
-
-
-def get_days_of_month(year, month):
-    """
-    返回天数
-    """
-
-    return calendar.monthrange(year, month)[1]
-
-
-def get_firstday_of_month(date):
-    """''
-    date format = "YYYY-MM-DD"
-    """
-
-    year, month, day = date.split("-")
-    year, month, day = int(year), int(month), int(day)
-
-    days = "01"
-    if int(month) < 10:
-        month = "0" + str(int(month))
-    arr = (year, month, days)
-    return "-".join("%s" % i for i in arr)
-
-
-def get_lastday_of_month(date):
-    """''
-    get the last day of month
-    date format = "YYYY-MM-DD"
-    """
-    year, month, day = date.split("-")
-    year, month, day = int(year), int(month), int(day)
-
-    days = calendar.monthrange(year, month)[1]
-    month = add_zero(month)
-    arr = (year, month, days)
-    return "-".join("%s" % i for i in arr)
-
-
-def get_firstday_month(month_offset=0):
-    """''
-    get the first day of month from today
-    month_offset is how many months
-    """
-    (y, m, d) = get_year_month_and_days(month_offset)
-    d = "01"
-    arr = (y, m, d)
-    return "-".join("%s" % i for i in arr)
-
-
-def get_lastday_month(month_offset=0):
-    """''
-    get the last day of month from today
-    month_offset is how many months
-    """
-    return "-".join("%s" % i for i in get_year_month_and_days(month_offset))
-
-
-def get_last_month(month_offset=0):
-    """''
-    get the last day of month from today
-    month_offset is how many months
-    """
-    return "-".join("%s" % i for i in get_year_month_and_days(month_offset)[:2])
-
-
-def get_year_month_and_days(month_offset=0):
-    """
-    @summary:
-    ---------
-    @param month_offset: 月份偏移量
-    ---------
-    @result: ('2019', '04', '30')
-    """
-
-    today = datetime.datetime.now()
-    year, month = today.year, today.month
-
-    this_year = int(year)
-    this_month = int(month)
-    total_month = this_month + month_offset
-    if month_offset >= 0:
-        if total_month <= 12:
-            days = str(get_days_of_month(this_year, total_month))
-            total_month = add_zero(total_month)
-            return (year, total_month, days)
-        else:
-            i = total_month // 12
-            j = total_month % 12
-            if j == 0:
-                i -= 1
-                j = 12
-            this_year += i
-            days = str(get_days_of_month(this_year, j))
-            j = add_zero(j)
-            return (str(this_year), str(j), days)
-    else:
-        if (total_month > 0) and (total_month < 12):
-            days = str(get_days_of_month(this_year, total_month))
-            total_month = add_zero(total_month)
-            return (year, total_month, days)
-        else:
-            i = total_month // 12
-            j = total_month % 12
-            if j == 0:
-                i -= 1
-                j = 12
-            this_year += i
-            days = str(get_days_of_month(this_year, j))
-            j = add_zero(j)
-            return (str(this_year), str(j), days)
-
-
-def add_zero(n):
-    return "%02d" % n
-
-
-def get_month(month_offset=0):
-    """''
-    get the date N months before or after today
-    if month_offset>0, the date N months after today
-    if month_offset<0, the date N months before today
-    date format = "YYYY-MM-DD"
-    """
-    today = datetime.datetime.now()
-    day = add_zero(today.day)
-
-    (y, m, d) = get_year_month_and_days(month_offset)
-    arr = (y, m, d)
-    if int(day) < int(d):
-        arr = (y, m, day)
-    return "-".join("%s" % i for i in arr)
-
-
-@run_safe_model("format_date")
-def format_date(date, old_format="", new_format="%Y-%m-%d %H:%M:%S"):
-    """
-    @summary: normalize a date string into another format
-    ---------
-    @param date: the date, e.g. 2017年4月17日 3时27分12秒
-    @param old_format: the original date format, e.g. '%Y年%m月%d日 %H时%M分%S秒'
-        %y two-digit year (00-99)
-        %Y four-digit year (000-9999)
-        %m month (01-12)
-        %d day of month (0-31)
-        %H hour, 24-hour clock (0-23)
-        %I hour, 12-hour clock (01-12)
-        %M minutes (00-59)
-        %S seconds (00-59)
-    @param new_format: the output date format
-    ---------
-    @result: the reformatted date as a string, e.g. 2017-4-17 03:27:12
-    """
-    if not date:
-        return ""
-
-    if not old_format:
-        regex = "(\d+)"
-        numbers = get_info(date, regex, allow_repeat=True)
-        formats = ["%Y", "%m", "%d", "%H", "%M", "%S"]
-        old_format = date
-        for i, number in enumerate(numbers[:6]):
-            if i == 0 and len(number) == 2:  # the year may be two digits, use lowercase %y
-                old_format = old_format.replace(
-                    number, formats[i].lower(), 1
-                )  # replace once, so that in '2017年11月30日 11:49' the month 11 does not clobber the hour 11
-            else:
-                old_format = old_format.replace(number, formats[i], 1)  # replace once
-
-    try:
-        date_obj = datetime.datetime.strptime(date, old_format)
-        if "T" in date and "Z" in date:
-            date_obj += datetime.timedelta(hours=8)
-            date_str = date_obj.strftime("%Y-%m-%d %H:%M:%S")
-        else:
-            date_str = datetime.datetime.strftime(date_obj, new_format)
-
-    except Exception as e:
-        log.error("日期格式化出错,old_format = %s 不符合 %s 格式" % (old_format, date))
-        date_str = date
-
-    return date_str
-
-
-def transform_lower_num(data_str: str):
-    num_map = {
-        "一": "1",
-        "二": "2",
-        "三": "3",
-        "四": "4",
-        "五": "5",
-        "六": "6",
-        "七": "7",
-        "八": "8",
-        "九": "9",
-        "十": "0",
-    }
-    pattern = f'[{"|".join(num_map.keys())}|零]'
-    res = re.search(pattern, data_str)
-    if not res:
-        #  no chinese numerals in the string, return it unchanged
-        return data_str
-
-    data_str = data_str.replace("0", "零")
-    for n in num_map:
-        data_str = data_str.replace(n, num_map[n])
-
-    re_data_str = re.findall("\d+", data_str)
-    for i in re_data_str:
-        if len(i) == 3:
-            new_i = i.replace("0", "")
-            data_str = data_str.replace(i, new_i, 1)
-        elif len(i) == 4:
-            new_i = i.replace("10", "")
-            data_str = data_str.replace(i, new_i, 1)
-        elif len(i) == 2 and int(i) < 10:
-            new_i = int(i) + 10
-            data_str = data_str.replace(i, str(new_i), 1)
-        elif len(i) == 1 and int(i) == 0:
-            new_i = int(i) + 10
-            data_str = data_str.replace(i, str(new_i), 1)
-
-    return data_str.replace("零", "0")
-
-
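-# Illustrative usage sketch (not part of the original module): Chinese numerals inside
-# relative dates are rewritten to Arabic digits before format_time parses them.
-def _example_transform_lower_num():
-    assert transform_lower_num("三天前") == "3天前"
-    assert transform_lower_num("十分钟前") == "10分钟前"
-    assert transform_lower_num("二十一小时前") == "21小时前"
-
-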
-@run_safe_model("format_time")
-def format_time(release_time, date_format="%Y-%m-%d %H:%M:%S"):
-    """
-    >>> format_time("2个月前")
-    '2021-08-15 16:24:21'
-    >>> format_time("2月前")
-    '2021-08-15 16:24:36'
-    """
-    release_time = transform_lower_num(release_time)
-    release_time = release_time.replace("日", "天").replace("/", "-")
-
-    if "年前" in release_time:
-        years = re.compile("(\d+)\s*年前").findall(release_time)
-        years_ago = datetime.datetime.now() - datetime.timedelta(
-            days=int(years[0]) * 365
-        )
-        release_time = years_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "月前" in release_time:
-        months = re.compile("(\d+)[\s个]*月前").findall(release_time)
-        months_ago = datetime.datetime.now() - datetime.timedelta(
-            days=int(months[0]) * 30
-        )
-        release_time = months_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "周前" in release_time:
-        weeks = re.compile("(\d+)\s*周前").findall(release_time)
-        weeks_ago = datetime.datetime.now() - datetime.timedelta(days=int(weeks[0]) * 7)
-        release_time = weeks_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "天前" in release_time:
-        ndays = re.compile("(\d+)\s*天前").findall(release_time)
-        days_ago = datetime.datetime.now() - datetime.timedelta(days=int(ndays[0]))
-        release_time = days_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "小时前" in release_time:
-        nhours = re.compile("(\d+)\s*小时前").findall(release_time)
-        hours_ago = datetime.datetime.now() - datetime.timedelta(hours=int(nhours[0]))
-        release_time = hours_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "分钟前" in release_time:
-        nminutes = re.compile("(\d+)\s*分钟前").findall(release_time)
-        minutes_ago = datetime.datetime.now() - datetime.timedelta(
-            minutes=int(nminutes[0])
-        )
-        release_time = minutes_ago.strftime("%Y-%m-%d %H:%M:%S")
-
-    elif "前天" in release_time:
-        today = datetime.date.today()
-        yesterday = today - datetime.timedelta(days=2)
-        release_time = release_time.replace("前天", str(yesterday))
-
-    elif "昨天" in release_time:
-        today = datetime.date.today()
-        yesterday = today - datetime.timedelta(days=1)
-        release_time = release_time.replace("昨天", str(yesterday))
-
-    elif "今天" in release_time:
-        release_time = release_time.replace("今天", get_current_date("%Y-%m-%d"))
-
-    elif "刚刚" in release_time:
-        release_time = get_current_date()
-
-    elif re.search("^\d\d:\d\d", release_time):
-        release_time = get_current_date("%Y-%m-%d") + " " + release_time
-
-    elif not re.compile("\d{4}").findall(release_time):
-        month = re.compile("\d{1,2}").findall(release_time)
-        if month and int(month[0]) <= int(get_current_date("%m")):
-            release_time = get_current_date("%Y") + "-" + release_time
-        else:
-            release_time = str(int(get_current_date("%Y")) - 1) + "-" + release_time
-
-    # split the day and the hour when they are glued together
-    template = re.compile("(\d{4}-\d{1,2}-\d{2})(\d{1,2})")
-    release_time = re.sub(template, r"\1 \2", release_time)
-    release_time = format_date(release_time, new_format=date_format)
-
-    return release_time
-
-
-def to_date(date_str, date_format="%Y-%m-%d %H:%M:%S"):
-    return datetime.datetime.strptime(date_str, date_format)
-
-
-def get_before_date(
-    current_date,
-    days,
-    current_date_format="%Y-%m-%d %H:%M:%S",
-    return_date_format="%Y-%m-%d %H:%M:%S",
-):
-    """
-    @summary: get a date offset from a given date
-    ---------
-    @param current_date: the reference date, str
-    @param days: offset in days, -1 means the previous day, 1 the next day
-    @param return_date_format: the output date format
-    ---------
-    @result: string
-    """
-
-    current_date = to_date(current_date, current_date_format)
-    date_obj = current_date + datetime.timedelta(days=days)
-    return datetime.datetime.strftime(date_obj, return_date_format)
-
-
-def get_utcnow():
-    """utc时间"""
-    return datetime.datetime.utcnow()
-
-
-def delay_time(sleep_time=60):
-    """
-    @summary: sleep, one minute by default
-    ---------
-    @param sleep_time: in seconds
-    ---------
-    @result:
-    """
-
-    time.sleep(sleep_time)
-
-
-def format_seconds(seconds):
-    """
-    @summary: convert seconds into a days/hours/minutes/seconds string
-    ---------
-    @param seconds:
-    ---------
-    @result: 2天3小时2分49秒
-    """
-
-    seconds = int(seconds + 0.5)  # round to the nearest second
-
-    m, s = divmod(seconds, 60)
-    h, m = divmod(m, 60)
-    d, h = divmod(h, 24)
-
-    times = ""
-    if d:
-        times += "{}天".format(d)
-    if h:
-        times += "{}小时".format(h)
-    if m:
-        times += "{}分".format(m)
-    if s:
-        times += "{}秒".format(s)
-
-    return times
-
-
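-# Illustrative usage sketch (not part of the original module): seconds are rounded to the
-# nearest second and rendered as a days/hours/minutes/seconds string, omitting zero parts.
-def _example_format_seconds():
-    assert format_seconds(65) == "1分5秒"
-    assert format_seconds(3600) == "1小时"
-    assert format_seconds(90061) == "1天1小时1分1秒"
-
-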
-################################################
-def get_md5(*args):
-    """
-    @summary: get a unique 32-character md5
-    ---------
-    @param *args: the values combined into the digest
-    ---------
-    @result: 7c8684bcbdfcea6697650aa53d7b1405
-    """
-
-    m = hashlib.md5()
-    for arg in args:
-        m.update(str(arg).encode())
-
-    return m.hexdigest()
-
-
-def get_sha1(*args):
-    """
-    @summary: get a unique 40-character sha1, usable as a unique id
-    ---------
-    @param *args: the values combined into the digest
-    ---------
-    @result: ba4868b3f277c8e387b55d9e3d0be7c045cdd89e
-    """
-
-    sha1 = hashlib.sha1()
-    for arg in args:
-        sha1.update(str(arg).encode())
-    return sha1.hexdigest()  # 40位
-
-
-def get_base64(secret, message):
-    """
-    @summary: signature algorithm: "HMAC-SHA256"
-              reference: https://www.jokecamp.com/blog/examples-of-creating-base64-hashes-using-hmac-sha256-in-different-languages/
-    ---------
-    @param secret: secret key
-    @param message: message
-    ---------
-    @result: the signature, base64 encoded
-    """
-
-    import hashlib
-    import hmac
-    import base64
-
-    message = bytes(message, "utf-8")
-    secret = bytes(secret, "utf-8")
-
-    signature = base64.b64encode(
-        hmac.new(secret, message, digestmod=hashlib.sha256).digest()
-    ).decode("utf8")
-    return signature
-
-
-def get_uuid(key1="", key2=""):
-    """
-    @summary: compute a uuid
-    can combine two strings into one unique value, e.g. a domain and a news title, for a compound index
-    ---------
-    @param key1: str
-    @param key2: str
-    ---------
-    @result:
-    """
-
-    uuid_object = ""
-
-    if not key1 and not key2:
-        uuid_object = uuid.uuid1()
-    else:
-        hash = md5(bytes(key1, "utf-8") + bytes(key2, "utf-8")).digest()
-        uuid_object = uuid.UUID(bytes=hash[:16], version=3)
-
-    return str(uuid_object)
-
-
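-# Illustrative usage sketch (not part of the original module): with two keys the uuid is
-# derived from their md5, so the same pair always yields the same id; with no keys a
-# time-based uuid1 is generated instead.
-def _example_get_uuid():
-    a = get_uuid("news.example.com", "some title")  # illustrative values
-    b = get_uuid("news.example.com", "some title")
-    assert a == b and len(a) == 36
-    assert get_uuid() != get_uuid()  # uuid1-based ids are time dependent
-
-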
-def get_hash(text):
-    return hash(text)
-
-
-##################################################
-
-
-def cut_string(text, length):
-    """
-    @summary: split text into chunks of a given length
-    ---------
-    @param text: the text
-    @param length: chunk length
-    ---------
-    @result: the list of chunks
-    """
-
-    text_list = re.findall(".{%d}" % length, text, re.S)
-    leave_text = text[len(text_list) * length :]
-    if leave_text:
-        text_list.append(leave_text)
-
-    return text_list
-
-
-def get_random_string(length=1):
-    random_string = "".join(random.sample(string.ascii_letters + string.digits, length))
-    return random_string
-
-
-def get_random_password(length=8, special_characters=""):
-    """
-    @summary: create a random password, 8 characters by default, containing upper case, lower case and digits
-    ---------
-    @param length: password length, 8 by default
-    @param special_characters: special characters
-    ---------
-    @result: a password of the given length
-    """
-
-    while True:
-        random_password = "".join(
-            random.sample(
-                string.ascii_letters + string.digits + special_characters, length
-            )
-        )
-        if (
-            re.search("[0-9]", random_password)
-            and re.search("[A-Z]", random_password)
-            and re.search("[a-z]", random_password)
-        ):
-            if not special_characters:
-                break
-            elif set(random_password).intersection(special_characters):
-                break
-
-    return random_password
-
-
-def get_random_email(length=None, email_types: list = None, special_characters=""):
-    """
-    generate a random email address
-    :param length: length of the local part
-    :param email_types: email domains
-    :param special_characters: special characters
-    :return:
-    """
-    if not length:
-        length = random.randint(4, 12)
-    if not email_types:
-        email_types = [
-            "qq.com",
-            "163.com",
-            "gmail.com",
-            "yahoo.com",
-            "hotmail.com",
-            "yeah.net",
-            "126.com",
-            "139.com",
-            "sohu.com",
-        ]
-
-    email_body = get_random_password(length, special_characters)
-    email_type = random.choice(email_types)
-
-    email = email_body + "@" + email_type
-    return email
-
-
-#################################
-
-
-def dumps_obj(obj):
-    return pickle.dumps(obj)
-
-
-def loads_obj(obj_str):
-    return pickle.loads(obj_str)
-
-
-def get_method(obj, name):
-    name = str(name)
-    try:
-        return getattr(obj, name)
-    except AttributeError:
-        log.error("Method %r not found in: %s" % (name, obj))
-        return None
-
-
-def witch_workspace(project_path):
-    """
-    @summary:
-    ---------
-    @param project_path:
-    ---------
-    @result:
-    """
-
-    os.chdir(project_path)  # switch the working directory
-
-
-############### database helpers #######################
-def format_sql_value(value):
-    if isinstance(value, str):
-        value = value.strip()
-
-    elif isinstance(value, (list, dict)):
-        value = dumps_json(value, indent=None)
-
-    elif isinstance(value, (datetime.date, datetime.time)):
-        value = str(value)
-
-    elif isinstance(value, bool):
-        value = int(value)
-
-    return value
-
-
-def list2str(datas):
-    """
-    列表转字符串
-    :param datas: [1, 2]
-    :return: (1, 2)
-    """
-    data_str = str(tuple(datas))
-    data_str = re.sub(",\)$", ")", data_str)
-    return data_str
-
-
-def make_insert_sql(
-    table, data, auto_update=False, update_columns=(), insert_ignore=False
-):
-    """
-    @summary: targets mysql; for oracle, datetime values would need to_date handling (TODO)
-    ---------
-    @param table:
-    @param data: row data as a dict
-    @param auto_update: uses replace into, fully overwriting existing rows
-    @param update_columns: columns to update; when set, auto_update is ignored and the listed columns are updated on duplicate key
-    @param insert_ignore: skip rows that already exist
-    ---------
-    @result:
-    """
-
-    keys = ["`{}`".format(key) for key in data.keys()]
-    keys = list2str(keys).replace("'", "")
-
-    values = [format_sql_value(value) for value in data.values()]
-    values = list2str(values)
-
-    if update_columns:
-        if not isinstance(update_columns, (tuple, list)):
-            update_columns = [update_columns]
-        update_columns_ = ", ".join(
-            ["{key}=values({key})".format(key=key) for key in update_columns]
-        )
-        sql = (
-            "insert%s into `{table}` {keys} values {values} on duplicate key update %s"
-            % (" ignore" if insert_ignore else "", update_columns_)
-        )
-
-    elif auto_update:
-        sql = "replace into `{table}` {keys} values {values}"
-    else:
-        sql = "insert%s into `{table}` {keys} values {values}" % (
-            " ignore" if insert_ignore else ""
-        )
-
-    sql = sql.format(table=table, keys=keys, values=values).replace("None", "null")
-    return sql
-
-
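-# Illustrative usage sketch (not part of the original module): the statement produced for
-# a plain insert and for an upsert that only refreshes selected columns.
-def _example_make_insert_sql():
-    data = {"id": 1, "title": "hello"}
-    assert make_insert_sql("news", data) == (
-        "insert into `news` (`id`, `title`) values (1, 'hello')"
-    )
-    assert make_insert_sql("news", data, update_columns=["title"]) == (
-        "insert into `news` (`id`, `title`) values (1, 'hello')"
-        " on duplicate key update title=values(title)"
-    )
-
-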
-def make_update_sql(table, data, condition):
-    """
-    @summary: targets mysql; for oracle, datetime values would need to_date handling (TODO)
-    ---------
-    @param table:
-    @param data: row data as a dict
-    @param condition: where condition
-    ---------
-    @result:
-    """
-    key_values = []
-
-    for key, value in data.items():
-        value = format_sql_value(value)
-        if isinstance(value, str):
-            key_values.append("`{}`={}".format(key, repr(value)))
-        elif value is None:
-            key_values.append("`{}`={}".format(key, "null"))
-        else:
-            key_values.append("`{}`={}".format(key, value))
-
-    key_values = ", ".join(key_values)
-
-    sql = "update `{table}` set {key_values} where {condition}"
-    sql = sql.format(table=table, key_values=key_values, condition=condition)
-    return sql
-
-
-def make_batch_sql(
-    table, datas, auto_update=False, update_columns=(), update_columns_value=()
-):
-    """
-    @summary: build a batch sql statement
-    ---------
-    @param table:
-    @param datas: row data, [{...}]
-    @param auto_update: uses replace into, fully overwriting existing rows
-    @param update_columns: columns to update; when set, auto_update is ignored and the listed columns are updated on duplicate key
-    @param update_columns_value: values for the updated columns; defaults to the values in datas. note: string values need explicit single quotes, e.g. update_columns_value=("'test'",)
-    ---------
-    @result:
-    """
-    if not datas:
-        return
-
-    keys = list(datas[0].keys())
-    values_placeholder = ["%s"] * len(keys)
-
-    values = []
-    for data in datas:
-        value = []
-        for key in keys:
-            current_data = data.get(key)
-            current_data = format_sql_value(current_data)
-
-            value.append(current_data)
-
-        values.append(value)
-
-    keys = ["`{}`".format(key) for key in keys]
-    keys = list2str(keys).replace("'", "")
-
-    values_placeholder = list2str(values_placeholder).replace("'", "")
-
-    if update_columns:
-        if not isinstance(update_columns, (tuple, list)):
-            update_columns = [update_columns]
-        if update_columns_value:
-            update_columns_ = ", ".join(
-                [
-                    "`{key}`={value}".format(key=key, value=value)
-                    for key, value in zip(update_columns, update_columns_value)
-                ]
-            )
-        else:
-            update_columns_ = ", ".join(
-                ["`{key}`=values(`{key}`)".format(key=key) for key in update_columns]
-            )
-        sql = "insert into `{table}` {keys} values {values_placeholder} on duplicate key update {update_columns}".format(
-            table=table,
-            keys=keys,
-            values_placeholder=values_placeholder,
-            update_columns=update_columns_,
-        )
-    elif auto_update:
-        sql = "replace into `{table}` {keys} values {values_placeholder}".format(
-            table=table, keys=keys, values_placeholder=values_placeholder
-        )
-    else:
-        sql = "insert ignore into `{table}` {keys} values {values_placeholder}".format(
-            table=table, keys=keys, values_placeholder=values_placeholder
-        )
-
-    return sql, values
-
-
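-# Illustrative usage sketch (not part of the original module): make_batch_sql returns a
-# template with %s placeholders plus the row values, ready for cursor.executemany().
-def _example_make_batch_sql():
-    datas = [{"id": 1, "title": "a"}, {"id": 2, "title": "b"}]
-    sql, values = make_batch_sql("news", datas)
-    assert sql == "insert ignore into `news` (`id`, `title`) values (%s, %s)"
-    assert values == [[1, "a"], [2, "b"]]
-
-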
-############### json helpers #######################
-
-
-def key2underline(key: str, strict=True):
-    """
-    >>> key2underline("HelloWord")
-    'hello_word'
-    >>> key2underline("SHData", strict=True)
-    's_h_data'
-    >>> key2underline("SHData", strict=False)
-    'sh_data'
-    >>> key2underline("SHDataHi", strict=False)
-    'sh_data_hi'
-    >>> key2underline("SHDataHi", strict=True)
-    's_h_data_hi'
-    >>> key2underline("dataHi", strict=True)
-    'data_hi'
-    """
-    regex = "[A-Z]*" if not strict else "[A-Z]"
-    capitals = re.findall(regex, key)
-
-    if capitals:
-        for capital in capitals:
-            if not capital:
-                continue
-            if key.startswith(capital):
-                if len(capital) > 1:
-                    key = key.replace(
-                        capital, capital[:-1].lower() + "_" + capital[-1].lower(), 1
-                    )
-                else:
-                    key = key.replace(capital, capital.lower(), 1)
-            else:
-                if len(capital) > 1:
-                    key = key.replace(capital, "_" + capital.lower() + "_", 1)
-                else:
-                    key = key.replace(capital, "_" + capital.lower(), 1)
-
-    return key.strip("_")
-
-
-def key2hump(key):
-    """
-    convert snake_case to CamelCase
-    """
-    return key.title().replace("_", "")
-
-
-def format_json_key(json_data):
-    json_data_correct = {}
-    for key, value in json_data.items():
-        key = key2underline(key)
-        json_data_correct[key] = value
-
-    return json_data_correct
-
-
-def quick_to_json(text):
-    """
-    @summary: quickly convert headers copied from the browser into a dict
-    ---------
-    @param text:
-    ---------
-    @result:
-    """
-
-    contents = text.split("\n")
-    json = {}
-    for content in contents:
-        if content == "\n":
-            continue
-
-        content = content.strip()
-        regex = ["(:?.*?):(.*)", "(.*?):? +(.*)", "([^:]*)"]
-
-        result = get_info(content, regex)
-        result = result[0] if isinstance(result[0], tuple) else result
-        try:
-            json[result[0]] = eval(result[1].strip())
-        except:
-            json[result[0]] = result[1].strip()
-
-    return json
-
-
-##############################
-
-
-def print_pretty(object):
-    pprint(object)
-
-
-def print_params2json(url):
-    params_json = {}
-    params = url.split("?")[-1].split("&")
-    for param in params:
-        key_value = param.split("=", 1)
-        params_json[key_value[0]] = key_value[1]
-
-    print(dumps_json(params_json))
-
-
-def print_cookie2json(cookie_str_or_list):
-    if isinstance(cookie_str_or_list, str):
-        cookie_json = {}
-        cookies = cookie_str_or_list.split("; ")
-        for cookie in cookies:
-            name, value = cookie.split("=")
-            cookie_json[name] = value
-    else:
-        cookie_json = get_cookies_from_selenium_cookie(cookie_str_or_list)
-
-    print(dumps_json(cookie_json))
-
-
-###############################
-
-
-def flatten(x):
-    """flatten(sequence) -> list
-    Returns a single, flat list which contains all elements retrieved
-    from the sequence and all recursively contained sub-sequences
-    (iterables).
-    Examples:
-    >>> [1, 2, [3,4], (5,6)]
-    [1, 2, [3, 4], (5, 6)]
-    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
-    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
-    >>> flatten(["foo", "bar"])
-    ['foo', 'bar']
-    >>> flatten(["foo", ["baz", 42], "bar"])
-    ['foo', 'baz', 42, 'bar']
-    """
-    return list(iflatten(x))
-
-
-def iflatten(x):
-    """iflatten(sequence) -> iterator
-    Similar to ``.flatten()``, but returns iterator instead"""
-    for el in x:
-        if _is_listlike(el):
-            for el_ in flatten(el):
-                yield el_
-        else:
-            yield el
-
-
-def _is_listlike(x):
-    """
-    >>> _is_listlike("foo")
-    False
-    >>> _is_listlike(5)
-    False
-    >>> _is_listlike(b"foo")
-    False
-    >>> _is_listlike([b"foo"])
-    True
-    >>> _is_listlike((b"foo",))
-    True
-    >>> _is_listlike({})
-    True
-    >>> _is_listlike(set())
-    True
-    >>> _is_listlike((x for x in range(3)))
-    True
-    >>> _is_listlike(six.moves.xrange(5))
-    True
-    """
-    return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
-
-
-###################
-
-
-def re_def_supper_class(obj, supper_class):
-    """
-    redefine the parent class
-    @param obj: a class, e.g. for class A pass A, or an instance's a.__class__
-    @param supper_class: the parent class
-    @return:
-    """
-    obj.__bases__ = (supper_class,)
-
-
-###################
-freq_limit_record = {}
-
-
-def reach_freq_limit(rate_limit, *key):
-    """
-    rate limiting
-    :param rate_limit: window length in seconds
-    :param key: the key the rate limit applies to
-    :return: True / False
-    """
-    if rate_limit == 0:
-        return False
-
-    msg_md5 = get_md5(*key)
-    key = "rate_limit:{}".format(msg_md5)
-    try:
-        if get_redisdb().get(key):
-            return True
-
-        get_redisdb().set(key, time.time(), ex=rate_limit)
-    except redis.exceptions.ConnectionError as e:
-        # fall back to in-memory rate limiting
-        global freq_limit_record
-
-        if key not in freq_limit_record:
-            freq_limit_record[key] = time.time()
-            return False
-
-        if time.time() - freq_limit_record.get(key) < rate_limit:
-            return True
-        else:
-            freq_limit_record[key] = time.time()
-
-    return False
-
-
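-# Illustrative usage sketch (not part of the original module), assuming get_redisdb()
-# either reaches redis or raises ConnectionError so the in-memory fallback is used: the
-# first hit inside the window passes, repeats within the window are throttled.
-def _example_reach_freq_limit():
-    key = ("warning", "spider_a")  # illustrative key parts
-    if not reach_freq_limit(60, *key):
-        pass  # first hit in the 60s window: send the alert here
-    assert reach_freq_limit(60, *key)  # second hit inside the window is throttled
-
-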
-def wechat_warning(
-    message,
-    message_prefix=None,
-    rate_limit=None,
-    url=None,
-    user_phone=None,
-    all_users: bool = None,
-):
-    """企业微信报警"""
-
-    # 为了加载最新的配置
-    rate_limit = rate_limit if rate_limit is not None else setting.WARNING_INTERVAL
-    url = url or setting.WECHAT_WARNING_URL
-    user_phone = user_phone or setting.WECHAT_WARNING_PHONE
-    all_users = all_users if all_users is not None else setting.WECHAT_WARNING_ALL
-
-    if isinstance(user_phone, str):
-        user_phone = [user_phone] if user_phone else []
-
-    if all_users is True or not user_phone:
-        user_phone = ["@all"]
-
-    if not all([url, message]):
-        return
-
-    if reach_freq_limit(rate_limit, url, user_phone, message_prefix or message):
-        log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
-        return
-
-    data = {
-        "msgtype": "text",
-        "text": {"content": message, "mentioned_mobile_list": user_phone},
-    }
-
-    headers = {"Content-Type": "application/json"}
-
-    try:
-        response = requests.post(
-            url, headers=headers, data=json.dumps(data).encode("utf8")
-        )
-        result = response.json()
-        response.close()
-        if result.get("errcode") == 0:
-            return True
-        else:
-            raise Exception(result.get("errmsg"))
-    except Exception as e:
-        log.error("报警发送失败。 报警内容 {}, error: {}".format(message, e))
-        return False
-
-
-###################
-
-
-def make_item(cls, data: dict):
-    """提供Item类与原数据,快速构建Item实例
-    :param cls: Item类
-    :param data: 字典格式的数据
-    """
-    item = cls()
-    for key, val in data.items():
-        setattr(item, key, val)
-    return item
-
-
-###################
-
-
-def aio_wrap(loop=None, executor=None):
-    """
-    wrap a normal sync version of a function to an async version
-    """
-    outer_loop = loop
-    outer_executor = executor
-
-    def wrap(fn):
-        @wraps(fn)
-        async def run(*args, loop=None, executor=None, **kwargs):
-            if loop is None:
-                if outer_loop is None:
-                    loop = asyncio.get_event_loop()
-                else:
-                    loop = outer_loop
-            if executor is None:
-                executor = outer_executor
-            pfunc = partial(fn, *args, **kwargs)
-            return await loop.run_in_executor(executor, pfunc)
-
-        return run
-
-    return wrap
-
-
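-# Illustrative usage sketch (not part of the original module): a blocking function is
-# wrapped so it runs in the loop's default executor and can simply be awaited.
-def _example_aio_wrap():
-    @aio_wrap()
-    def blocking_add(a, b):
-        time.sleep(0.1)  # stand-in for blocking I/O
-        return a + b
-
-    assert asyncio.run(blocking_add(1, 2)) == 3
-
-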
-######### number ##########
-
-
-def ensure_int(n):
-    """
-    >>> ensure_int(None)
-    0
-    >>> ensure_int(False)
-    0
-    >>> ensure_int(12)
-    12
-    >>> ensure_int("72")
-    72
-    >>> ensure_int('')
-    0
-    >>> ensure_int('1')
-    1
-    """
-    if not n:
-        return 0
-    return int(n)
-
-
-def ensure_float(n):
-    """
-    >>> ensure_float(None)
-    0.0
-    >>> ensure_float(False)
-    0.0
-    >>> ensure_float(12)
-    12.0
-    >>> ensure_float("72")
-    72.0
-    """
-    if not n:
-        return 0.0
-    return float(n)
-
-
-def ensure_int64(n):
-    """
-    >>> ensure_int64(None)
-    0
-    >>> ensure_int64(False)
-    0
-    >>> ensure_int64(12)
-    12
-    >>> ensure_int64("72")
-    72
-    """
-    if not n:
-        return bson.int64.Int64(0)
-    return bson.int64.Int64(n)
-
-
-def import_cls(cls_info):
-    module, class_name = cls_info.rsplit(".", 1)
-    cls = importlib.import_module(module).__getattribute__(class_name)
-    return cls

+ 0 - 12
A数据处理/site_monitor/utils/webdriver/__init__.py

@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2022/9/7 4:39 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-from .playwright_driver import PlaywrightDriver
-from .webdirver import InterceptRequest, InterceptResponse
-from .webdriver_pool import WebDriverPool

+ 0 - 300
A数据处理/site_monitor/utils/webdriver/playwright_driver.py

@@ -1,300 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2022/9/7 4:11 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import json
-import os
-import re
-from collections import defaultdict
-from typing import Union, List
-
-try:
-    from typing import Literal  # python >= 3.8
-except ImportError:  # python <3.8
-    from typing_extensions import Literal
-
-
-from playwright.sync_api import Page, BrowserContext, ViewportSize, ProxySettings
-from playwright.sync_api import Playwright, Browser
-from playwright.sync_api import Response
-from playwright.sync_api import sync_playwright
-
-from utils import tools
-from utils.log import logger as log
-from utils.webdriver.webdirver import *
-
-
-class PlaywrightDriver(WebDriver):
-    def __init__(
-        self,
-        *,
-        page_on_event_callback: dict = None,
-        storage_state_path: str = None,
-        driver_type: Literal["chromium", "firefox", "webkit"] = "webkit",
-        url_regexes: list = None,
-        save_all: bool = False,
-        **kwargs
-    ):
-        """
-
-        Args:
-            page_on_event_callback: callbacks for page.on() events, e.g. page_on_event_callback={"dialog": lambda dialog: dialog.accept()}
-            storage_state_path: path used to persist the browser storage state
-            driver_type: browser type: chromium, firefox or webkit
-            url_regexes: URLs to intercept; regular expressions, passed as a list
-            save_all: whether to keep every intercepted response; by default only the last one is kept
-            **kwargs:
-        """
-        super(PlaywrightDriver, self).__init__(**kwargs)
-        self.driver: Playwright = None
-        self.browser: Browser = None
-        self.context: BrowserContext = None
-        self.page: Page = None
-        self.url = None
-        self.storage_state_path = storage_state_path
-
-        self._driver_type = driver_type
-        self._page_on_event_callback = page_on_event_callback
-        self._url_regexes = url_regexes
-        self._save_all = save_all
-
-        if self._save_all and self._url_regexes:
-            log.warning(
-                "获取完拦截的数据后, 请主动调用PlaywrightDriver的clear_cache()方法清空拦截的数据,否则数据会一直累加,导致内存溢出"
-            )
-            self._cache_data = defaultdict(list)
-        else:
-            self._cache_data = {}
-
-        self._setup()
-
-    def _setup(self):
-        # Process the arguments
-        if self._proxy:
-            proxy = self._proxy() if callable(self._proxy) else self._proxy
-            proxy = self.format_context_proxy(proxy)
-        else:
-            proxy = None
-
-        user_agent = (
-            self._user_agent() if callable(self._user_agent) else self._user_agent
-        )
-
-        view_size = ViewportSize(
-            width=self._window_size[0], height=self._window_size[1]
-        )
-
-        # Initialize the browser objects
-        self.driver = sync_playwright().start()
-        self.browser = getattr(self.driver, self._driver_type).launch(
-            headless=self._headless,
-            # args=["--no-sandbox"],
-            proxy=proxy,
-            executable_path=self._executable_path,
-            downloads_path=self._download_path,
-        )
-
-        if self.storage_state_path and os.path.exists(self.storage_state_path):
-            self.context = self.browser.new_context(
-                user_agent=user_agent,
-                screen=view_size,
-                viewport=view_size,
-                proxy=proxy,
-                storage_state=self.storage_state_path,
-                ignore_https_errors=True
-            )
-        else:
-            self.context = self.browser.new_context(
-                user_agent=user_agent,
-                screen=view_size,
-                viewport=view_size,
-                proxy=proxy,
-                ignore_https_errors=True
-            )
-
-        if self._use_stealth_js:
-            path = os.path.join(os.path.dirname(__file__), "../js/stealth.min.js")
-            self.context.add_init_script(path=path)
-
-        self.page = self.context.new_page()
-        self.page.set_default_timeout(self._timeout * 1000)
-
-        if self._page_on_event_callback:
-            for event, callback in self._page_on_event_callback.items():
-                self.page.on(event, callback)
-
-        if self._url_regexes:
-            self.page.on("response", self.on_response)
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if exc_val:
-            log.error(exc_val)
-
-        self.quit()
-        return True
-
-    def format_context_proxy(self, proxy) -> ProxySettings:
-        """
-        Args:
-            proxy: username:password@ip:port / ip:port
-        Returns:
-            {
-                "server": "ip:port"
-                "username": username,
-                "password": password,
-            }
-            server: http://ip:port or socks5://ip:port. Short form ip:port is considered an HTTP proxy.
-        """
-
-        if "@" in proxy:
-            certification, _proxy = proxy.split("@")
-            username, password = certification.split(":")
-
-            context_proxy = ProxySettings(
-                server=_proxy,
-                username=username,
-                password=password,
-            )
-        else:
-            context_proxy = ProxySettings(server=proxy)
-
-        return context_proxy
-
-    def save_storage_stage(self):
-        if self.storage_state_path:
-            os.makedirs(os.path.dirname(self.storage_state_path), exist_ok=True)
-            self.context.storage_state(path=self.storage_state_path)
-
-    def quit(self):
-        self.page.close()
-        self.context.close()
-        self.browser.close()
-        self.driver.stop()
-
-    @property
-    def domain(self):
-        return tools.get_domain(self.url or self.page.url)
-
-    @property
-    def cookies(self):
-        cookies_json = {}
-        for cookie in self.page.context.cookies():
-            cookies_json[cookie["name"]] = cookie["value"]
-
-        return cookies_json
-
-    @cookies.setter
-    def cookies(self, val: Union[dict, List[dict]]):
-        """
-        Set cookies
-        Args:
-            val: List[{name: str, value: str, url: Union[str, NoneType], domain: Union[str, NoneType], path: Union[str, NoneType], expires: Union[float, NoneType], httpOnly: Union[bool, NoneType], secure: Union[bool, NoneType], sameSite: Union["Lax", "None", "Strict", NoneType]}]
-
-        Returns:
-
-        """
-        if isinstance(val, list):
-            self.page.context.add_cookies(val)
-        else:
-            cookies = []
-            for key, value in val.items():
-                cookies.append(
-                    {"name": key, "value": value, "url": self.url or self.page.url}
-                )
-            self.page.context.add_cookies(cookies)
-
-    @property
-    def user_agent(self):
-        return self.page.evaluate("() => navigator.userAgent")
-
-    def on_response(self, response: Response):
-        for regex in self._url_regexes:
-            if re.search(regex, response.request.url):
-                intercept_request = InterceptRequest(
-                    url=response.request.url,
-                    headers=response.request.headers,
-                    data=response.request.post_data,
-                )
-
-                intercept_response = InterceptResponse(
-                    request=intercept_request,
-                    url=response.url,
-                    headers=response.headers,
-                    content=response.body(),
-                    status_code=response.status,
-                )
-                if self._save_all:
-                    self._cache_data[regex].append(intercept_response)
-                else:
-                    self._cache_data[regex] = intercept_response
-
-    def get_response(self, url_regex) -> InterceptResponse:
-        if self._save_all:
-            response_list = self._cache_data.get(url_regex)
-            if response_list:
-                return response_list[-1]
-        return self._cache_data.get(url_regex)
-
-    def get_all_response(self, url_regex) -> List[InterceptResponse]:
-        """
-        Get all matching responses; only effective when save_all=True
-        Args:
-            url_regex:
-
-        Returns:
-
-        """
-        response_list = self._cache_data.get(url_regex, [])
-        if not isinstance(response_list, list):
-            return [response_list]
-        return response_list
-
-    def get_text(self, url_regex):
-        return (
-            self.get_response(url_regex).content.decode()
-            if self.get_response(url_regex)
-            else None
-        )
-
-    def get_all_text(self, url_regex):
-        """
-        Get the text of all matching responses; only effective when save_all=True
-        Args:
-            url_regex:
-
-        Returns:
-
-        """
-        return [
-            response.content.decode() for response in self.get_all_response(url_regex)
-        ]
-
-    def get_json(self, url_regex):
-        return (
-            json.loads(self.get_text(url_regex))
-            if self.get_response(url_regex)
-            else None
-        )
-
-    def get_all_json(self, url_regex):
-        """
-        Get the JSON of all matching responses; only effective when save_all=True
-        Args:
-            url_regex:
-
-        Returns:
-
-        """
-        return [json.loads(text) for text in self.get_all_text(url_regex)]
-
-    def clear_cache(self):
-        self._cache_data = defaultdict(list)
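
The deleted PlaywrightDriver bundles browser start-up, proxy/user-agent handling, and response interception behind one context manager. A minimal usage sketch, assuming the package and its playwright dependency are still installed; the target URL and the regex are illustrative placeholders, not part of the original code:

    from utils.webdriver import PlaywrightDriver

    with PlaywrightDriver(
        driver_type="chromium",
        headless=True,
        url_regexes=[r"/api/list"],  # intercept responses whose URL matches this pattern
    ) as driver:
        driver.page.goto("https://example.com")
        driver.page.wait_for_load_state("networkidle")

        # Only the last matching response is kept unless save_all=True was passed.
        data = driver.get_json(r"/api/list")
        print(data)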

+ 0 - 81
A数据处理/site_monitor/utils/webdriver/webdirver.py

@@ -1,81 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2022/9/7 4:27 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-import abc
-
-import setting as setting
-
-
-class InterceptRequest:
-    def __init__(self, url, data, headers):
-        self.url = url
-        self.data = data
-        self.headers = headers
-
-
-class InterceptResponse:
-    def __init__(self, request: InterceptRequest, url, headers, content, status_code):
-        self.request = request
-        self.url = url
-        self.headers = headers
-        self.content = content
-        self.status_code = status_code
-
-
-class WebDriver:
-    def __init__(
-        self,
-        load_images=True,
-        user_agent=None,
-        proxy=None,
-        headless=False,
-        driver_type=None,
-        timeout=16,
-        window_size=(1024, 800),
-        executable_path=None,
-        custom_argument=None,
-        download_path=None,
-        auto_install_driver=True,
-        use_stealth_js=True,
-        **kwargs,
-    ):
-        """
-        webdriver wrapper; supports chrome, phantomjs and firefox
-        Args:
-            load_images: whether to load images
-            user_agent: a string, or a no-argument callable that returns the user agent
-            proxy: xxx.xxx.xxx.xxx:xxxx, or a no-argument callable that returns the proxy address
-            headless: whether to run in headless mode
-            driver_type: CHROME, PHANTOMJS or FIREFOX
-            timeout: request timeout
-            window_size: window size
-            executable_path: browser executable path; defaults to the default location
-            custom_argument: custom arguments passed to webdriver.Chrome(options=chrome_options, **kwargs)
-            download_path: directory for downloaded files; if set, the "keep"/"discard" prompt no longer appears (Chrome only)
-            auto_install_driver: automatically download the browser driver; supports chrome and firefox
-            use_stealth_js: use stealth.min.js to hide browser fingerprint features
-            **kwargs:
-        """
-        self._load_images = load_images
-        self._user_agent = user_agent or setting.DEFAULT_USERAGENT
-        self._proxy = proxy
-        self._headless = headless
-        self._timeout = timeout
-        self._window_size = window_size
-        self._executable_path = executable_path
-        self._custom_argument = custom_argument
-        self._download_path = download_path
-        self._auto_install_driver = auto_install_driver
-        self._use_stealth_js = use_stealth_js
-        self._driver_type = driver_type
-        self._kwargs = kwargs
-
-    @abc.abstractmethod
-    def quit(self):
-        pass
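
The WebDriver base class above only stores configuration and leaves quit() abstract. A hypothetical subclass sketch showing how the stored settings are meant to be consumed; DummyDriver and its open() method are illustrative only and do not exist in the original code:

    from utils.webdriver.webdirver import WebDriver


    class DummyDriver(WebDriver):
        # Hypothetical subclass: resolves the stored settings the same way
        # PlaywrightDriver._setup() does, but only prints them.
        def open(self, url):
            ua = self._user_agent() if callable(self._user_agent) else self._user_agent
            print(f"GET {url} ua={ua} proxy={self._proxy} headless={self._headless}")

        def quit(self):
            print("driver closed")


    driver = DummyDriver(user_agent="Mozilla/5.0", proxy="127.0.0.1:8888", headless=True)
    driver.open("https://example.com")
    driver.quit()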

+ 0 - 115
A数据处理/site_monitor/utils/webdriver/webdriver_pool.py

@@ -1,115 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2021/3/18 4:59 PM
----------
-@summary:
----------
-@author: Boris
-@email: boris_liu@foxmail.com
-"""
-
-import queue
-import threading
-
-from utils.log import logger as log
-from utils.tools import Singleton
-from utils.webdriver.playwright_driver import PlaywrightDriver
-
-
-@Singleton
-class WebDriverPool:
-    def __init__(
-        self, pool_size=5, driver_cls=PlaywrightDriver, thread_safe=False, **kwargs
-    ):
-        """
-
-        Args:
-            pool_size: size of the driver pool
-            driver_cls: the driver class
-            thread_safe: whether the pool is thread-safe
-                if True, each thread owns its own driver, pool_size is ignored, and the number of drivers equals the number of threads
-                if False, each thread takes a driver from the shared pool
-            **kwargs:
-        """
-        self.pool_size = pool_size
-        self.driver_cls = driver_cls
-        self.thread_safe = thread_safe
-        self.kwargs = kwargs
-
-        self.queue = queue.Queue(maxsize=pool_size)
-        self.lock = threading.RLock()
-        self.driver_count = 0
-        self.ctx = threading.local()
-
-    @property
-    def driver(self):
-        if not hasattr(self.ctx, "driver"):
-            self.ctx.driver = None
-        return self.ctx.driver
-
-    @driver.setter
-    def driver(self, driver):
-        self.ctx.driver = driver
-
-    @property
-    def is_full(self):
-        return self.driver_count >= self.pool_size
-
-    def create_driver(self, user_agent: str = None, proxy: str = None):
-        kwargs = self.kwargs.copy()
-        if user_agent:
-            kwargs["user_agent"] = user_agent
-        if proxy:
-            kwargs["proxy"] = proxy
-        return self.driver_cls(**kwargs)
-
-    def get(self, user_agent: str = None, proxy: str = None):
-        """
-        Get a webdriver from the pool
-        When a new webdriver instance is created, the user_agent and proxy arguments are used to build it
-        Args:
-            user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
-            proxy: xxx.xxx.xxx.xxx
-        Returns:
-
-        """
-        if not self.is_full and not self.thread_safe:
-            with self.lock:
-                if not self.is_full:
-                    driver = self.create_driver(user_agent, proxy)
-                    self.queue.put(driver)
-                    self.driver_count += 1
-        elif self.thread_safe:
-            if not self.driver:
-                driver = self.create_driver(user_agent, proxy)
-                self.driver = driver
-                self.driver_count += 1
-
-        if self.thread_safe:
-            driver = self.driver
-        else:
-            driver = self.queue.get()
-
-        return driver
-
-    def put(self, driver):
-        if not self.thread_safe:
-            self.queue.put(driver)
-
-    def remove(self, driver):
-        if self.thread_safe:
-            if self.driver:
-                self.driver.quit()
-                self.driver = None
-        else:
-            driver.quit()
-        self.driver_count -= 1
-
-    def close(self):
-        if self.thread_safe:
-            log.info("暂不支持关闭需线程安全的driver")
-
-        while not self.queue.empty():
-            driver = self.queue.get()
-            driver.quit()
-            self.driver_count -= 1
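
The deleted WebDriverPool hands out PlaywrightDriver instances either from a bounded queue or one per thread. A minimal usage sketch for the non-thread-safe mode, assuming the utils package is still importable; the URL is an illustrative placeholder:

    from utils.webdriver import WebDriverPool

    # Extra keyword arguments are forwarded to the PlaywrightDriver constructor.
    pool = WebDriverPool(pool_size=2, headless=True)

    driver = pool.get()          # drivers are created lazily, up to pool_size
    try:
        driver.page.goto("https://example.com")
        print(driver.cookies)
    finally:
        pool.put(driver)         # return the driver so other callers can reuse it

    pool.close()                 # quit every pooled driver when done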

Some files were not shown because too many files changed in this diff