浏览代码

Merge branch 'master' of http://192.168.3.207:8080/data_processing/crawlab_feader

lizongze 2 年之前
父节点
当前提交
7d8f721e24
共有 30 个文件被更改,包括 7274 次插入0 次删除
  1. 141 0
      A数据处理/site_monitor/.gitignore
  2. 19 0
      A数据处理/site_monitor/README.md
  3. 9 0
      A数据处理/site_monitor/db/__init__.py
  4. 422 0
      A数据处理/site_monitor/db/mongodb.py
  5. 924 0
      A数据处理/site_monitor/db/redisdb.py
  6. 35 0
      A数据处理/site_monitor/docker/Dockerfile
  7. 17 0
      A数据处理/site_monitor/docker/docker-compose.yml
  8. 218 0
      A数据处理/site_monitor/monitor.py
  9. 8 0
      A数据处理/site_monitor/network/__init__.py
  10. 3 0
      A数据处理/site_monitor/network/downloader/__init__.py
  11. 104 0
      A数据处理/site_monitor/network/downloader/_playwright.py
  12. 46 0
      A数据处理/site_monitor/network/downloader/_requests.py
  13. 41 0
      A数据处理/site_monitor/network/downloader/base.py
  14. 32 0
      A数据处理/site_monitor/network/proxy_file/de9f83d546a39eca6979d2a6dca3407a.txt
  15. 746 0
      A数据处理/site_monitor/network/proxy_pool.py
  16. 524 0
      A数据处理/site_monitor/network/request.py
  17. 396 0
      A数据处理/site_monitor/network/response.py
  18. 389 0
      A数据处理/site_monitor/network/user_agent.py
  19. 14 0
      A数据处理/site_monitor/requirements.txt
  20. 65 0
      A数据处理/site_monitor/setting.py
  21. 8 0
      A数据处理/site_monitor/utils/__init__.py
  22. 147 0
      A数据处理/site_monitor/utils/clean_html.py
  23. 0 0
      A数据处理/site_monitor/utils/js/intercept.js
  24. 6 0
      A数据处理/site_monitor/utils/js/stealth.min.js
  25. 14 0
      A数据处理/site_monitor/utils/log.py
  26. 2438 0
      A数据处理/site_monitor/utils/tools.py
  27. 12 0
      A数据处理/site_monitor/utils/webdriver/__init__.py
  28. 300 0
      A数据处理/site_monitor/utils/webdriver/playwright_driver.py
  29. 81 0
      A数据处理/site_monitor/utils/webdriver/webdirver.py
  30. 115 0
      A数据处理/site_monitor/utils/webdriver/webdriver_pool.py

+ 141 - 0
A数据处理/site_monitor/.gitignore

@@ -0,0 +1,141 @@
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.idea

+ 19 - 0
A数据处理/site_monitor/README.md

@@ -0,0 +1,19 @@
+# 原网站监控
+
+#### 构建镜像
+```shell
+$ cd site_monitor
+$ docker build -t site_monitor:v1.0 -f docker/Dockerfile .
+```
+
+#### 创建容器
+```shell
+$ cd site_monitor
+$ docker-compose -f docker/docker-compose.yml up -d
+```
+
+#### 关闭容器
+```shell
+$ cd site_monitor
+$ docker-compose -f docker/docker-compose.yml down
+```

+ 9 - 0
A数据处理/site_monitor/db/__init__.py

@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2020/4/23 12:09 AM
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""

+ 422 - 0
A数据处理/site_monitor/db/mongodb.py

@@ -0,0 +1,422 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2021-04-18 14:12:21
+---------
+@summary: 操作mongo数据库
+---------
+@author: Mkdir700
+@email:  mkdir700@gmail.com
+"""
+import re
+from typing import List, Dict, Optional
+from urllib import parse
+
+import pymongo
+from pymongo import MongoClient
+from pymongo.collection import Collection
+from pymongo.database import Database
+from pymongo.errors import DuplicateKeyError, BulkWriteError
+
+import setting as setting
+from utils.log import logger as log
+
+
class MongoDB:
    """MongoDB access helper built on pymongo.

    Wraps find/insert/update/delete plus index utilities. On unique-index
    conflicts the insert helpers can replace the old document, update selected
    columns, or ignore the conflict; the conflicting index fields are parsed
    back out of the server's duplicate-key error message.
    """

    def __init__(
        self,
        ip=None,
        port=None,
        db=None,
        user_name=None,
        user_pass=None,
        url=None,
        **kwargs,
    ):
        """Create a client from ``url`` or from discrete connection settings.

        Any unset argument falls back to the corresponding MONGO_* value in
        the project ``setting`` module.
        """
        if url:
            self.client = MongoClient(url, **kwargs)
        else:
            if not ip:
                ip = setting.MONGO_IP
            if not port:
                port = setting.MONGO_PORT
            if not db:
                db = setting.MONGO_DB
            if not user_name:
                user_name = setting.MONGO_USER_NAME
            if not user_pass:
                user_pass = setting.MONGO_USER_PASS
            self.client = MongoClient(
                host=ip, port=port, username=user_name, password=user_pass
            )

        self.db = self.get_database(db)

        # Lazily filled cache of "collection:index_name" -> index key fields;
        # avoids one index_information() round trip per duplicate-key conflict.
        self.__index__cached = {}

    @classmethod
    def from_url(cls, url, **kwargs):
        """Alternate constructor from a full MongoDB connection url.

        Args:
            url: mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]]
                 See http://mongodb.github.io/mongo-java-driver/3.4/javadoc/com/mongodb/MongoClientURI.html

        Raises:
            Exception: if the url scheme is not ``mongodb``.
        """
        url_parsed = parse.urlparse(url)

        db_type = url_parsed.scheme.strip()
        if db_type != "mongodb":
            raise Exception(
                "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
                    url
                )
            )

        return cls(url=url, **kwargs)

    def get_database(self, database, **kwargs) -> Database:
        """Return the Database object named ``database``."""
        return self.client.get_database(database, **kwargs)

    def get_collection(self, coll_name, **kwargs) -> Collection:
        """Return the Collection object named ``coll_name``."""
        return self.db.get_collection(coll_name, **kwargs)

    def find(
        self, coll_name: str, condition: Optional[Dict] = None, limit: int = 0, **kwargs
    ) -> List[Dict]:
        """Run a ``find`` command and drain its cursor into a list.

        Args:
            coll_name: collection (table) name.
            condition: filter document; None means match all.
            limit: max number of documents; 0 means no limit.
            **kwargs: extra ``find`` command fields, see
                https://docs.mongodb.com/manual/reference/command/find/#command-fields

        Returns:
            [] when nothing matches, else [{'_id': 'xx', ...}, ...].
        """
        condition = {} if condition is None else condition
        command = {"find": coll_name, "filter": condition, "limit": limit}
        command.update(kwargs)
        result = self.run_command(command)
        cursor = result["cursor"]
        cursor_id = cursor["id"]
        dataset = cursor["firstBatch"]
        # A cursor id of 0 means the server has no more batches; otherwise keep
        # issuing getMore until the cursor is exhausted.
        while True:
            if cursor_id == 0:
                break
            result = self.run_command(
                {
                    "getMore": cursor_id,
                    "collection": coll_name,
                    "batchSize": kwargs.get("batchSize", 100),
                }
            )
            cursor = result["cursor"]
            cursor_id = cursor["id"]
            dataset.extend(cursor["nextBatch"])
        return dataset

    def add(
        self,
        coll_name,
        data: Dict,
        replace=False,
        update_columns=(),
        update_columns_value=(),
        insert_ignore=False,
    ):
        """Insert a single document, resolving unique-index conflicts.

        Args:
            coll_name: collection name.
            data: the document to insert.
            replace: on conflict, replace the stored document with ``data``.
            update_columns: on conflict, update only these fields,
                e.g. update_columns = ["name", "title"].
            update_columns_value: values to write into ``update_columns``;
                when empty the values are taken from ``data`` itself.
            insert_ignore: on conflict, silently ignore it.

        Returns:
            1 — NOTE: returned even when the conflict path updated/ignored
            instead of inserting; callers treat any resolution as success.

        Raises:
            DuplicateKeyError: on conflict when no resolution option is set.
        """
        affect_count = 1
        collection = self.get_collection(coll_name)
        try:
            collection.insert_one(data)
        except DuplicateKeyError as e:
            # Document already exists: resolve per the chosen strategy.
            if update_columns:
                if not isinstance(update_columns, (tuple, list)):
                    update_columns = [update_columns]

                condition = self.__get_update_condition(
                    coll_name, data, e.details.get("errmsg")
                )

                if update_columns_value:
                    # Update with the explicitly supplied values.
                    doc = {
                        key: value
                        for key, value in zip(update_columns, update_columns_value)
                    }
                else:
                    # Update with the values carried by ``data``.
                    doc = {key: data[key] for key in update_columns}

                collection.update_one(condition, {"$set": doc})

            elif replace:
                condition = self.__get_update_condition(
                    coll_name, data, e.details.get("errmsg")
                )
                # Overwrite the stored document entirely.
                collection.replace_one(condition, data)

            elif not insert_ignore:
                raise e

        return affect_count

    def add_batch(
        self,
        coll_name: str,
        datas: List[Dict],
        replace=False,
        update_columns=(),
        update_columns_value=(),
        condition_fields: dict = None,
    ):
        """Insert many documents, resolving per-document index conflicts.

        Args:
            coll_name: collection name.
            datas: documents, e.g. [{'_id': 'xx'}, ...].
            replace: on conflict, replace the stored document.
            update_columns: on conflict, update only these fields,
                e.g. update_columns = ["name", "title"].
            update_columns_value: values for ``update_columns``; when empty
                the conflicting document's own values are used.
            condition_fields: fields used to locate the stored document; when
                None the unique-index fields from the error message are used.

        Returns:
            Number of documents actually inserted (updates not counted).
        """
        add_count = 0

        if not datas:
            return add_count

        collection = self.get_collection(coll_name)
        if not isinstance(update_columns, (tuple, list)):
            update_columns = [update_columns]

        try:
            add_count = len(datas)
            # ordered=False keeps inserting past individual failures.
            collection.insert_many(datas, ordered=False)
        except BulkWriteError as e:
            write_errors = e.details.get("writeErrors")
            for error in write_errors:
                if error.get("code") == 11000:
                    # Duplicate key: ``op`` holds the offending document.
                    data = error.get("op")

                    def get_condition():
                        # Build the filter used to locate the stored document.
                        if condition_fields:
                            condition = {
                                condition_field: data[condition_field]
                                for condition_field in condition_fields
                            }
                        else:
                            # Derive it from the duplicate-key error message.
                            condition = self.__get_update_condition(
                                coll_name, data, error.get("errmsg")
                            )

                        return condition

                    if update_columns:
                        if update_columns_value:
                            # Update with the explicitly supplied values.
                            doc = {
                                key: value
                                for key, value in zip(
                                    update_columns, update_columns_value
                                )
                            }
                        else:
                            # Update with the conflicting document's values.
                            doc = {key: data.get(key) for key in update_columns}

                        collection.update_one(get_condition(), {"$set": doc})
                        add_count -= 1

                    elif replace:
                        # Overwrite the stored document entirely.
                        collection.replace_one(get_condition(), data)
                        add_count -= 1

                    else:
                        # Conflict ignored; just don't count it as inserted.
                        add_count -= 1

        return add_count

    def count(self, coll_name, condition: Optional[Dict], limit=0, **kwargs):
        """Count documents matching ``condition``.

        Args:
            coll_name: collection name.
            condition: filter document; None means match all.
            limit: cap on the count; 0 means no cap.
            **kwargs: extra ``count`` command fields (skip, hint, collation,
                readConcern, comment), see
                https://docs.mongodb.com/manual/reference/command/count/#mongodb-dbcommand-dbcmd.count

        Returns:
            The number of matching documents.
        """
        # Normalize None to {} — the count command rejects a null query
        # (find() already does the same normalization).
        condition = {} if condition is None else condition
        command = {"count": coll_name, "query": condition, "limit": limit, **kwargs}
        result = self.run_command(command)
        return result["n"]

    def update(self, coll_name, data: Dict, condition: Dict, upsert: bool = False):
        """Apply ``$set: data`` to the first document matching ``condition``.

        Args:
            coll_name: collection name.
            data: fields to set, e.g. {"xxx": "xxx"}.
            condition: filter, e.g. {"_id": "xxxx"}.
            upsert: insert when no document matches.

        Returns:
            True on success, False on error (the error is logged).
        """
        try:
            collection = self.get_collection(coll_name)
            collection.update_one(condition, {"$set": data}, upsert=upsert)
        except Exception as e:
            log.error(
                """
                error:{}
                condition: {}
            """.format(
                    e, condition
                )
            )
            return False
        else:
            return True

    def delete(self, coll_name, condition: Dict) -> bool:
        """Delete the first document matching ``condition``.

        Args:
            coll_name: collection name.
            condition: filter document.

        Returns:
            True on success, False on error (the error is logged).
        """
        try:
            collection = self.get_collection(coll_name)
            collection.delete_one(condition)
        except Exception as e:
            log.error(
                """
                error:{}
                condition: {}
            """.format(
                    e, condition
                )
            )
            return False
        else:
            return True

    def run_command(self, command: Dict):
        """Run a raw database command.

        Reference: https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
        """
        return self.db.command(command)

    def create_index(self, coll_name, keys, unique=True):
        """Create an ascending (optionally unique) index over ``keys``."""
        collection = self.get_collection(coll_name)
        _keys = [(key, pymongo.ASCENDING) for key in keys]
        collection.create_index(_keys, unique=unique)

    def get_index(self, coll_name):
        """Return index_information() for the collection."""
        return self.get_collection(coll_name).index_information()

    def drop_collection(self, coll_name):
        """Drop the whole collection."""
        return self.db.drop_collection(coll_name)

    def get_index_key(self, coll_name, index_name):
        """Return the field names participating in ``index_name``.

        Results are memoized in ``self.__index__cached``.

        Raises:
            Exception: when the index does not exist on the collection.
        """
        cache_key = f"{coll_name}:{index_name}"

        if cache_key in self.__index__cached:
            return self.__index__cached.get(cache_key)

        index = self.get_index(coll_name)
        index_detail = index.get(index_name)
        if not index_detail:
            errmsg = f"not found index {index_name} in collection {coll_name}"
            raise Exception(errmsg)

        index_keys = [val[0] for val in index_detail.get("key")]
        self.__index__cached[cache_key] = index_keys
        return index_keys

    def __get_update_condition(
        self, coll_name: str, data: dict, duplicate_errmsg: str
    ) -> dict:
        """Build an update filter from a duplicate-key error message.

        Args:
            duplicate_errmsg: e.g. "E11000 duplicate key error collection:
                feapder.test index: a_1_b_1 dup key: { : 1, : \"你好\" }"
            data: the document that conflicted, e.g.
                {"a": 1, "b": "你好", "c": "嘻嘻"}

        Returns:
            The index fields with their values from ``data``,
            e.g. {"a": 1, "b": "你好"}.
        """
        index_name = re.search(r"index: (\w+)", duplicate_errmsg).group(1)
        index_keys = self.get_index_key(coll_name, index_name)

        condition = {key: data.get(key) for key in index_keys}
        return condition

    def __getattr__(self, name):
        # Fall through to the underlying Database for anything not wrapped.
        return getattr(self.db, name)

+ 924 - 0
A数据处理/site_monitor/db/redisdb.py

@@ -0,0 +1,924 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2016-11-16 16:25
+---------
+@summary: 操作redis数据库
+---------
+@author: Boris
+"""
+
+import time
+
+import redis
+from redis._compat import unicode, long, basestring
+from redis.connection import Encoder as _Encoder
+from redis.exceptions import ConnectionError, TimeoutError
+from redis.exceptions import DataError
+from redis.sentinel import Sentinel
+from rediscluster import RedisCluster
+
+import setting as setting
+from utils.log import logger as log
+
+
class Encoder(_Encoder):
    """redis-py Encoder that additionally accepts list/dict/tuple values.

    The stock Encoder raises DataError for container types; this subclass
    renders them via ``unicode()`` (``str`` on py3) so their literal
    representation can be stored.
    """

    def encode(self, value):
        "Return a bytestring or bytes-like representation of the value"
        if isinstance(value, (bytes, memoryview)):
            return value
        # NOTE: unlike upstream, bool is NOT rejected here; being an int
        # subclass it falls into the int branch and is stored as "True"/"False".
        elif isinstance(value, float):
            value = repr(value).encode()
        elif isinstance(value, (int, long)):
            # python 2 repr() on longs is '123L', so use str() instead
            value = str(value).encode()
        elif isinstance(value, (list, dict, tuple)):
            # Extension point: stringify containers instead of raising.
            value = unicode(value)
        elif not isinstance(value, basestring):
            # a value we don't know how to deal with. throw an error
            typename = type(value).__name__
            raise DataError(
                "Invalid input of type: '%s'. Convert to a "
                "bytes, string, int or float first." % typename
            )
        if isinstance(value, unicode):
            value = value.encode(self.encoding, self.encoding_errors)
        return value


# Monkey-patch the module-level Encoder so every connection created by
# redis-py uses the container-friendly encoding above.
redis.connection.Encoder = Encoder
+
+
+class RedisDB:
+    def __init__(
+        self,
+        ip_ports=None,
+        db=None,
+        user_pass=None,
+        url=None,
+        decode_responses=True,
+        service_name=None,
+        max_connections=1000,
+        **kwargs,
+    ):
+        """
+        redis的封装
+        Args:
+            ip_ports: ip:port 多个可写为列表或者逗号隔开 如 ip1:port1,ip2:port2 或 ["ip1:port1", "ip2:port2"]
+            db:
+            user_pass:
+            url:
+            decode_responses:
+            service_name: 适用于redis哨兵模式
+            max_connections: 同一个redis对象使用的并发数(连接池的最大连接数),超过这个数量会抛出redis.ConnectionError
+        """
+
+        # 可能会改setting中的值,所以此处不能直接赋值为默认值,需要后加载赋值
+        if ip_ports is None:
+            ip_ports = setting.REDISDB_IP_PORTS
+        if db is None:
+            db = setting.REDISDB_DB
+        if user_pass is None:
+            user_pass = setting.REDISDB_USER_PASS
+        if service_name is None:
+            service_name = setting.REDISDB_SERVICE_NAME
+
+        self._is_redis_cluster = False
+
+        self.__redis = None
+        self._url = url
+        self._ip_ports = ip_ports
+        self._db = db
+        self._user_pass = user_pass
+        self._decode_responses = decode_responses
+        self._service_name = service_name
+        self._max_connections = max_connections
+        self._kwargs = kwargs
+        self.get_connect()
+
+    def __repr__(self):
+        if self._url:
+            return "<Redisdb url:{}>".format(self._url)
+
+        return "<Redisdb ip_ports: {} db:{} user_pass:{}>".format(
+            self._ip_ports, self._db, self._user_pass
+        )
+
+    @property
+    def _redis(self):
+        try:
+            if not self.__redis.ping():
+                raise ConnectionError("unable to connect to redis")
+        except:
+            self._reconnect()
+
+        return self.__redis
+
+    @_redis.setter
+    def _redis(self, val):
+        self.__redis = val
+
+    def get_connect(self):
+        # 获取数据库连接
+        try:
+            if not self._url:
+                if not self._ip_ports:
+                    raise ConnectionError("未设置 redis 连接信息")
+
+                ip_ports = (
+                    self._ip_ports
+                    if isinstance(self._ip_ports, list)
+                    else self._ip_ports.split(",")
+                )
+                if len(ip_ports) > 1:
+                    startup_nodes = []
+                    for ip_port in ip_ports:
+                        ip, port = ip_port.split(":")
+                        startup_nodes.append({"host": ip, "port": port})
+
+                    if self._service_name:
+                        # log.debug("使用redis哨兵模式")
+                        hosts = [(node["host"], node["port"]) for node in startup_nodes]
+                        sentinel = Sentinel(hosts, socket_timeout=3, **self._kwargs)
+                        self._redis = sentinel.master_for(
+                            self._service_name,
+                            password=self._user_pass,
+                            db=self._db,
+                            redis_class=redis.StrictRedis,
+                            decode_responses=self._decode_responses,
+                            max_connections=self._max_connections,
+                            **self._kwargs,
+                        )
+
+                    else:
+                        # log.debug("使用redis集群模式")
+                        self._redis = RedisCluster(
+                            startup_nodes=startup_nodes,
+                            decode_responses=self._decode_responses,
+                            password=self._user_pass,
+                            max_connections=self._max_connections,
+                            **self._kwargs,
+                        )
+
+                    self._is_redis_cluster = True
+                else:
+                    ip, port = ip_ports[0].split(":")
+                    self._redis = redis.StrictRedis(
+                        host=ip,
+                        port=port,
+                        db=self._db,
+                        password=self._user_pass,
+                        decode_responses=self._decode_responses,
+                        max_connections=self._max_connections,
+                        **self._kwargs,
+                    )
+                    self._is_redis_cluster = False
+            else:
+                self._redis = redis.StrictRedis.from_url(
+                    self._url, decode_responses=self._decode_responses
+                )
+                self._is_redis_cluster = False
+
+        except Exception as e:
+            raise e
+
+        # 不要写成self._redis.ping() 否则循环调用了
+        return self.__redis.ping()
+
+    @classmethod
+    def from_url(cls, url):
+        """
+
+        Args:
+            url: redis://[[username]:[password]]@[host]:[port]/[db]
+
+        Returns:
+
+        """
+        return cls(url=url)
+
+    def sadd(self, table, values):
+        """
+        @summary: 使用无序set集合存储数据, 去重
+        ---------
+        @param table:
+        @param values: 值; 支持list 或 单个值
+        ---------
+        @result: 若库中存在 返回0,否则入库,返回1。 批量添加返回None
+        """
+
+        if isinstance(values, list):
+            pipe = self._redis.pipeline()
+
+            if not self._is_redis_cluster:
+                pipe.multi()
+            for value in values:
+                pipe.sadd(table, value)
+            pipe.execute()
+
+        else:
+            return self._redis.sadd(table, values)
+
+    def sget(self, table, count=1, is_pop=True):
+        """
+        返回 list 如 ['1'] 或 []
+        @param table:
+        @param count:
+        @param is_pop:
+        @return:
+        """
+
+        datas = []
+        if is_pop:
+            count = count if count <= self.sget_count(table) else self.sget_count(table)
+            if count:
+                if count > 1:
+                    pipe = self._redis.pipeline()
+
+                    if not self._is_redis_cluster:
+                        pipe.multi()
+                    while count:
+                        pipe.spop(table)
+                        count -= 1
+                    datas = pipe.execute()
+
+                else:
+                    datas.append(self._redis.spop(table))
+
+        else:
+            datas = self._redis.srandmember(table, count)
+
+        return datas
+
+    def srem(self, table, values):
+        """
+        @summary: 移除集合中的指定元素
+        ---------
+        @param table:
+        @param values: 一个或者列表
+        ---------
+        @result:
+        """
+
+        if isinstance(values, list):
+            pipe = self._redis.pipeline()
+
+            if not self._is_redis_cluster:
+                pipe.multi()
+            for value in values:
+                pipe.srem(table, value)
+            pipe.execute()
+        else:
+            self._redis.srem(table, values)
+
+    def sget_count(self, table):
+        return self._redis.scard(table)
+
+    def sdelete(self, table):
+        """
+        @summary: 删除set集合的大键(数据量大的表)
+        删除大set键,使用sscan命令,每次扫描集合中500个元素,再用srem命令每次删除一个键
+        若直接用delete命令,会导致Redis阻塞,出现故障切换和应用程序崩溃的故障。
+        ---------
+        @param table:
+        ---------
+        @result:
+        """
+
+        # 当 SCAN 命令的游标参数被设置为 0 时, 服务器将开始一次新的迭代, 而当服务器向用户返回值为 0 的游标时, 表示迭代已结束
+        cursor = "0"
+        while cursor != 0:
+            cursor, data = self._redis.sscan(table, cursor=cursor, count=500)
+            for item in data:
+                # pipe.srem(table, item)
+                self._redis.srem(table, item)
+
+            # pipe.execute()
+
+    def sismember(self, table, key):
+        "Return a boolean indicating if ``value`` is a member of set ``name``"
+        return self._redis.sismember(table, key)
+
+    def zadd(self, table, values, prioritys=0):
+        """
+        @summary: 使用有序set集合存储数据, 去重(值存在更新)
+        ---------
+        @param table:
+        @param values: 值; 支持list 或 单个值
+        @param prioritys: 优先级; double类型,支持list 或 单个值。 根据此字段的值来排序, 值越小越优先。 可不传值,默认value的优先级为0
+        ---------
+        @result:若库中存在 返回0,否则入库,返回1。 批量添加返回 [0, 1 ...]
+        """
+        if isinstance(values, list):
+            if not isinstance(prioritys, list):
+                prioritys = [prioritys] * len(values)
+            else:
+                assert len(values) == len(prioritys), "values值要与prioritys值一一对应"
+
+            pipe = self._redis.pipeline()
+
+            if not self._is_redis_cluster:
+                pipe.multi()
+            for value, priority in zip(values, prioritys):
+                pipe.execute_command(
+                    "ZADD", table, priority, value
+                )  # 为了兼容2.x与3.x版本的redis
+            return pipe.execute()
+
+        else:
+            return self._redis.execute_command(
+                "ZADD", table, prioritys, values
+            )  # 为了兼容2.x与3.x版本的redis
+
+    def zget(self, table, count=1, is_pop=True):
+        """
+        @summary: 从有序set集合中获取数据 优先返回分数小的(优先级高的)
+        ---------
+        @param table:
+        @param count: 数量 -1 返回全部数据
+        @param is_pop:获取数据后,是否在原set集合中删除,默认是
+        ---------
+        @result: 列表
+        """
+
+        start_pos = 0  # 包含
+        end_pos = count - 1 if count > 0 else count
+
+        pipe = self._redis.pipeline()
+
+        if not self._is_redis_cluster:
+            pipe.multi()  # 标记事务的开始 参考 http://www.runoob.com/redis/redis-transactions.html
+        pipe.zrange(table, start_pos, end_pos)  # 取值
+        if is_pop:
+            pipe.zremrangebyrank(table, start_pos, end_pos)  # 删除
+        results, *count = pipe.execute()
+        return results
+
+    def zremrangebyscore(self, table, priority_min, priority_max):
+        """
+        根据分数移除成员 闭区间
+        @param table:
+        @param priority_min:
+        @param priority_max:
+        @return: 被移除的成员个数
+        """
+        return self._redis.zremrangebyscore(table, priority_min, priority_max)
+
+    def zrangebyscore(self, table, priority_min, priority_max, count=None, is_pop=True):
+        """
+        @summary: 返回指定分数区间的数据 闭区间
+        ---------
+        @param table:
+        @param priority_min: 优先级越小越优先
+        @param priority_max:
+        @param count: 获取的数量,为空则表示分数区间内的全部数据
+        @param is_pop: 是否删除
+        ---------
+        @result:
+        """
+
+        # 使用lua脚本, 保证操作的原子性
+        lua = """
+            -- local key = KEYS[1]
+            local min_score = ARGV[2]
+            local max_score = ARGV[3]
+            local is_pop = ARGV[4]
+            local count = ARGV[5]
+
+            -- 取值
+            local datas = nil
+            if count then
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'limit', 0, count)
+            else
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score)
+            end
+
+            -- 删除redis中刚取到的值
+            if (is_pop=='True' or is_pop=='1') then
+                for i=1, #datas do
+                    redis.call('zrem', KEYS[1], datas[i])
+                end
+            end
+
+
+            return datas
+
+        """
+        cmd = self._redis.register_script(lua)
+        if count:
+            res = cmd(
+                keys=[table], args=[table, priority_min, priority_max, is_pop, count]
+            )
+        else:
+            res = cmd(keys=[table], args=[table, priority_min, priority_max, is_pop])
+
+        return res
+
+    def zrangebyscore_increase_score(
+        self, table, priority_min, priority_max, increase_score, count=None
+    ):
+        """
+        @summary: Return members whose score lies in the closed interval
+                  [priority_min, priority_max] and, in the same atomic step,
+                  add increase_score to each returned member's score.
+        ---------
+        @param table: sorted-set key
+        @param priority_min: minimum score (inclusive)
+        @param priority_max: maximum score (inclusive)
+        @param increase_score: score delta; positive raises, negative lowers the score
+        @param count: how many members to fetch; None means every member in range
+        ---------
+        @result: list of members (their scores have already been shifted)
+        """
+
+        # A Lua script keeps the read + score update atomic on the server side.
+        lua = """
+            -- local key = KEYS[1]
+            local min_score = ARGV[1]
+            local max_score = ARGV[2]
+            local increase_score = ARGV[3]
+            local count = ARGV[4]
+
+            -- 取值
+            local datas = nil
+            if count then
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'limit', 0, count)
+            else
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score)
+            end
+
+            --修改优先级
+            for i=1, #datas do
+                redis.call('zincrby', KEYS[1], increase_score, datas[i])
+            end
+
+            return datas
+
+        """
+        cmd = self._redis.register_script(lua)
+        # When count is falsy it must be omitted entirely: Lua then sees
+        # ARGV[4] as nil and takes the "fetch all" branch above.
+        if count:
+            res = cmd(
+                keys=[table], args=[priority_min, priority_max, increase_score, count]
+            )
+        else:
+            res = cmd(keys=[table], args=[priority_min, priority_max, increase_score])
+
+        return res
+
+    def zrangebyscore_set_score(
+        self, table, priority_min, priority_max, score, count=None
+    ):
+        """
+        @summary: Return members whose score lies in the closed interval
+                  [priority_min, priority_max] and atomically reset every
+                  returned member's score to `score`.
+        ---------
+        @param table: sorted-set key
+        @param priority_min: minimum score (inclusive)
+        @param priority_max: maximum score (inclusive)
+        @param score: the new score assigned to each matched member
+        @param count: how many members to fetch; None means every member in range
+        ---------
+        @result: list of members (without their scores)
+        """
+
+        # A Lua script keeps the read + score rewrite atomic on the server side.
+        lua = """
+            -- local key = KEYS[1]
+            local min_score = ARGV[1]
+            local max_score = ARGV[2]
+            local set_score = ARGV[3]
+            local count = ARGV[4]
+
+            -- 取值
+            local datas = nil
+            if count then
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'withscores','limit', 0, count)
+            else
+                datas = redis.call('zrangebyscore', KEYS[1], min_score, max_score, 'withscores')
+            end
+
+            local real_datas = {} -- 数据
+            --修改优先级
+            for i=1, #datas, 2 do
+               local data = datas[i]
+               local score = datas[i+1]
+
+               table.insert(real_datas, data) -- 添加数据
+
+               redis.call('zincrby', KEYS[1], set_score - score, datas[i])
+            end
+
+            return real_datas
+
+        """
+        cmd = self._redis.register_script(lua)
+        # count omitted when falsy so the Lua "fetch all" branch triggers.
+        if count:
+            res = cmd(keys=[table], args=[priority_min, priority_max, score, count])
+        else:
+            res = cmd(keys=[table], args=[priority_min, priority_max, score])
+
+        return res
+
+    def zincrby(self, table, amount, value):
+        # Add `amount` to the score of member `value` in sorted set `table`.
+        return self._redis.zincrby(table, amount, value)
+
+    def zget_count(self, table, priority_min=None, priority_max=None):
+        """
+        @summary: Count the members of a sorted set.
+        ---------
+        @param table:
+        @param priority_min: lower score bound (inclusive), optional
+        @param priority_max: upper score bound (inclusive), optional
+        ---------
+        @result: zcount within the range when both bounds are given,
+                 otherwise the total cardinality (zcard)
+        """
+
+        if priority_min != None and priority_max != None:
+            return self._redis.zcount(table, priority_min, priority_max)
+        else:
+            return self._redis.zcard(table)
+
+    def zrem(self, table, values):
+        """
+        @summary: Remove the given member(s) from the sorted set.
+        ---------
+        @param table:
+        @param values: a single member or a list of members
+        ---------
+        @result: None (the removal count from Redis is not propagated)
+        """
+
+        if isinstance(values, list):
+            self._redis.zrem(table, *values)
+        else:
+            self._redis.zrem(table, values)
+
+    def zexists(self, table, values):
+        """
+        Use zscore to test membership of one member or a list of members.
+        @param values: single member -> returns 1/0;
+                       list -> returns a list of 1/0 in the same order
+        @return:
+        """
+
+        is_exists = []
+
+        if isinstance(values, list):
+            # Pipeline all zscore calls into a single round trip.
+            pipe = self._redis.pipeline()
+            pipe.multi()
+            for value in values:
+                pipe.zscore(table, value)
+            is_exists_temp = pipe.execute()
+            for is_exist in is_exists_temp:
+                if is_exist != None:
+                    is_exists.append(1)
+                else:
+                    is_exists.append(0)
+
+        else:
+            is_exists = self._redis.zscore(table, values)
+            is_exists = 1 if is_exists != None else 0
+
+        return is_exists
+
+    def lpush(self, table, values):
+        # NOTE(review): despite its name this APPENDS via RPUSH (tail insert).
+        # The list branch returns None while the scalar branch returns the new
+        # list length -- callers should not rely on the return value.
+
+        if isinstance(values, list):
+            pipe = self._redis.pipeline()
+
+            if not self._is_redis_cluster:
+                pipe.multi()
+            for value in values:
+                pipe.rpush(table, value)
+            pipe.execute()
+
+        else:
+            return self._redis.rpush(table, values)
+
+    def lpop(self, table, count=1):
+        """
+        @summary: Pop up to `count` items from the head of the list.
+        ---------
+        @param table:
+        @param count: capped at the current list length
+        ---------
+        @result: a list when count > 1, a single item when count == 1,
+                 None when the list is empty
+        """
+
+        datas = None
+        # Cap count at the list length so the pipeline never issues no-op pops.
+        lcount = self.lget_count(table)
+        count = count if count <= lcount else lcount
+
+        if count:
+            if count > 1:
+                pipe = self._redis.pipeline()
+
+                if not self._is_redis_cluster:
+                    pipe.multi()
+                while count:
+                    pipe.lpop(table)
+                    count -= 1
+                datas = pipe.execute()
+
+            else:
+                datas = self._redis.lpop(table)
+
+        return datas
+
+    def rpoplpush(self, from_table, to_table=None):
+        """
+        Pop the tail element of `from_table` and push it onto the head of
+        `to_table`, returning that element. When both names are equal (or
+        to_table is omitted) this rotates the list: tail moves to head.
+        @param from_table:
+        @param to_table: defaults to from_table (rotation)
+        @return: the moved element
+        """
+
+        if not to_table:
+            to_table = from_table
+
+        return self._redis.rpoplpush(from_table, to_table)
+
+    def lget_count(self, table):
+        # Length of the list stored at `table`.
+        return self._redis.llen(table)
+
+    def lrem(self, table, value, num=0):
+        """
+        @summary:
+        Remove occurrences of `value` from the list.
+        ---------
+        @param table:
+        @param value:
+        @param num: 0 removes all occurrences; >0 from head, <0 from tail
+        ---------
+        @result: number of removed entries
+        """
+        return self._redis.lrem(table, num, value)
+
+    def lrange(self, table, start=0, end=-1):
+        # Inclusive slice of the list; defaults return the whole list.
+        return self._redis.lrange(table, start, end)
+
+    def hset(self, table, key, value):
+        """
+        @summary:
+        A new hash is created by HSET when `table` does not exist yet.
+        An existing field `key` is overwritten.
+        ---------
+        @param table:
+        @param key:
+        @param value:
+        ---------
+        @result: 1 when the field was newly created; 0 when it was overwritten
+        """
+        return self._redis.hset(table, key, value)
+
+    def hset_batch(self, table, datas):
+        """
+        Batch-insert hash fields in one pipeline round trip.
+        Args:
+            datas:
+                [[key, value]]
+        Returns:
+            list of per-field results (1 new / 0 overwritten)
+        """
+        pipe = self._redis.pipeline()
+
+        if not self._is_redis_cluster:
+            pipe.multi()
+        for key, value in datas:
+            pipe.hset(table, key, value)
+        return pipe.execute()
+
+    def hincrby(self, table, key, increment):
+        # Atomically add `increment` to the integer at hash field `key`.
+        return self._redis.hincrby(table, key, increment)
+
+    def hget(self, table, key, is_pop=False):
+        # Read hash field `key`; with is_pop=True the field is deleted in the
+        # same atomic Lua step (a "take" operation).
+        if not is_pop:
+            return self._redis.hget(table, key)
+        else:
+            lua = """
+                -- local key = KEYS[1]
+                local field = ARGV[1]
+
+                -- 取值
+                local datas = redis.call('hget', KEYS[1], field)
+                -- 删除值
+                redis.call('hdel', KEYS[1], field)
+
+                return datas
+
+                    """
+            cmd = self._redis.register_script(lua)
+            res = cmd(keys=[table], args=[key])
+
+            return res
+
+    def hgetall(self, table):
+        # Entire hash as a dict.
+        return self._redis.hgetall(table)
+
+    def hexists(self, table, key):
+        # True when hash field `key` exists.
+        return self._redis.hexists(table, key)
+
+    def hdel(self, table, *keys):
+        """
+        @summary: Delete the given hash field(s); several may be passed.
+        ---------
+        @param table:
+        @param *keys:
+        ---------
+        @result:
+        """
+        self._redis.hdel(table, *keys)
+
+    def hget_count(self, table):
+        # Number of fields in the hash.
+        return self._redis.hlen(table)
+
+    def hkeys(self, table):
+        # All field names of the hash.
+        return self._redis.hkeys(table)
+
+    def setbit(self, table, offsets, values):
+        """
+        Set individual bits of the string value; returns the previous bit(s).
+        @param table:
+        @param offsets: a single offset or a list of offsets
+        @param values: a single bit value or a list matching `offsets`
+        @return: list of previous bits / single previous bit
+        """
+        if isinstance(offsets, list):
+            # A scalar value is broadcast across every offset.
+            if not isinstance(values, list):
+                values = [values] * len(offsets)
+            else:
+                assert len(offsets) == len(values), "offsets值要与values值一一对应"
+
+            pipe = self._redis.pipeline()
+            pipe.multi()
+
+            for offset, value in zip(offsets, values):
+                pipe.setbit(table, offset, value)
+
+            return pipe.execute()
+
+        else:
+            return self._redis.setbit(table, offsets, values)
+
+    def getbit(self, table, offsets):
+        """
+        Read individual bits of the string value.
+        @param table:
+        @param offsets: a single offset or a list of offsets
+        @return: list of bits / single bit
+        """
+        if isinstance(offsets, list):
+            pipe = self._redis.pipeline()
+            pipe.multi()
+            for offset in offsets:
+                pipe.getbit(table, offset)
+
+            return pipe.execute()
+
+        else:
+            return self._redis.getbit(table, offsets)
+
+    def bitcount(self, table):
+        # Number of set bits in the string value.
+        return self._redis.bitcount(table)
+
+    def strset(self, table, value, **kwargs):
+        # SET with pass-through options (ex, px, nx, ...).
+        return self._redis.set(table, value, **kwargs)
+
+    def str_incrby(self, table, value):
+        # Atomically add `value` to the integer stored at `table`.
+        return self._redis.incrby(table, value)
+
+    def strget(self, table):
+        # GET the string value.
+        return self._redis.get(table)
+
+    def strlen(self, table):
+        # Length of the string value.
+        return self._redis.strlen(table)
+
+    def getkeys(self, regex):
+        # KEYS pattern scan -- O(N) over the whole keyspace; avoid on hot paths.
+        return self._redis.keys(regex)
+
+    def exists_key(self, key):
+        # Whether `key` exists.
+        return self._redis.exists(key)
+
+    def set_expire(self, key, seconds):
+        """
+        @summary: Set a time-to-live on `key`.
+        ---------
+        @param key:
+        @param seconds: TTL in seconds
+        ---------
+        @result:
+        """
+        self._redis.expire(key, seconds)
+
+    def get_expire(self, key):
+        """
+        @summary: Remaining time-to-live of `key`, in seconds.
+        ---------
+        @param key:
+        ---------
+        @result: TTL in seconds (per Redis: -1 no expiry, -2 key missing)
+        """
+        return self._redis.ttl(key)
+
+    def clear(self, table):
+        # Best-effort DEL of the whole key; failures are logged, not raised.
+        try:
+            self._redis.delete(table)
+        except Exception as e:
+            log.error(e)
+
+    def get_redis_obj(self):
+        # Expose the underlying redis client.
+        return self._redis
+
+    def _reconnect(self):
+        # Detect a dropped connection (server restart / idle timeout) and
+        # retry get_connect() every 2 seconds until it succeeds.
+        retry_count = 0
+        while True:
+            try:
+                retry_count += 1
+                log.error(f"redis 连接断开, 重新连接 {retry_count}")
+                if self.get_connect():
+                    log.info(f"redis 连接成功")
+                    return True
+            except (ConnectionError, TimeoutError) as e:
+                log.error(f"连接失败 e: {e}")
+
+            time.sleep(2)
+
+    def __getattr__(self, name):
+        # Delegate any un-wrapped method straight to the raw redis client.
+        return getattr(self._redis, name)
+
+    def current_status(self, show_key=True, filter_key_by_used_memory=10 * 1024 * 1024):
+        """
+        Summarize current Redis usage (clients, memory, optionally per-key stats).
+        Args:
+            show_key: also measure every key's memory (slow: KEYS * + MEMORY USAGE)
+            filter_key_by_used_memory: only list keys using more than this many bytes
+
+        Returns:
+            a human-readable status report string
+        """
+        from prettytable import PrettyTable
+        from tqdm import tqdm
+
+        status_msg = ""
+
+        print("正在查询最大连接数...")
+        clients_count = self._redis.execute_command("info clients")
+        max_clients_count = self._redis.execute_command("config get maxclients")
+        status_msg += ": ".join(max_clients_count) + "\n"
+        status_msg += clients_count + "\n"
+
+        print("正在查询整体内存使用情况...")
+        total_status = self._redis.execute_command("info memory")
+        status_msg += total_status + "\n"
+
+        if show_key:
+            print("正在查询每个key占用内存情况等信息...")
+            table = PrettyTable(
+                field_names=[
+                    "type",
+                    "key",
+                    "value_count",
+                    "used_memory_human",
+                    "used_memory",
+                ],
+                sortby="used_memory",
+                reversesort=True,
+                header_style="title",
+            )
+
+            # Element count per key, dispatched on the key's Redis type.
+            keys = self._redis.execute_command("keys *")
+            for key in tqdm(keys):
+                key_type = self._redis.execute_command("type {}".format(key))
+                if key_type == "set":
+                    value_count = self._redis.scard(key)
+                elif key_type == "zset":
+                    value_count = self._redis.zcard(key)
+                elif key_type == "list":
+                    value_count = self._redis.llen(key)
+                elif key_type == "hash":
+                    value_count = self._redis.hlen(key)
+                elif key_type == "string":
+                    value_count = self._redis.strlen(key)
+                elif key_type == "none":
+                    continue
+                else:
+                    raise TypeError("尚不支持 {} 类型的key".format(key_type))
+
+                used_memory = self._redis.execute_command("memory usage {}".format(key))
+                if used_memory >= filter_key_by_used_memory:
+                    used_memory_human = (
+                        "%0.2fMB" % (used_memory / 1024 / 1024) if used_memory else 0
+                    )
+
+                    table.add_row(
+                        [key_type, key, value_count, used_memory_human, used_memory]
+                    )
+
+            status_msg += str(table)
+
+        return status_msg

+ 35 - 0
A数据处理/site_monitor/docker/Dockerfile

@@ -0,0 +1,35 @@
+# 拉取镜像
+FROM ubuntu:22.04
+
+# 配置容器时间
+RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
+
+# 更新源 - 阿里源
+RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
+RUN sed -i s@/security.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
+RUN apt-get clean && apt-get update
+RUN apt-get install -y wget unzip curl vim
+
+# 安装 python3.8.10 gcc相关配置
+WORKDIR /opt
+RUN apt-get install -y gcc build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libbz2-dev liblzma-dev sqlite3 libsqlite3-dev tk-dev uuid-dev libgdbm-compat-dev libncurses-dev libnspr4-dev
+
+# python3.8.10下载与解压缩
+RUN curl -o python3.8.10.tgz https://mirrors.huaweicloud.com/python/3.8.10/Python-3.8.10.tgz && tar -zxvf python3.8.10.tgz
+# 创建编译安装目录, 配置安装位置
+RUN mkdir /usr/local/python38
+WORKDIR /opt/Python-3.8.10
+RUN ./configure --prefix=/usr/local/python38 && make && make install
+# 添加python3的软连接
+RUN rm -rf /usr/bin/python3 /usr/bin/pip3 && ln -s /usr/local/python38/bin/python3 /usr/bin/python3 && ln -s /usr/local/python38/bin/pip3.8 /usr/bin/pip3
+# 更换pip源&更新pip
+RUN pip3 config set global.index-url https://mirrors.bfsu.edu.cn/pypi/web/simple && pip3 install --upgrade pip
+
+# 安装项目依赖
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+# 安装playwright - webkit 驱动和依赖
+RUN python3 -m playwright install --with-deps webkit
+
+# 设置工作目录
+WORKDIR /mnt

+ 17 - 0
A数据处理/site_monitor/docker/docker-compose.yml

@@ -0,0 +1,17 @@
+version: "3"
+services: # 一组容器
+  worker01:
+    container_name: site_monitor
+    image: site_monitor:v1.0
+    volumes: # 映射文件夹
+      - /mnt/site_monitor:/mnt
+    network_mode: "host" # 指定网络名称
+    restart: always
+    privileged: true
+    shm_size: 2GB
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "200k"
+        max-file: "10"
+    command: 'python3 /mnt/monitor.py'

+ 218 - 0
A数据处理/site_monitor/monitor.py

@@ -0,0 +1,218 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-05-10 
+---------
+@summary:  网站监控
+---------
+@author: Dzr
+"""
+import threading
+
+import bson
+import numpy as np
+import requests
+from playwright._impl._api_types import Error
+from requests.exceptions import SSLError
+
+import utils.tools as tools
+from db.mongodb import MongoDB
+from network.request import Request
+from network.response import Response
+from utils.log import logger
+
+
class MonitorParser(threading.Thread):
    """Worker thread: repeatedly fetches one site-monitoring task, probes the
    site's host page and channel page, and writes the results back to MongoDB."""

    def __init__(self, mongo_db, coll_name):
        """
        @param mongo_db: MongoDB helper used to persist monitoring results
        @param coll_name: name of the result collection
        """
        threading.Thread.__init__(self)
        self.mgo_db = mongo_db
        self.coll_name = coll_name

        # Dispatch service: one GET returns a single pending monitoring task.
        self.monitor_api = 'http://cc.spdata.jianyu360.com/crawl/site_monitor/task/fetch'

    def get_task(self):
        """Fetch one task from the dispatch API; returns {} on any failure."""
        items = {}
        try:
            response = requests.get(self.monitor_api, timeout=5)
            items = response.json()['data']
            if '_id' in items:
                items['_id'] = bson.ObjectId(items['_id'])
        except Exception as e:
            # Best effort, as before -- but unlike the original bare
            # `try/finally: return`, KeyboardInterrupt/SystemExit now propagate.
            logger.debug(f"fetch task failed: {e}")
        return items

    @staticmethod
    def get_response(url, render=False, **kwargs):
        """
        Download *url* with up to 3 attempts.

        Certificate failures downgrade https -> http before retrying; a
        successful attempt must return HTTP 200 with non-empty text and at
        least one tag, otherwise the next attempt runs.

        @param url: page address
        @param render: True -> headless-browser download, False -> plain HTTP
        @return: Response; when every attempt fails, a stub Response with an
                 empty body (status_code -1 when nothing was ever received)
        """
        request = Request(url=url, render=render, **kwargs)
        response = None
        for _ in range(3):
            try:
                response = request.get_response()
            except Error as e:  # playwright-level error
                if 'The certificate for this server is invalid.' in e.message:
                    url = url.replace('https', 'http')
                    request = Request(url=url, render=render, **kwargs)
            except SSLError:
                # requests-level certificate error: retry over http, rendered.
                url = url.replace('https', 'http')
                request = Request(url=url, render=True, **kwargs)
            except Exception as e:
                # BUG FIX: the original unconditionally did `response.reason = e.args`,
                # which raised AttributeError when no response existed yet.
                if response is not None:
                    response.reason = e.args
            else:
                if response.status_code != 200:
                    continue

                if response.text is None:
                    continue

                if len(response.plain_text) == 0:
                    continue

                if response.tags()['tags_count'] == 0:
                    continue

                break
        else:
            # All attempts failed: synthesize an empty placeholder response.
            response = Response.from_dict({
                "url": url,
                "_content": b"",
                "cookies": {},
                "status_code": getattr(response, 'status_code', -1),
                "elapsed": 666,
                "headers": {}
            })

        return response

    def __add_items_to_db(self, task, items):
        """Persist the computed monitoring fields onto the task document."""
        result = self.mgo_db.update(
            coll_name=self.coll_name,
            condition={'_id': task['_id']},
            data=items
        )
        # Replaced a leftover debugging print() with proper logging.
        logger.debug("updated %s doc _id=%s", self.coll_name, task['_id'])
        return result

    def deal_task(self, task):
        """Probe host + channel of one task, derive the availability/drift
        statistics and write them back to MongoDB."""
        is_first_monitor = False

        # Site home page (plain HTTP, no proxy).
        host = task['host']
        response = self.get_response(host, render=False, proxies=False)
        host_status_code = response.status_code

        # Channel page (browser-rendered).
        url = task['url']
        response = self.get_response(url, render=True, proxies=False)
        channel_status_code = response.status_code

        # Tag statistics of the channel page.
        tags_count = response.tags()['tags_count']
        tags_count_diff = abs(tags_count - task['tags_count'])
        tags_count_diff_lst = list(task['tags_count_diff_lst'])

        # Channel-redesign detection: flag when today's diff falls outside the
        # mean +/- 2*std band of the history.
        channel_ischange = task['channel_ischange']
        if len(tags_count_diff_lst) >= 3 and not channel_ischange:
            mean = np.mean(tags_count_diff_lst)  # historical mean
            std = np.std(tags_count_diff_lst, ddof=1)  # sample standard deviation
            lower, upper = mean - (2 * std), mean + (2 * std)
            # BUG FIX: the original used `tags_count_diff not in [lower, upper]`,
            # a two-element membership test, instead of an interval check.
            if not (lower <= tags_count_diff <= upper):
                channel_ischange = True

        # Visit counters: reset on the first probe of a new day.
        update_dt = tools.timestamp_to_date(task['update_at'], '%Y-%m-%d')
        if tools.get_current_date('%Y-%m-%d') != update_dt:
            is_first_monitor = True

            channel_visit_count, channel_failure_count = 1, 0
            if channel_status_code != 200:
                channel_failure_count = 1

            host_visit_count, host_failure_count = 1, 0
            if host_status_code != 200:
                host_failure_count = 1

            tags_count_diff_lst = []
            tags_count_diff_lst.insert(0, tags_count_diff)
        else:
            channel_visit_count = task['channel_visit_count'] + 1
            channel_failure_count = task['channel_failure_count']
            if channel_status_code != 200:
                channel_failure_count += 1

            host_visit_count = task['host_visit_count'] + 1
            host_failure_count = task['host_failure_count']
            if host_status_code != 200:
                host_failure_count += 1

            tags_count_diff_lst.insert(0, tags_count_diff)

        if is_first_monitor:
            pass  # placeholder: daily first-probe hook, intentionally empty

        items = {
            'tags_count': tags_count,
            'tags_count_diff': tags_count_diff,
            'tags_count_diff_lst': tags_count_diff_lst,
            'channel_ischange': channel_ischange,
            'channel_status_code': channel_status_code,
            'channel_visit_count': channel_visit_count,
            'channel_failure_count': channel_failure_count,
            'host_status_code': host_status_code,
            'host_visit_count': host_visit_count,
            'host_failure_count': host_failure_count,
            'update_at': tools.ensure_int64(tools.get_current_timestamp())
        }
        self.__add_items_to_db(task, items)

        logger.debug(
            """
                -------------- 处理完成 ----------------
                id  = Object('%s')
                thread = %s
                response = %s
                """
            % (
                str(task['_id']),
                self.getName(),
                response
            )
        )

    def run(self):
        """Main loop: pull tasks forever, sleeping 2s when none are pending."""
        while True:
            task = self.get_task()
            if not task:
                logger.debug(f"[{self.getName()}]暂无监控任务")
                tools.delay_time(2)
                continue

            try:
                self.deal_task(task)
            except Exception as e:
                logger.exception(e)
+
+
class MonitorServer(threading.Thread):
    """Supervisor thread: spins up a pool of MonitorParser worker threads."""

    def __init__(self, thread_nums=1):
        """
        @param thread_nums: how many MonitorParser workers to launch
        """
        threading.Thread.__init__(self)
        self.mongo_db = MongoDB()
        self.coll_name = 'site_monitor'

        self.thread_nums = thread_nums

        self.parser_control_obj = MonitorParser
        self.parser_controls = []

    def run(self):
        """Create, start and register each worker thread."""
        remaining = self.thread_nums
        while remaining > 0:
            worker = self.parser_control_obj(
                mongo_db=self.mongo_db,
                coll_name=self.coll_name
            )
            worker.start()
            self.parser_controls.append(worker)
            remaining -= 1


if __name__ == '__main__':
    MonitorServer(thread_nums=2).start()

+ 8 - 0
A数据处理/site_monitor/network/__init__.py

@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-05-10 
+---------
+@summary:  
+---------
+@author: Dzr
+"""

+ 3 - 0
A数据处理/site_monitor/network/downloader/__init__.py

@@ -0,0 +1,3 @@
+from ._requests import RequestsDownloader
+from ._requests import RequestsSessionDownloader
+from ._playwright import PlaywrightDownloader

+ 104 - 0
A数据处理/site_monitor/network/downloader/_playwright.py

@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022/9/7 4:05 PM
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import setting as setting
+import utils.tools as tools
+from network.downloader.base import RenderDownloader
+from network.response import Response
+from utils.webdriver import WebDriverPool, PlaywrightDriver
+
+
+class PlaywrightDownloader(RenderDownloader):
+    """Render downloader backed by a shared pool of Playwright drivers."""
+
+    webdriver_pool: WebDriverPool = None
+
+    @property
+    def _webdriver_pool(self):
+        # Lazily create one class-level pool shared by every instance.
+        if not self.__class__.webdriver_pool:
+            self.__class__.webdriver_pool = WebDriverPool(
+                **setting.PLAYWRIGHT, driver_cls=PlaywrightDriver, thread_safe=True
+            )
+
+        return self.__class__.webdriver_pool
+
+    def download(self, request) -> Response:
+        """Render the request's URL in a pooled browser and wrap the page as a Response."""
+        # Proxy priority: request-specific > settings file > random
+        if request.custom_proxies:
+            proxy = request.get_proxy()
+        elif setting.PLAYWRIGHT.get("proxy"):
+            proxy = setting.PLAYWRIGHT.get("proxy")
+        else:
+            proxy = request.get_proxy()
+
+        # user_agent priority: request-specific > settings file > random
+        if request.custom_ua:
+            user_agent = request.get_user_agent()
+        elif setting.PLAYWRIGHT.get("user_agent"):
+            user_agent = setting.PLAYWRIGHT.get("user_agent")
+        else:
+            user_agent = request.get_user_agent()
+
+        cookies = request.get_cookies()
+        url = request.url
+        render_time = request.render_time or setting.PLAYWRIGHT.get("render_time")
+        wait_until = setting.PLAYWRIGHT.get("wait_until") or "domcontentloaded"
+        if request.get_params():
+            url = tools.joint_url(url, request.get_params())
+
+        driver: PlaywrightDriver = self._webdriver_pool.get(
+            user_agent=user_agent, proxy=proxy
+        )
+        try:
+            # driver.url must be set before cookies so they attach to this URL.
+            if cookies:
+                driver.url = url
+                driver.cookies = cookies
+            driver.page.goto(url, wait_until=wait_until)
+
+            # Optional extra wait so late-running JS can finish rendering.
+            if render_time:
+                tools.delay_time(render_time)
+
+            html = driver.page.content()
+            response = Response.from_dict(
+                {
+                    "url": driver.page.url,
+                    "cookies": driver.cookies,
+                    "_content": html.encode(),
+                    "status_code": 200,
+                    "elapsed": 666,
+                    "headers": {
+                        "User-Agent": driver.user_agent,
+                        "Cookie": tools.cookies2str(driver.cookies),
+                    },
+                }
+            )
+
+            # Hand the driver to the response so the caller can put it back / close it.
+            response.driver = driver
+            response.browser = driver
+            return response
+        except Exception as e:
+            # A broken driver is discarded instead of being returned to the pool.
+            self._webdriver_pool.remove(driver)
+            raise e
+
+    def close(self, driver):
+        # Discard this driver from the pool.
+        if driver:
+            self._webdriver_pool.remove(driver)
+
+    def put_back(self, driver):
+        """
+        Return the browser object to the pool.
+        """
+        self._webdriver_pool.put(driver)
+
+    def close_all(self):
+        """
+        Close every browser.
+        """
+        # not supported
+        # self._webdriver_pool.close()
+        pass

+ 46 - 0
A数据处理/site_monitor/network/downloader/_requests.py

@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022/4/10 5:57 下午
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import requests
+from requests.adapters import HTTPAdapter
+
+from network.downloader.base import Downloader
+from network.response import Response
+
+
class RequestsDownloader(Downloader):
    """Stateless downloader: every call opens a fresh requests connection."""

    def download(self, request) -> Response:
        """Issue the HTTP request described by *request* and wrap the reply."""
        raw = requests.request(
            request.method, request.url, **request.requests_kwargs
        )
        return Response(raw)
+
+
class RequestsSessionDownloader(Downloader):
    """Downloader that funnels every request through one shared Session."""

    session = None  # lazily created, shared across all instances

    @property
    def _session(self):
        """Create the shared Session (with enlarged connection pools) on first use."""
        cls = self.__class__
        if not cls.session:
            cls.session = requests.Session()
            # pool_connections: number of cached urllib3 pools;
            # pool_maxsize: max connections kept per pool.
            adapter = HTTPAdapter(pool_connections=1000, pool_maxsize=1000)
            # Mounted for every URL whose prefix is "http" (covers https too).
            cls.session.mount("http", adapter)

        return cls.session

    def download(self, request) -> Response:
        """Send *request* over the shared session and wrap the reply."""
        raw = self._session.request(
            request.method, request.url, **request.requests_kwargs
        )
        return Response(raw)

+ 41 - 0
A数据处理/site_monitor/network/downloader/base.py

@@ -0,0 +1,41 @@
+import abc
+from abc import ABC
+
+from network.response import Response
+
+
+class Downloader:
+    """Abstract downloader interface: turn a Request into a Response."""
+
+    # NOTE(review): @abc.abstractmethod only blocks instantiation when the
+    # class uses ABCMeta (e.g. derives from abc.ABC); on this plain class it
+    # is purely documentary.
+    @abc.abstractmethod
+    def download(self, request) -> Response:
+        """
+
+        Args:
+            request: Request
+
+        Returns: Response
+
+        """
+        raise NotImplementedError
+
+    def close(self, response: Response):
+        # Hook for releasing per-response resources; default is a no-op.
+        pass
+
+
+class RenderDownloader(Downloader, ABC):
+    """Downloader variant that drives a real browser; adds driver lifecycle hooks."""
+
+    def put_back(self, driver):
+        """
+        Return the browser object to the pool.
+        """
+        pass
+
+    def close(self, driver):
+        """
+        Close this browser.
+        """
+        pass
+
+    def close_all(self):
+        """
+        Close every browser.
+        """
+        pass

+ 32 - 0
A数据处理/site_monitor/network/proxy_file/de9f83d546a39eca6979d2a6dca3407a.txt

@@ -0,0 +1,32 @@
+180.105.104.247:8860&&1684743244
+115.208.199.134:8860&&1684742848
+42.84.93.124:8861&&1684742999
+180.127.72.88:8860&&1684743979
+144.255.48.89:8860&&1684744166
+180.106.242.48:8860&&1684743307
+121.207.84.107:8860&&1684742787
+180.127.72.79:8860&&1684743262
+182.107.181.130:8860&&1684742689
+218.67.90.253:8860&&1684743824
+59.61.165.88:8860&&1684742786
+114.233.0.176:8860&&1684742924
+113.93.224.26:8860&&1684743064
+123.169.34.24:8860&&1684743176
+182.34.27.242:8860&&1684744210
+125.69.91.209:8860&&1684743202
+36.27.184.4:8860&&1684743545
+49.69.209.246:8860&&1684742763
+123.146.150.68:8860&&1684742715
+114.235.254.245:8860&&1684742840
+106.32.10.20:8860&&1684743120
+140.250.148.156:8860&&1684742873
+180.111.177.16:8860&&1684743024
+180.108.151.90:8860&&1684743675
+121.238.107.47:8860&&1684742780
+123.160.96.180:8860&&1684742820
+223.215.119.152:8860&&1684742729
+182.34.102.138:8860&&1684743505
+59.58.211.240:8860&&1684744113
+180.140.47.156:8860&&1684743073
+125.123.136.247:8861&&1684743189
+49.86.182.103:8860&&1684742719

+ 746 - 0
A数据处理/site_monitor/network/proxy_pool.py

@@ -0,0 +1,746 @@
+# -*- coding: utf-8 -*-
+"""
+代理池
+"""
+import datetime
+import json
+import os
+import random
+import socket
+import time
+from urllib import parse
+
+import redis
+import requests
+
+import setting
+from utils import tools
+from utils.log import logger as log
+
+
def decrypt(input_str: str) -> str:
    """Decode ``input_str`` with a base64 variant using a shuffled alphabet.

    The proxy vendor obfuscates IPs with standard base64 mechanics but a
    custom 64-character alphabet; trailing ``=`` padding is honoured.

    :param input_str: obfuscated text to decode
    :return: decoded text (one chr() per decoded byte)
    """
    key = "ABNOPqrceQRSTklmUDEFGXYZabnopfghHVWdijstuvwCIJKLMxyz0123456789+/"
    # Number of padding chars decides how many trailing bits to discard.
    pad_count = input_str.count('=')
    # Map every non-padding char to its 6-bit index in the custom alphabet.
    bit_chunks = [
        '{:0>6}'.format(str(bin(key.index(ch))).replace('0b', ''))
        for ch in input_str
        if ch != '='
    ]
    decoded = ''
    while bit_chunks:
        # Consume groups of four 6-bit chunks -> up to three 8-bit bytes.
        group = ''.join(bit_chunks[:4])
        if len(group) % 8 != 0:
            # Last group: strip 2 bits per '=' so only whole bytes remain.
            group = group[:-2 * pad_count]
        byte_bits = [group[pos:pos + 8] for pos in (0, 8, 16)]
        decoded += ''.join(chr(int(bits, 2)) for bits in byte_bits if bits)
        bit_chunks = bit_chunks[4:]
    return decoded
+
+
# Local cache directory for downloaded proxy lists.
proxy_path = os.path.join(os.path.dirname(__file__), "proxy_file")
# makedirs+exist_ok avoids the check-then-create race of exists()/mkdir()
# when several workers import this module concurrently.
os.makedirs(proxy_path, exist_ok=True)
+
+
def get_proxy_from_url(**kwargs):
    """Fetch proxies from every configured source url.

    :param kwargs: must contain ``proxy_source_url`` (str or list of str,
        ``http...`` and ``redis...`` schemes supported); remaining kwargs are
        forwarded to the per-scheme fetchers.
    :return: shuffled list of proxies dicts
    :raises ValueError: when no usable source url is configured.
    """
    proxy_source_url = kwargs.get("proxy_source_url", [])
    # proxy_source_url = "http://socks.spdata.jianyu360.com/socks/getips?limit=100"

    if not isinstance(proxy_source_url, list):
        proxy_source_url = [proxy_source_url]
    # Bug fix: drop falsy entries no matter how the urls were supplied — the
    # original only filtered when a single (non-list) url was given, so a
    # list containing None/"" crashed later on url.startswith().
    proxy_source_url = [x for x in proxy_source_url if x]
    if not proxy_source_url:
        raise ValueError("no specify proxy_source_url: {}".format(proxy_source_url))
    kwargs = kwargs.copy()
    kwargs.pop("proxy_source_url")
    proxies_list = []
    for url in proxy_source_url:
        if url.startswith("http"):
            proxies_list.extend(get_proxy_from_http(url, **kwargs))
        elif url.startswith("redis"):
            proxies_list.extend(get_proxy_from_redis(url, **kwargs))

    if proxies_list:
        # Shuffle so callers don't hammer the sources in file order.
        random.shuffle(proxies_list)
    return proxies_list
+
+
def get_proxy_from_http(proxy_source_url, **kwargs):
    """Fetch proxies from an http api, caching the result in a local file.

    The api is expected to return a json list of
    ``{"ip": <obfuscated>, "ports": [...], "lifetime": <expiry ts>}``
    entries; they are persisted as ``host:port&&end_ts`` lines.

    :param proxy_source_url: api endpoint to query
    :param kwargs: ``local_proxy_file_cache_timeout`` — cache ttl in seconds
        (default 30; a falsy value forces a refresh on every call)
    :return: list of proxies dicts parsed from the cache file
    """
    filename = tools.get_md5(proxy_source_url) + ".txt"
    abs_filename = os.path.join(proxy_path, filename)
    update_interval = kwargs.get("local_proxy_file_cache_timeout", 30)

    # Decide whether the on-disk cache must be refreshed.
    need_update = (
        not update_interval  # ttl disabled -> always refresh
        or not os.path.exists(abs_filename)  # no cache yet
        or time.time() - os.stat(abs_filename).st_mtime > update_interval  # stale
    )
    if need_update:
        response = requests.get(proxy_source_url, timeout=20)
        # Fail loudly on http errors instead of caching an error payload.
        response.raise_for_status()
        pool = []
        # Vendor-specific payload: ip is base64-obfuscated, see decrypt().
        for proxy in response.json():
            host = decrypt(proxy['ip'])
            port = proxy['ports'][0]
            end_time = proxy['lifetime']
            pool.append(f"{host}:{port}&&{end_time}")

        with open(abs_filename, "w") as f:
            f.write('\n'.join(pool))
    return get_proxy_from_file(filename)
+
+
def get_proxy_from_file(filename, **kwargs):
    """Parse proxies from a cached file under ``proxy_path``.

    Supported line formats::

        [auth@]ip:port[:protocol][&&end_ts]

    Lines carrying the ``&&end_ts`` suffix (the socks feed cache format) are
    kept only while the expiry timestamp lies in the future and are emitted
    as socks5 proxies; lines with an explicit protocol keep that protocol.

    :param filename: file name inside ``proxy_path``
    :param kwargs: unused, accepted for interface symmetry
    :return: list of proxies dicts
    """
    proxies_list = []
    with open(os.path.join(proxy_path, filename), "r") as f:
        lines = f.readlines()

    for line in lines:
        line = line.strip()
        if not line:
            continue
        # Optional auth prefix: user:pass@ip:port...
        auth = ""
        if "@" in line:
            auth, line = line.split("@")
        # Bug fix: the expiry suffix is optional — the original unconditional
        # ``line.split("&&")`` crashed on the documented ip:port[:protocol]
        # formats that carry no "&&".
        end = None
        if "&&" in line:
            line, end = line.split("&&", 1)

        items = line.split(":")
        if len(items) < 2:
            continue

        ip, port, *protocol = items
        if not all([port, ip]):
            continue
        if auth:
            ip = "{}@{}".format(auth, ip)
        if not protocol:
            # No explicit protocol: treat as socks5, honouring the expiry
            # timestamp when one was provided.
            if end is not None and time.time() >= int(end):
                continue  # proxy already expired
            proxies = {
                "https": "socks5://%s:%s" % (ip, port),
                "http": "socks5://%s:%s" % (ip, port),
            }
        else:
            proxies = {protocol[0]: "%s://%s:%s" % (protocol[0], ip, port)}
        proxies_list.append(proxies)

    return proxies_list
+
+
def get_proxy_from_redis(proxy_source_url, **kwargs):
    """Fetch proxies stored in a redis sorted set.

    @param proxy_source_url: redis://:passwd@host:ip/db
        storage layout: zset of "ip:port" members scored by timestamp
    @param kwargs:
        {"redis_proxies_key": "xxx"}
    @return: [{'http':'http://xxx.xxx.xxx:xxx', 'https':'https://xxx.xxx.xxx.xxx:xxx'}]
    """

    redis_conn = redis.StrictRedis.from_url(proxy_source_url)
    key = kwargs.get("redis_proxies_key")
    assert key, "从redis中获取代理 需要指定 redis_proxies_key"
    # zrange(0, -1) pulls every member of the sorted set.
    members = redis_conn.zrange(key, 0, -1)
    return [
        {
            "https": "https://%s" % member.decode(),
            "http": "http://%s" % member.decode(),
        }
        for member in members
    ]
+
+
def check_proxy(
        ip="",
        port="",
        proxies=None,
        type=0,
        timeout=5,
        logger=None,
        show_error_log=True,
        **kwargs,
):
    """Check whether a proxy is usable.

    :param ip: proxy host
    :param port: proxy port
    :param proxies: requests-style proxies dict (used when type != 0)
    :param type: 0: bare tcp connect check  1: full http request via requests
    :param timeout: seconds per attempt
    :param logger: logger to use; defaults to the module logger
    :param show_error_log: log failures at debug level when True
    :return: 1 usable, 0 not usable
    """
    if not logger:
        logger = log
    ok = 0
    if type == 0 and ip and port:
        # A successful tcp connect does not guarantee the proxy works
        # ("Connection closed by foreign host" slips through), but it is
        # cheap and keeps the pool refreshing.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
            sk.settimeout(timeout)
            try:
                sk.connect((ip, int(port)))
                ok = 1
            except Exception as e:
                if show_error_log:
                    logger.debug("check proxy failed: {} {}:{}".format(e, ip, port))
            # no explicit close: the with-block closes the socket
    else:
        if not proxies:
            proxies = {
                "http": "socks5://{}:{}".format(ip, port),
                # Bug fix: was "socks5//..." (missing colon) — requests then
                # saw no scheme and bypassed the proxy entirely.
                "https": "socks5://{}:{}".format(ip, port),
            }
        try:
            # Probe url used for proxy validation.
            r = requests.get(
                "https://myip.ipip.net", proxies=proxies, timeout=timeout, stream=True
            )
            ok = 1
            r.close()
        except Exception as e:
            if show_error_log:
                logger.debug(
                    "check proxy failed: {} {}:{} {}".format(e, ip, port, proxies)
                )
    return ok
+
+
class ProxyItem(object):
    """A single proxy entry tracked by the pool."""

    # Valid values for ``flag``: -1 discard, 0 normal, 1 delayed
    proxy_tag_list = (-1, 0, 1)

    def __init__(
            self,
            proxies=None,
            valid_timeout=20,
            check_interval=180,
            max_proxy_use_num=10000,
            delay=30,
            use_interval=None,
            **kwargs,
    ):
        """
        :param proxies: requests-style dict {"http": ..., "https": ...}
        :param valid_timeout: validity-check timeout in seconds
            (historically -1, i.e. no checking, since 20181008)
        :param check_interval: minimum seconds between validity checks
        :param max_proxy_use_num: maximum times this proxy may be handed out
        :param delay: seconds a flag==1 proxy stays parked
        :param use_interval: minimum seconds between two uses; None = unlimited
        :param kwargs: ignored extras (forwarded pool config)
        """
        # {"http": ..., "https": ...}
        self.proxies = proxies
        # validity-check timeout, seconds
        self.valid_timeout = valid_timeout
        # minimum interval between checks, seconds
        self.check_interval = check_interval

        # flag: 0 normal, -1 discarded, 1 parked for later reuse
        self.flag = 0
        # timestamp of the last flag change
        self.flag_ts = 0
        # timestamp of the last validity check
        self.update_ts = 0
        # maximum hand-out count
        self.max_proxy_use_num = max_proxy_use_num
        # times handed out so far
        self.use_num = 0
        # park duration for flag==1, seconds
        self.delay = delay
        # minimum seconds between uses
        self.use_interval = use_interval
        # timestamp of the last use
        self.use_ts = 0

        self.proxy_args = self.parse_proxies(self.proxies)
        self.proxy_ip = self.proxy_args["ip"]
        self.proxy_port = self.proxy_args["port"]
        self.proxy_ip_port = "{}:{}".format(self.proxy_ip, self.proxy_port)
        # proxy_id includes credentials when present so authed proxies
        # don't collide with unauthed ones on the same host:port
        if self.proxy_args["user"]:
            self.proxy_id = "{user}:{password}@{ip}:{port}".format(**self.proxy_args)
        else:
            self.proxy_id = self.proxy_ip_port

        # log handler
        self.logger = log

    def get_proxies(self):
        """Hand out the proxies dict, counting the use."""
        self.use_num += 1
        return self.proxies

    def is_delay(self):
        """True while the proxy is parked (flag == 1)."""
        return self.flag == 1

    def is_valid(self, force=0, type=0):
        """Check whether this proxy can still be used.

        :param force: skip the check_interval throttle when truthy
        :param type: check type forwarded to check_proxy (0 socket, 1 requests)
        :return: 1 valid, 2 delayed (retry later), 0 invalid — drop from pool
        """
        if self.use_num > self.max_proxy_use_num > 0:
            self.logger.debug("代理达到最大使用次数: {} {}".format(self.use_num, self.proxies))
            return 0
        if self.flag == -1:
            self.logger.debug("代理被标记 -1 丢弃 %s" % self.proxies)
            return 0
        if self.delay > 0 and self.flag == 1:
            if time.time() - self.flag_ts < self.delay:
                self.logger.debug("代理被标记 1 延迟 %s" % self.proxies)
                return 2
            else:
                # park period elapsed — release the proxy
                self.flag = 0
                self.logger.debug("延迟代理释放: {}".format(self.proxies))
        if self.use_interval:
            if time.time() - self.use_ts < self.use_interval:
                return 2
        if not force:
            # within the throttle window: trust the previous check
            if time.time() - self.update_ts < self.check_interval:
                return 1
        if self.valid_timeout > 0:
            ok = check_proxy(
                proxies=self.proxies,
                type=type,
                timeout=self.valid_timeout,
                logger=self.logger,
            )
        else:
            # checking disabled — assume valid
            ok = 1
        self.update_ts = time.time()
        return ok

    @classmethod
    def parse_proxies(cls, proxies):
        """Split a proxies dict (or its json string form) into components.

        Fix: first parameter renamed ``self`` -> ``cls`` to match the
        ``@classmethod`` decorator.

        :param proxies: proxies dict, or a json string encoding one
        :return: dict with protocol/ip/port/user/password/ip_port,
            or {} when nothing can be parsed
        """
        if not proxies:
            return {}
        if isinstance(proxies, (str, bytes)):
            proxies = json.loads(proxies)
        protocol = list(proxies.keys())
        if not protocol:
            return {}
        _url = proxies.get(protocol[0])
        # NOTE: the historical http:// prefixing is disabled on purpose so
        # socks5:// urls survive urlparse intact.
        # if not _url.startswith("http"):
        #     _url = "http://" + _url
        _url_parse = parse.urlparse(_url)
        netloc = _url_parse.netloc
        if "@" in netloc:
            netloc_auth, netloc_host = netloc.split("@")
        else:
            netloc_auth, netloc_host = "", netloc
        ip, *port = netloc_host.split(":")
        port = port[0] if port else "80"
        user, *password = netloc_auth.split(":")
        password = password[0] if password else ""
        return {
            "protocol": protocol,
            "ip": ip,
            "port": port,
            "user": user,
            "password": password,
            "ip_port": "{}:{}".format(ip, port),
        }
+
+
class ProxyPoolBase(object):
    """Minimal interface every proxy pool implementation must honour."""

    def __init__(self, *args, **kwargs):
        # Accepts and ignores arbitrary configuration for subclass symmetry.
        pass

    def get(self, *args, **kwargs):
        """Return a proxy; concrete pools must override this."""
        raise NotImplementedError
+
+
class ProxyPool(ProxyPoolBase):
    """Proxy pool: queues ProxyItem objects and refreshes them from the
    configured sources whenever the pool runs low or grows stale."""

    def __init__(self, **kwargs):
        """
        :param size: pool size, -1 = unlimited
        :param proxy_source_url: proxy source address; a list is supported
        :param proxy_instance: instance that supplies proxies
        :param reset_interval: minimum interval between pool resets
        :param reset_interval_max: maximum interval between resets, default 2 min
        :param check_valid: whether to validity-check a proxy when handing it out
        :param local_proxy_file_cache_timeout: ttl of the locally cached proxy file
        :param logger: log handler, defaults to the module logger
        :param kwargs: other options (forwarded to ProxyItem)
        """
        kwargs.setdefault("size", -1)
        kwargs.setdefault("proxy_source_url", setting.PROXY_EXTRACT_API)

        super(ProxyPool, self).__init__(**kwargs)
        # maximum queue length
        self.max_queue_size = kwargs.get("size", -1)
        # number of proxies actually obtained from the source
        self.real_max_proxy_count = 1000
        # maximum hand-out count lives on ProxyItem
        # proxy source, e.g. http://localhost/proxy.txt
        self.proxy_source_url = kwargs.get("proxy_source_url", [])
        # NOTE(review): falsy filtering / dedup below only runs when a single
        # (non-list) url was supplied — same quirk as get_proxy_from_url.
        if not isinstance(self.proxy_source_url, list):
            self.proxy_source_url = [self.proxy_source_url]
            self.proxy_source_url = [x for x in self.proxy_source_url if x]
            self.proxy_source_url = list(set(self.proxy_source_url))
            kwargs.update({"proxy_source_url": self.proxy_source_url})
        # log handler
        self.logger = kwargs.get("logger") or log
        kwargs["logger"] = self.logger
        if not self.proxy_source_url:
            self.logger.warn("need set proxy_source_url or proxy_instance")

        # minimum interval between pool resets
        self.reset_interval = kwargs.get("reset_interval", 5)
        # force a reset after this long so fresh proxies replace banned ones
        self.reset_interval_max = kwargs.get("reset_interval_max", 180)
        # whether to validity-check proxies on the way out
        self.check_valid = kwargs.get("check_valid", True)

        # proxy queue
        self.proxy_queue = None
        # {proxy_id: ProxyItem, ...}
        self.proxy_dict = {}
        # invalidated proxies: {proxy_id: datetime invalidated}
        self.invalid_proxy_dict = {}

        self.kwargs = kwargs

        # lock guarding pool resets (created lazily, see reset_proxy_pool)
        self.reset_lock = None
        # time of the last reset
        self.last_reset_time = 0
        # counter for "reset requested too soon" events
        self.reset_fast_count = 0
        # number of times get() retried 3x and still found no valid proxy
        self.no_valid_proxy_times = 0

        # time a proxy was last handed out
        self.last_get_ts = time.time()

        # remembers ProxyItem.update_ts so rapid resets don't re-check validity
        self.proxy_item_update_ts_dict = {}

        # warning flag
        self.warn_flag = False

    def warn(self):
        # NOTE(review): this loop has no effect — the only statement is a
        # ``continue`` — so warn() merely flips warn_flag once; looks like
        # leftover vendor-specific ("zhima") warning logic. Confirm intent.
        if not self.warn_flag:
            for url in self.proxy_source_url:
                if "zhima" in url:
                    continue
            self.warn_flag = True
        return

    @property
    def queue_size(self):
        """
        Number of proxies currently queued in the pool.
        :return:
        """
        return self.proxy_queue.qsize() if self.proxy_queue is not None else 0

    def clear(self):
        """
        Reset internal state and prune stale bookkeeping records.
        :return:
        """
        self.proxy_queue = None
        # {proxy ip: ProxyItem, ...}
        self.proxy_dict = {}
        # drop invalid-proxy records older than 10 minutes
        _limit = datetime.datetime.now() - datetime.timedelta(minutes=10)
        self.invalid_proxy_dict = {
            k: v for k, v in self.invalid_proxy_dict.items() if v > _limit
        }
        # drop update_ts records older than 10 minutes
        _limit = time.time() - 600
        self.proxy_item_update_ts_dict = {
            k: v for k, v in self.proxy_item_update_ts_dict.items() if v > _limit
        }
        return

    def get(self, retry: int = 0) -> dict:
        """
        Take a proxy from the pool.
        :param retry: internal recursion counter; gives up after 3 attempts
        :return: proxies dict, or None when no valid proxy could be found
        """
        retry += 1
        if retry > 3:
            self.no_valid_proxy_times += 1
            return None
        # if time.time() - self.last_get_ts > 3 * 60:
        #     # nothing fetched for 3 minutes - reset once
        #     try:
        #         self.reset_proxy_pool()
        #     except Exception as e:
        #         self.logger.exception(e)
        # record the fetch time
        self.last_get_ts = time.time()
        #
        self.warn()
        proxy_item = self.get_random_proxy()
        if proxy_item:
            # validity checking disabled
            if not self.check_valid:  #
                # recycle the item back into the queue
                proxies = proxy_item.get_proxies()
                self.put_proxy_item(proxy_item)
                return proxies
            else:
                is_valid = proxy_item.is_valid()
                if is_valid:
                    # remember update_ts so resets don't re-check too soon
                    self.proxy_item_update_ts_dict[
                        proxy_item.proxy_id
                    ] = proxy_item.update_ts
                    # recycle the item back into the queue
                    proxies = proxy_item.get_proxies()
                    self.put_proxy_item(proxy_item)
                    if is_valid == 1:
                        if proxy_item.use_interval:
                            proxy_item.use_ts = time.time()
                        return proxies
                else:
                    # drop the invalid proxy and remember when it went bad
                    self.proxy_dict.pop(proxy_item.proxy_id, "")
                    self.invalid_proxy_dict[
                        proxy_item.proxy_id
                    ] = datetime.datetime.now()
        else:
            try:
                time.sleep(3)
                self.reset_proxy_pool()
            except Exception as e:
                self.logger.exception(e)
        if self.no_valid_proxy_times >= 5:
            # Workaround: when only one task remains, a single thread checks
            # proxies while the invalid set keeps growing over time, so get()
            # could stall forever and the crawler would hang — force a reset.
            try:
                time.sleep(3)
                self.reset_proxy_pool()
            except Exception as e:
                self.logger.exception(e)
        return self.get(retry)

    get_proxy = get

    def get_random_proxy(self) -> ProxyItem:
        """
        Pop a proxy from the queue, opportunistically triggering a refresh.
        :return: ProxyItem or None when the queue is empty
        """
        if self.proxy_queue is not None:
            if random.random() < 0.5:
                # only check half the time — this is a hot path
                if time.time() - self.last_reset_time > self.reset_interval_max:
                    time.sleep(3)
                    self.reset_proxy_pool(force=True)
                else:
                    min_q_size = (
                        min(self.max_queue_size / 2, self.real_max_proxy_count / 2)
                        if self.max_queue_size > 0
                        else self.real_max_proxy_count / 2
                    )
                    if self.proxy_queue.qsize() < min_q_size:
                        time.sleep(3)
                        self.reset_proxy_pool()
            try:
                return self.proxy_queue.get_nowait()
            except Exception:
                # queue.Empty — caller treats None as "no proxy available"
                pass
        return None

    def append_proxies(self, proxies_list: list) -> int:
        """
        Add proxies to the pool, skipping known-invalid and duplicate ones.
        :param proxies_list: proxies dict or list of proxies dicts
        :return: number of proxies actually added
        """
        count = 0
        if not isinstance(proxies_list, list):
            proxies_list = [proxies_list]
        for proxies in proxies_list:
            if proxies:
                proxy_item = ProxyItem(proxies=proxies, **self.kwargs)
                # skip proxies already marked invalid (added 2018/12/18)
                if proxy_item.proxy_id in self.invalid_proxy_dict:
                    continue
                if proxy_item.proxy_id not in self.proxy_dict:
                    # restore the remembered update_ts to avoid re-checking
                    if not proxy_item.update_ts:
                        proxy_item.update_ts = self.proxy_item_update_ts_dict.get(
                            proxy_item.proxy_id, 0
                        )
                    self.put_proxy_item(proxy_item)
                    self.proxy_dict[proxy_item.proxy_id] = proxy_item
                    count += 1
        return count

    def put_proxy_item(self, proxy_item: ProxyItem):
        """
        Put a ProxyItem into the pool queue.
        :param proxy_item:
        :return:
        """
        return self.proxy_queue.put_nowait(proxy_item)

    def reset_proxy_pool(self, force: bool = False):
        """
        Rebuild the proxy pool from the configured sources.
        :param force: reset even when the queue still looks healthy
        :return:
        """
        if not self.reset_lock:
            # Created lazily: importing threading before a gevent monkey-patch
            # would leave us with an unpatched RLock.
            import threading

            self.reset_lock = threading.RLock()
        with self.reset_lock:
            if (
                    force
                    or self.proxy_queue is None
                    or (
                    self.max_queue_size > 0
                    and self.proxy_queue.qsize() < self.max_queue_size / 2
            )
                    or (
                    self.max_queue_size < 0
                    and self.proxy_queue.qsize() < self.real_max_proxy_count / 2
            )
                    or self.no_valid_proxy_times >= 5
            ):
                if time.time() - self.last_reset_time < self.reset_interval:
                    # resetting too fast — back off and count it
                    self.reset_fast_count += 1
                    if self.reset_fast_count % 10 == 0:
                        self.logger.debug(
                            "代理池重置的太快了:) {}".format(self.reset_fast_count)
                        )
                        time.sleep(1)
                else:
                    self.clear()
                    if self.proxy_queue is None:
                        import queue

                        self.proxy_queue = queue.Queue()
                    # TODO the fetched proxies may contain duplicates
                    proxies_list = get_proxy_from_url(**self.kwargs)
                    self.real_max_proxy_count = len(proxies_list)
                    if 0 < self.max_queue_size < self.real_max_proxy_count:
                        proxies_list = random.sample(proxies_list, self.max_queue_size)
                    _valid_count = self.append_proxies(proxies_list)
                    self.last_reset_time = time.time()
                    self.no_valid_proxy_times = 0
                    self.logger.debug(
                        "重置代理池成功: 获取{}, 成功添加{}, 失效{},  当前代理数{},".format(
                            len(proxies_list),
                            _valid_count,
                            len(self.invalid_proxy_dict),
                            len(self.proxy_dict),
                        )
                    )
        return

    def tag_proxy(self, proxies_list: list, flag: int, *, delay=30) -> bool:
        """
        Tag proxies in the pool.
        :param proxies_list: proxies dict or list of proxies dicts
        :param flag:
                    -1  discard
                    1 delay before reuse
        :param delay: park duration in seconds
        :return: True when the flag was applicable, else False
        """
        if int(flag) not in ProxyItem.proxy_tag_list or not proxies_list:
            return False
        if not isinstance(proxies_list, list):
            proxies_list = [proxies_list]
        for proxies in proxies_list:
            if not proxies:
                continue
            proxy_id = ProxyItem(proxies).proxy_id
            if proxy_id not in self.proxy_dict:
                continue
            self.proxy_dict[proxy_id].flag = flag
            self.proxy_dict[proxy_id].flag_ts = time.time()
            self.proxy_dict[proxy_id].delay = delay

        return True

    def get_proxy_item(self, proxy_id="", proxies=None):
        """
        Look up the ProxyItem for an id or a proxies dict.
        :param proxy_id:
        :param proxies:
        :return: ProxyItem or None
        """
        if proxy_id:
            return self.proxy_dict.get(proxy_id)
        if proxies:
            proxy_id = ProxyItem(proxies).proxy_id
            return self.proxy_dict.get(proxy_id)
        return

    def copy(self):
        # Fresh pool sharing this pool's configuration (not its state).
        return ProxyPool(**self.kwargs)

    def all(self) -> list:
        """
        Fetch every proxy currently available from the sources
        (bypasses the queue).
        :return:
        """
        return get_proxy_from_url(**self.kwargs)

+ 524 - 0
A数据处理/site_monitor/network/request.py

@@ -0,0 +1,524 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2018-07-25 11:49:08
+---------
+@summary: 请求结构体
+---------
+@author: Boris
+@email:  boris_liu@foxmail.com
+"""
+
+import copy
+import re
+
+import requests
+from requests.cookies import RequestsCookieJar
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+
+import setting as setting
+import utils.tools as tools
+from db.redisdb import RedisDB
+from network import user_agent
+from network.downloader.base import Downloader, RenderDownloader
+from network.proxy_pool import ProxyPool
+from network.response import Response
+from utils.log import logger as log
+
+# 屏蔽warning信息
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+
+class Request:
+    user_agent_pool = user_agent
+    proxies_pool: ProxyPool = None
+
+    cache_db = None  # redis / pika
+    cached_redis_key = None  # 缓存response的文件文件夹 response_cached:cached_redis_key:md5
+    cached_expire_time = 1200  # 缓存过期时间
+
+    # 下载器
+    downloader: Downloader = None
+    session_downloader: Downloader = None
+    render_downloader: RenderDownloader = None
+
+    __REQUEST_ATTRS__ = {
+        # "method",
+        # "url",
+        "params",
+        "data",
+        "headers",
+        "cookies",
+        "files",
+        "auth",
+        "timeout",
+        "allow_redirects",
+        "proxies",
+        "hooks",
+        "stream",
+        "verify",
+        "cert",
+        "json",
+    }
+
+    _DEFAULT_KEY_VALUE_ = dict(
+        url="",
+        method=None,
+        retry_times=0,
+        priority=300,
+        parser_name=None,
+        callback=None,
+        filter_repeat=True,
+        auto_request=True,
+        request_sync=False,
+        use_session=None,
+        random_user_agent=True,
+        download_midware=None,
+        is_abandoned=False,
+        render=False,
+        render_time=0,
+        make_absolute_links=None,
+    )
+
+    _CUSTOM_PROPERTIES_ = {
+        "requests_kwargs",
+        "custom_ua",
+        "custom_proxies",
+    }
+
+    def __init__(
+        self,
+        url="",
+        retry_times=0,
+        priority=300,
+        parser_name=None,
+        callback=None,
+        filter_repeat=True,
+        auto_request=True,
+        request_sync=False,
+        use_session=None,
+        random_user_agent=True,
+        download_midware=None,
+        is_abandoned=False,
+        render=False,
+        render_time=0,
+        make_absolute_links=None,
+        **kwargs,
+    ):
+        """
+        @summary: Request参数
+        ---------
+        框架参数
+        @param url: 待抓取url
+        @param retry_times: 当前重试次数
+        @param priority: 优先级 越小越优先 默认300
+        @param parser_name: 回调函数所在的类名 默认为当前类
+        @param callback: 回调函数 可以是函数 也可是函数名(如想跨类回调时,parser_name指定那个类名,callback指定那个类想回调的方法名即可)
+        @param filter_repeat: 是否需要去重 (True/False) 当setting中的REQUEST_FILTER_ENABLE设置为True时该参数生效 默认True
+        @param auto_request: 是否需要自动请求下载网页 默认是。设置为False时返回的response为空,需要自己去请求网页
+        @param request_sync: 是否同步请求下载网页,默认异步。如果该请求url过期时间快,可设置为True,相当于yield的reqeust会立即响应,而不是去排队
+        @param use_session: 是否使用session方式
+        @param random_user_agent: 是否随机User-Agent (True/False) 当setting中的RANDOM_HEADERS设置为True时该参数生效 默认True
+        @param download_midware: 下载中间件。默认为parser中的download_midware
+        @param is_abandoned: 当发生异常时是否放弃重试 True/False. 默认False
+        @param render: 是否用浏览器渲染
+        @param render_time: 渲染时长,即打开网页等待指定时间后再获取源码
+        @param make_absolute_links: 是否转成绝对连接,默认是
+        --
+        以下参数与requests参数使用方式一致
+        @param method: 请求方式,如POST或GET,默认根据data值是否为空来判断
+        @param params: 请求参数
+        @param data: 请求body
+        @param json: 请求json字符串,同 json.dumps(data)
+        @param headers:
+        @param cookies: 字典 或 CookieJar 对象
+        @param files:
+        @param auth:
+        @param timeout: (浮点或元组)等待服务器数据的超时限制,是一个浮点数,或是一个(connect timeout, read timeout) 元组
+        @param allow_redirects : Boolean. True 表示允许跟踪 POST/PUT/DELETE 方法的重定向
+        @param proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
+        @param verify: 为 True 时将会验证 SSL 证书
+        @param stream: 如果为 False,将会立即下载响应内容
+        @param cert:
+        --
+        @param **kwargs: 其他值: 如 Request(item=item) 则item可直接用 request.item 取出
+        ---------
+        @result:
+        """
+
+        self.url = url
+        self.method = None
+        self.retry_times = retry_times
+        self.priority = priority
+        self.parser_name = parser_name
+        self.callback = callback
+        self.filter_repeat = filter_repeat
+        self.auto_request = auto_request
+        self.request_sync = request_sync
+        self.use_session = use_session
+        self.random_user_agent = random_user_agent
+        self.download_midware = download_midware
+        self.is_abandoned = is_abandoned
+        self.render = render
+        self.render_time = render_time
+        self.make_absolute_links = (
+            make_absolute_links
+            if make_absolute_links is not None
+            else setting.MAKE_ABSOLUTE_LINKS
+        )
+
+        # 自定义属性,不参与序列化
+        self.requests_kwargs = {}
+        for key, value in kwargs.items():
+            if key in self.__class__.__REQUEST_ATTRS__:  # 取requests参数
+                self.requests_kwargs[key] = value
+
+            self.__dict__[key] = value
+
+        self.custom_ua = False
+        self.custom_proxies = False
+
+    def __repr__(self):
+        try:
+            return "<Request {}>".format(self.url)
+        except:
+            return "<Request {}>".format(str(self.to_dict)[:40])
+
+    def __setattr__(self, key, value):
+        """
+        针对 request.xxx = xxx 的形式,更新reqeust及内部参数值
+        @param key:
+        @param value:
+        @return:
+        """
+        self.__dict__[key] = value
+
+        if key in self.__class__.__REQUEST_ATTRS__:
+            self.requests_kwargs[key] = value
+
+    def __lt__(self, other):
+        return self.priority < other.priority
+
+    @property
+    def _proxies_pool(self):
+        if not self.__class__.proxies_pool:
+            self.__class__.proxies_pool = ProxyPool()
+
+        return self.__class__.proxies_pool
+
+    @property
+    def _downloader(self):
+        if not self.__class__.downloader:
+            self.__class__.downloader = tools.import_cls(setting.DOWNLOADER)()
+
+        return self.__class__.downloader
+
+    @property
+    def _session_downloader(self):
+        if not self.__class__.session_downloader:
+            self.__class__.session_downloader = tools.import_cls(
+                setting.SESSION_DOWNLOADER
+            )()
+
+        return self.__class__.session_downloader
+
+    @property
+    def _render_downloader(self):
+        if not self.__class__.render_downloader:
+            self.__class__.render_downloader = tools.import_cls(
+                setting.RENDER_DOWNLOADER
+            )()
+
+        return self.__class__.render_downloader
+
    @property
    def to_dict(self):
        """
        Serialize this request to a plain dict for queueing/persistence.

        NOTE: accessing this property mutates self -- ``callback`` and
        ``download_midware`` are replaced by their ``__name__`` strings so
        they can be serialized. Non-primitive values are pickled via
        ``tools.dumps_obj``.
        """
        request_dict = {}

        # Replace a callable callback with its name string.
        self.callback = (
            getattr(self.callback, "__name__")
            if callable(self.callback)
            else self.callback
        )

        # Same normalization for download middlewares (single or list).
        if isinstance(self.download_midware, (tuple, list)):
            self.download_midware = [
                getattr(download_midware, "__name__")
                if callable(download_midware)
                else download_midware
                for download_midware in self.download_midware
            ]
        else:
            self.download_midware = (
                getattr(self.download_midware, "__name__")
                if callable(self.download_midware)
                else self.download_midware
            )

        for key, value in self.__dict__.items():
            # Skip attributes still holding their default value, and custom
            # properties that must not be serialized.
            if (
                key in self.__class__._DEFAULT_KEY_VALUE_
                and self.__class__._DEFAULT_KEY_VALUE_.get(key) == value
                or key in self.__class__._CUSTOM_PROPERTIES_
            ):
                continue

            if value is not None:
                if key in self.__class__.__REQUEST_ATTRS__:
                    # requests-native kwargs may stay as basic containers.
                    if not isinstance(
                        value, (bytes, bool, float, int, str, tuple, list, dict)
                    ):
                        value = tools.dumps_obj(value)
                else:
                    # Other attributes: only scalars pass through unpickled.
                    if not isinstance(value, (bytes, bool, float, int, str)):
                        value = tools.dumps_obj(value)

            request_dict[key] = value

        return request_dict
+
+    @property
+    def callback_name(self):
+        return (
+            getattr(self.callback, "__name__")
+            if callable(self.callback)
+            else self.callback
+        )
+
    def make_requests_kwargs(self):
        """
        Normalize ``requests_kwargs`` before downloading: fill in default
        timeout, stream, verify, HTTP method, User-Agent and proxies.
        """
        # Default timeout
        self.requests_kwargs.setdefault(
            "timeout", setting.REQUEST_TIMEOUT
        )  # connect=22 read=22

        # stream:
        # By default the response body is downloaded immediately.
        # With stream=True only the headers are fetched; the body is only
        # downloaded when Response.content is accessed.
        # Drawback: with stream=True requests cannot release the connection
        # back to the pool unless all data is consumed or Response.close is
        # called, which hurts connection reuse.
        self.requests_kwargs.setdefault("stream", True)

        # Disable TLS certificate verification
        self.requests_kwargs.setdefault("verify", False)

        # Pick the HTTP method: POST when a body is present, GET otherwise
        method = self.__dict__.get("method")
        if not method:
            if "data" in self.requests_kwargs or "json" in self.requests_kwargs:
                method = "POST"
            else:
                method = "GET"
        self.method = method

        # User-Agent handling
        headers = self.requests_kwargs.get("headers", {})
        if "user-agent" not in headers and "User-Agent" not in headers:
            if self.random_user_agent and setting.RANDOM_HEADERS:
                # Pick a random user-agent from the pool
                ua = self.__class__.user_agent_pool.get(setting.USER_AGENT_TYPE)
                headers.update({"User-Agent": ua})
                self.requests_kwargs.update(headers=headers)
            else:
                # Fall back to the configured default user-agent.
                # NOTE(review): if a headers dict was passed without a UA,
                # this setdefault is a no-op, so DEFAULT_USERAGENT only
                # applies when no headers were given at all -- confirm intended.
                self.requests_kwargs.setdefault(
                    "headers", {"User-Agent": setting.DEFAULT_USERAGENT}
                )
        else:
            self.custom_ua = True

        # Proxies: -1 marks "not provided by the caller"
        proxies = self.requests_kwargs.get("proxies", -1)
        if proxies == -1 and setting.PROXY_ENABLE and setting.PROXY_EXTRACT_API:
            while True:
                # NOTE(review): busy-waits with no sleep while the pool is
                # empty -- consider delaying between attempts.
                proxies = self._proxies_pool.get()
                if proxies:
                    self.requests_kwargs.update(proxies=proxies)
                    break
                else:
                    log.debug("暂无可用代理 ...")
        else:
            self.custom_proxies = True
+
    def get_response(self, save_cached=False):
        """
        Download and return a Response with selector support.

        @param save_cached: cache the response in redis (debug aid, avoids
                            re-downloading on every run)
        @return: Response
        """
        self.make_requests_kwargs()

        log.debug(
            """
                -------------- %srequest for ----------------
                url  = %s
                method = %s
                args = %s
                """
            % (
                ""
                if not self.parser_name
                else "%s.%s "
                % (
                    self.parser_name,
                    (
                        self.callback
                        and callable(self.callback)
                        and getattr(self.callback, "__name__")
                        or self.callback
                    )
                    or "parse",
                ),
                self.url,
                self.method,
                self.requests_kwargs,
            )
        )

        # def hooks(response, *args, **kwargs):
        #     print(response.url)
        #
        # self.requests_kwargs.update(hooks={'response': hooks})

        # self.use_session takes precedence over the global setting
        use_session = (
            setting.USE_SESSION if self.use_session is None else self.use_session
        )

        # Route to the rendering / session / plain downloader.
        if self.render:
            response = self._render_downloader.download(self)
        elif use_session:
            response = self._session_downloader.download(self)
        else:
            response = self._downloader.download(self)

        # Propagate the link-absolutization flag onto the response.
        response.make_absolute_links = self.make_absolute_links

        if save_cached:
            self.save_cached(response, expire_time=self.__class__.cached_expire_time)

        return response
+
+    def get_params(self):
+        return self.requests_kwargs.get("params")
+
+    def get_proxies(self) -> dict:
+        """
+
+        Returns: {"https": "https://ip:port", "http": "http://ip:port"}
+
+        """
+        return self.requests_kwargs.get("proxies")
+
+    def get_proxy(self) -> str:
+        """
+
+        Returns: ip:port
+
+        """
+        proxies = self.get_proxies()
+        if proxies:
+            return re.sub(
+                "http.*?//", "", proxies.get("http", "") or proxies.get("https", "")
+            )
+
+    def get_headers(self) -> dict:
+        return self.requests_kwargs.get("headers", {})
+
+    def get_user_agent(self) -> str:
+        return self.get_headers().get("user_agent") or self.get_headers().get(
+            "User-Agent"
+        )
+
+    def get_cookies(self) -> dict:
+        cookies = self.requests_kwargs.get("cookies")
+        if cookies and isinstance(cookies, RequestsCookieJar):
+            cookies = cookies.get_dict()
+
+        if not cookies:
+            cookie_str = self.get_headers().get("Cookie") or self.get_headers().get(
+                "cookie"
+            )
+            if cookie_str:
+                cookies = tools.get_cookies_from_str(cookie_str)
+        return cookies
+
+    @property
+    def fingerprint(self):
+        """
+        request唯一表识
+        @return:
+        """
+        url = self.__dict__.get("url", "")
+        # url 归一化
+        url = tools.canonicalize_url(url)
+        args = [url]
+
+        for arg in ["params", "data", "files", "auth", "cert", "json"]:
+            if self.requests_kwargs.get(arg):
+                args.append(self.requests_kwargs.get(arg))
+
+        return tools.get_md5(*args)
+
+    @property
+    def _cache_db(self):
+        if not self.__class__.cache_db:
+            self.__class__.cache_db = RedisDB()  # .from_url(setting.pika_spider_1_uri)
+
+        return self.__class__.cache_db
+
+    @property
+    def _cached_redis_key(self):
+        if self.__class__.cached_redis_key:
+            return (
+                f"response_cached:{self.__class__.cached_redis_key}:{self.fingerprint}"
+            )
+        else:
+            return f"response_cached:test:{self.fingerprint}"
+
+    def save_cached(self, response, expire_time=1200):
+        """
+        使用redis保存response 用于调试 不用每回都下载
+        @param response:
+        @param expire_time: 过期时间
+        @return:
+        """
+
+        self._cache_db.strset(self._cached_redis_key, response.to_dict, ex=expire_time)
+
    def get_response_from_cached(self, save_cached=True):
        """
        Fetch the response from the redis cache, downloading on a miss.

        Caveats versus a freshly downloaded response:
            attributes that come back empty:
                -raw : urllib3.response.HTTPResponse
                -connection:requests.adapters.HTTPAdapter
                -history

            attributes whose meaning changes:
                - request: becomes this Request instead of requests' request
        @param: save_cached on a cache miss, whether to cache the fresh download
        @return: Response
        """
        response_dict = self._cache_db.strget(self._cached_redis_key)
        if not response_dict:
            log.info("无response缓存  重新下载")
            response_obj = self.get_response(save_cached=save_cached)
        else:
            # SECURITY NOTE(review): eval() on redis content executes arbitrary
            # expressions; safe only if the redis instance is fully trusted.
            # ast.literal_eval is not a drop-in replacement because the stored
            # dict may contain non-literal reprs (e.g. the headers object).
            response_dict = eval(response_dict)
            response_obj = Response.from_dict(response_dict)
        return response_obj
+
+    def del_response_cached(self):
+        self._cache_db.clear(self._cached_redis_key)
+
+    @classmethod
+    def from_dict(cls, request_dict):
+        for key, value in request_dict.items():
+            if isinstance(value, bytes):  # 反序列化 如item
+                request_dict[key] = tools.loads_obj(value)
+
+        return cls(**request_dict)
+
+    def copy(self):
+        return self.__class__.from_dict(copy.deepcopy(self.to_dict))

+ 396 - 0
A数据处理/site_monitor/network/response.py

@@ -0,0 +1,396 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-05-10 
+---------
+@summary:  
+---------
+@author: Dzr
+"""
+import copy
+import datetime
+import os
+import re
+import time
+from urllib.parse import urlparse, urlunparse, urljoin
+
+from bs4 import BeautifulSoup
+from bs4.dammit import UnicodeDammit
+from lxml.html import fromstring, HtmlElement
+from lxml.html.clean import Cleaner
+from parsel import Selector
+from requests.cookies import RequestsCookieJar
+from requests.models import Response as res
+from w3lib.encoding import (
+    http_content_type_encoding,
+    html_body_declared_encoding
+)
+
+from utils.log import logger as log
+
# Encoding requests falls back to when detection fails.
FAIL_ENCODING = "ISO-8859-1"

# Special characters in html source that must be removed, otherwise they
# break building the etree.
SPECIAL_CHARACTERS = [
    # Control characters; full list: https://zh.wikipedia.org/wiki/%E6%8E%A7%E5%88%B6%E5%AD%97%E7%AC%A6
    "[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]"
]

# Pre-compiled regex patterns for the character classes above.
SPECIAL_CHARACTER_PATTERNS = [
    re.compile(special_character) for special_character in SPECIAL_CHARACTERS
]
+
+
def iter_node(element: HtmlElement):
    """Pre-order traversal: yield *element*, then every HtmlElement beneath it."""
    yield element
    for child in element:
        if not isinstance(child, HtmlElement):
            continue
        yield from iter_node(child)
+
+
class Response(res):
    """requests.Response wrapper adding lazy text decoding with layered
    encoding detection, absolute-link rewriting, control-character stripping,
    parsel/bs4 selector helpers, and dict (de)serialization for caching.
    """

    def __init__(self, response):
        super(Response, self).__init__()
        # Adopt the full state of the wrapped requests.Response instance.
        self.__dict__.update(response.__dict__)

        # Lazily computed caches, reset when the encoding is reassigned.
        self._cached_selector = None
        self._cached_text = None
        self._cached_json = None

        # Explicitly set encoding; overrides detection when not None.
        self._encoding = None

        self.encoding_errors = "strict"  # strict / replace / ignore

    @classmethod
    def from_dict(cls, response_dict):
        """
        Build a Response from a dict snapshot (see ``to_dict``).
        @param response_dict: the raw response.__dict__
        @return: Response
        """
        cookie_jar = RequestsCookieJar()
        cookie_jar.update(other=response_dict["cookies"])
        response_dict["cookies"] = cookie_jar

        response_dict["elapsed"] = datetime.timedelta(
            0, 0, response_dict["elapsed"]
        )  # elapsed time (stored as microseconds)
        response_dict["connection"] = None
        response_dict["_content_consumed"] = True

        response = res()
        response.__dict__.update(response_dict)
        return cls(response)

    @property
    def to_dict(self):
        # Minimal serializable snapshot of this response (inverse: from_dict).
        response_dict = {
            "_content": self.content,
            "cookies": self.cookies.get_dict(),
            "encoding": self.encoding,
            "headers": self.headers,
            "status_code": self.status_code,
            "elapsed": self.elapsed.microseconds,  # elapsed time
            "url": self.url,
        }

        return response_dict

    def __clear_cache(self):
        # Invalidate the decoded caches (called when encoding is reassigned).
        self.__dict__["_cached_selector"] = None
        self.__dict__["_cached_text"] = None
        self.__dict__["_cached_json"] = None

    @property
    def encoding(self):
        """
        Encoding priority: explicitly set encoding > headers encoding >
        encoding declared in the page > encoding guessed from content.
        """
        self._encoding = (
            self._encoding
            or self._headers_encoding()
            or self._body_declared_encoding()
            or self.apparent_encoding
        )
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        self.__clear_cache()
        self._encoding = val

    code = encoding  # alias: response.code behaves like response.encoding

    def _headers_encoding(self):
        """
        Charset taken from the Content-Type header.

        NOTE(review): the conditional binds the whole ``or`` expression, so
        this returns None for any non-JSON content type even when the header
        declares a charset (e.g. "text/html; charset=gbk") -- confirm whether
        the header charset should also win for non-JSON responses.
        """
        content_type = self.headers.get("Content-Type") or self.headers.get(
            "content-type"
        )
        if content_type:
            return (
                http_content_type_encoding(content_type) or "utf-8"
                if "application/json" in content_type
                else None
            )

    def _body_declared_encoding(self):
        """
        Encoding declared in the document itself, e.g. <meta charset="...">.
        """

        return html_body_declared_encoding(self.content)

    def _get_unicode_html(self, html):
        # Last-resort decoding via BeautifulSoup's UnicodeDammit sniffer.
        if not html or not isinstance(html, bytes):
            return html

        converted = UnicodeDammit(html, is_html=True)
        if not converted.unicode_markup:
            raise Exception(
                "Failed to detect encoding of article HTML, tried: %s"
                % ", ".join(converted.tried_encodings)
            )

        html = converted.unicode_markup
        return html

    def _make_absolute(self, link):
        """Makes a given link absolute (resolved against self.url)."""
        try:

            link = link.strip()

            # Parse the link with stdlib.
            parsed = urlparse(link)._asdict()

            # If link is relative, then join it with base_url.
            if not parsed["netloc"]:
                return urljoin(self.url, link)

            # Link is absolute; if it lacks a scheme, add one from base_url.
            if not parsed["scheme"]:
                parsed["scheme"] = urlparse(self.url).scheme

                # Reconstruct the URL to incorporate the new scheme.
                parsed = (v for v in parsed.values())
                return urlunparse(parsed)

        except Exception as e:
            log.error(
                "Invalid URL <{}> can't make absolute_link. exception: {}".format(
                    link, e
                )
            )

        # Link is absolute and complete with scheme; nothing to be done here.
        return link

    def _absolute_links(self, text):
        # Rewrite relative a/img/link/script urls in the html to absolute ones.
        regexs = [
            r'(<(?i)a.*?href\s*?=\s*?["\'])(.+?)(["\'])',  # a
            r'(<(?i)img.*?src\s*?=\s*?["\'])(.+?)(["\'])',  # img
            r'(<(?i)link.*?href\s*?=\s*?["\'])(.+?)(["\'])',  # css
            r'(<(?i)script.*?src\s*?=\s*?["\'])(.+?)(["\'])',  # js
        ]

        for regex in regexs:

            def replace_href(text):
                # html = text.group(0)
                link = text.group(2)
                absolute_link = self._make_absolute(link)

                # Group concatenation instead of re.sub with a template:
                # backreference templates break on some characters, e.g. the
                # source of http://permit.mep.gov.cn/permitExt/syssb/xxgk/xxgk!showImage.action?dataid=0b092f8115ff45c5a50947cdea537726
                return text.group(1) + absolute_link + text.group(3)

            text = re.sub(regex, replace_href, text, flags=re.S)

        return text

    def _del_special_character(self, text):
        """
        Strip special (control) characters that break etree construction.
        """
        for special_character_pattern in SPECIAL_CHARACTER_PATTERNS:
            text = special_character_pattern.sub("", text)

        return text

    @property
    def __text(self):
        """Content of the response, in unicode.

        If Response.encoding is None, encoding will be guessed using
        ``chardet``.

        The encoding of the response content is determined based solely on HTTP
        headers, following RFC 2616 to the letter. If you can take advantage of
        non-HTTP knowledge to make a better guess at the encoding, you should
        set ``r.encoding`` appropriately before accessing this property.
        """

        if not self.content:
            return ""

        # Decode unicode from given encoding.
        try:
            content = str(self.content, self.encoding, errors=self.encoding_errors)
        except (LookupError, TypeError):
            # A LookupError is raised if the encoding was not found which could
            # indicate a misspelling or similar mistake.
            #
            # A TypeError can be raised if encoding is None
            #
            # So we try blindly encoding.
            content = str(self.content, errors=self.encoding_errors)

        return content

    @property
    def text(self):
        # Decoded page text: decoded once, then links absolutized and control
        # characters removed; the result is cached.
        if self._cached_text is None:
            if self.encoding and self.encoding.upper() != FAIL_ENCODING:
                try:
                    self._cached_text = self.__text
                except UnicodeDecodeError:
                    self._cached_text = self._get_unicode_html(self.content)
            else:
                self._cached_text = self._get_unicode_html(self.content)

            if self._cached_text:
                self._cached_text = self._absolute_links(self._cached_text)
                self._cached_text = self._del_special_character(self._cached_text)

        return self._cached_text

    @text.setter
    def text(self, html):
        # Replacing the text re-applies link/character normalization and
        # rebuilds the selector cache.
        self._cached_text = html
        self._cached_text = self._absolute_links(self._cached_text)
        self._cached_text = self._del_special_character(self._cached_text)
        self._cached_selector = Selector(self.text)

    @property
    def json(self, **kwargs):
        # NOTE(review): as a property, **kwargs can never be supplied by
        # callers; they only forward to requests' json() when empty.
        if self._cached_json is None:
            self.encoding = self.encoding or "utf-8"
            self._cached_json = super(Response, self).json(**kwargs)

        return self._cached_json

    @property
    def content(self):
        # Raw body bytes, delegated to requests.Response.
        content = super(Response, self).content
        return content

    @property
    def is_html(self):
        # True when the Content-Type header marks this as html.
        content_type = self.headers.get("Content-Type", "")
        if "text/html" in content_type:
            return True
        else:
            return False

    @property
    def selector(self):
        # Cached parsel Selector over the decoded text.
        if self._cached_selector is None:
            self._cached_selector = Selector(self.text)
        return self._cached_selector

    def bs4(self, features="html.parser"):
        # BeautifulSoup view over the decoded text.
        soup = BeautifulSoup(self.text, features)
        return soup

    def extract(self):
        # Full (normalized) html as parsel sees it.
        return self.selector.get()

    def xpath(self, query, **kwargs):
        return self.selector.xpath(query, **kwargs)

    def css(self, query):
        return self.selector.css(query)

    def re(self, regex, replace_entities=False):
        """
        @summary: regex search over the decoded text
        Note: page source like <a class='page-numbers'... is normalized to
        <a class="page-numbers", so write patterns with double quotes, e.g.
        <a class="(.*?)". Non-html text keeps its original quoting.
        For convenience, single and double quotes in the pattern are treated
        as interchangeable.
        ---------
        @param regex: pattern string or re.compile object
        @param replace_entities: when True, strips &nbsp; etc. and unescapes
        &quot; to ", which changes the page structure. Keep it False when
        extracting json from page source.
        ---------
        @result: list of matches
        """

        # Make single and double quotes interchangeable
        if isinstance(regex, str):
            regex = re.sub("['\"]", "['\"]", regex)

        return self.selector.re(regex, replace_entities)

    def re_first(self, regex, default=None, replace_entities=False):
        """
        @summary: regex search over the decoded text, first match only
        Note: page source like <a class='page-numbers'... is normalized to
        <a class="page-numbers", so write patterns with double quotes, e.g.
        <a class="(.*?)". Non-html text keeps its original quoting.
        For convenience, single and double quotes in the pattern are treated
        as interchangeable.
        ---------
        @param regex: pattern string or re.compile object
        @param default: value returned when nothing matches
        @param replace_entities: when True, strips &nbsp; etc. and unescapes
        &quot; to ", which changes the page structure. Keep it False when
        extracting json from page source.
        ---------
        @result: first match or the default
        """

        # Make single and double quotes interchangeable
        if isinstance(regex, str):
            regex = re.sub("['\"]", "['\"]", regex)

        return self.selector.re_first(regex, default, replace_entities)

    def close_browser(self, request):
        # Return the rendering browser to the driver pool, if one was attached.
        if hasattr(self, "browser"):
            request._webdriver_pool.remove(self.browser)
            del self.browser

    def __del__(self):
        # Release the underlying connection when garbage collected.
        self.close()

    def open(self, delete_temp_file=False):
        # Dump the decoded page to temp.html and open it in the default
        # application. NOTE(review): `open` is the macOS launcher -- confirm
        # this debug helper is only used on macOS.
        with open("temp.html", "w", encoding=self.encoding, errors="replace") as html:
            self.encoding_errors = "replace"
            html.write(self.text)

        os.system("open temp.html")

        if delete_temp_file:
            time.sleep(1)
            os.remove("temp.html")

    @property
    def plain_text(self):
        # List of individual CJK characters found in the decoded text
        # (re.findall with a single-character class yields one char per match).
        return re.findall('[\u4e00-\u9fa5]', self.text, re.S)

    def tags(self):
        """Histogram of element tag names in the cleaned document, plus a
        'tags_count' total. Returns {'tags_count': 0} for an empty page.
        """
        tags_dict = {}

        # deepcopy of a str is effectively a copy; kept for safety.
        html = copy.deepcopy(self.text)
        if len(html) == 0:
            tags_dict['tags_count'] = 0
            return tags_dict

        # Strip scripts/styles etc. before counting structure.
        cleaner = Cleaner()
        html = cleaner.clean_html(html)

        count = 0
        node = fromstring(html)
        for elem in iter_node(node.xpath('/html')[0]):
            count += 1
            tag = elem.tag
            if not tags_dict.get(tag):
                tags_dict[tag] = 1
            else:
                tags_dict[tag] += 1

        tags_dict['tags_count'] = count
        return tags_dict

+ 389 - 0
A数据处理/site_monitor/network/user_agent.py

@@ -0,0 +1,389 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2016-12-28 17:55
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import random
+
+USER_AGENTS = {
+    "chrome": [
+        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
+        "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
+        "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
+        "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
+        "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
+    ],
+    "opera": [
+        "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
+        "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
+        "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
+        "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
+        "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
+        "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
+        "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
+        "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
+        "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
+        "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
+        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
+        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
+        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
+        "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
+        "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
+        "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
+        "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
+        "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
+        "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
+        "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
+        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
+        "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
+        "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
+        "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
+        "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
+        "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
+        "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
+        "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
+        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
+    ],
+    "firefox": [
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
+        "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
+        "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
+        "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
+        "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
+        "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101  Firefox/28.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
+        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
+        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
+        "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
+        "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
+        "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
+        "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
+        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
+        "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
+        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
+        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
+        "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
+        "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
+        "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0)  Gecko/20100101 Firefox/18.0",
+        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
+    ],
+    "internetexplorer": [
+        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
+        "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0;  rv:11.0) like Gecko",
+        "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
+        "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
+        "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
+        "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
+        "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
+        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; Tablet PC 2.0; InfoPath.3; .NET4.0C; .NET4.0E)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; yie8)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; .NET4.0C; Tablet PC 2.0)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; FunWebProducts)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/13.0.782.215)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/11.0.696.57)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) chromeframe/10.0.648.205",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.1; SV1; .NET CLR 2.8.52393; WOW64; en-US)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; chromeframe/11.0.696.57)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/4.0; GTB7.4; InfoPath.3; SV1; .NET CLR 3.1.76908; WOW64; en-US)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.8.36217; WOW64; en-US)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; .NET CLR 2.7.58687; SLCC2; Media Center PC 5.0; Zune 3.4; Tablet PC 3.6; InfoPath.3)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; SLCC1; .NET CLR 1.1.4322)",
+        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 3.0.04506.30)",
+        "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.0; Trident/4.0; FBSMTWB; .NET CLR 2.0.34861; .NET CLR 3.0.3746.3218; .NET CLR 3.5.33652; msn OptimizedIE8;ENUS)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.2; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.2)",
+        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 3.0)",
+    ],
+    "safari": [
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; ko-KR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; fr-FR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; cs-CZ) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; sv-se) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ko-kr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; it-it) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-fr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; es-es) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-gb) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; de-de) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; sv-SE) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; hu-HU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; it-IT) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-us) AppleWebKit/534.16+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-ch) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; ar) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Android 2.2; Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-HK) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; tr-TR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; nb-NO) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-TW) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; zh-cn) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
+    ],
+    "mobile": [
+        "Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/14.2 Safari/536.2+",
+        "Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/14.2 Safari/536.2+",
+        "Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/14.2 Mobile Safari/537.10+",
+        "Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/14.2 Mobile Safari/537.10+",
+        "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/14.2 Mobile Safari/534.30",
+        "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/14.2 Mobile/14E304 Safari/602.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/14.2 Mobile/14E304 Safari/602.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/14.2 Mobile/15A372 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Mobile/15E148 Safari/604.1",
+        "Mozilla/5.0 (Mobile; LYF/F300B/LYF-F300B-001-01-15-130718-i;Android; rv:89.0 Gecko/48.0 Firefox/90.0 KAIOS/2.5",
+        "Mozilla/5.0 (Mobile; LYF/F300B/LYF-F300B-001-01-15-130718-i;Android; rv:89.0 Gecko/48.0 Firefox/90.0 KAIOS/2.5",
+        "Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true",
+        "Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true",
+        "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
+        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
+        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
+        "Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36 Edge/14.14263",
+        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)",
+        "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)",
+        "Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13",
+        "Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13",
+        "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 11; Pixel 4a (5G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 11; Pixel 4a (5G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Mobile Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36 Edg/93.0.4576.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0 Gecko/20100101 Firefox/90.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.2 Safari/605.1.15",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4576.0 Safari/537.36 Edg/93.0.4576.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0 Gecko/20100101 Firefox/90.0",
+    ],
+}
+
+
def get(ua_type: str = None):
    """Return a random User-Agent string.

    :param ua_type: optional key of USER_AGENTS; any falsy value picks a random type.
    :raises ValueError: when ua_type is given but is not a known type.
    """
    known_types = list(USER_AGENTS.keys())
    if not ua_type:
        ua_type = random.choice(known_types)
    elif ua_type not in USER_AGENTS:
        raise ValueError("ua_type error, expect one of {}".format(known_types))

    return random.choice(USER_AGENTS[ua_type])

+ 14 - 0
A数据处理/site_monitor/requirements.txt

@@ -0,0 +1,14 @@
+beautifulsoup4==4.9.3
+bs4==0.0.1
+loguru==0.5.3
+lxml==4.9.1
+numpy==1.24.1
+parsel==1.7.0
+playwright==1.24.1
+pymongo==3.12.0
+redis==3.5.3
+requests==2.30.0
+six==1.16.0
+w3lib==2.1.1
+PyExecJS>=1.5.1
+redis-py-cluster>=2.1.0

+ 65 - 0
A数据处理/site_monitor/setting.py

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
"""Crawler configuration file."""
import os

# MONGODB
MONGO_IP = "172.17.4.87"
MONGO_PORT = 27080
MONGO_DB = "py_spider"
MONGO_USER_NAME = os.getenv("MONGO_USER_NAME")
MONGO_USER_PASS = os.getenv("MONGO_USER_PASS")

# REDIS
# ip:port — multiple nodes may be a list or comma separated,
# e.g. ip1:port1,ip2:port2 or ["ip1:port1", "ip2:port2"]
REDISDB_IP_PORTS = "172.17.4.232:7361"
# NOTE(review): credential hard-coded in source — consider os.getenv, as done for Mongo above
REDISDB_USER_PASS = "k5ZJR5KV4q7DRZ92DQ"
REDISDB_DB = 4
# Service name, used for Redis sentinel mode
REDISDB_SERVICE_NAME = os.getenv("REDISDB_SERVICE_NAME")

# Browser rendering
PLAYWRIGHT = dict(
    user_agent=None,  # a string, or a zero-arg callable returning a user_agent
    proxy=None,  # xxx.xxx.xxx.xxx:xxxx, or a zero-arg callable returning a proxy address
    headless=True,  # run the browser headless
    driver_type="webkit",  # chromium, firefox or webkit
    timeout=60,  # request timeout (seconds)
    window_size=(1024, 800),  # window size
    executable_path=None,  # browser binary path; None uses the default install
    download_path=None,  # directory for downloaded files
    render_time=0,  # seconds to wait after opening the page before grabbing the source
    wait_until="networkidle",  # page-load event to wait for: "commit", "domcontentloaded", "load", "networkidle"
    use_stealth_js=False,  # inject stealth.min.js to hide browser automation fingerprints
    page_on_event_callback=None,  # page.on() event callbacks, e.g. page_on_event_callback={"dialog": lambda dialog: dialog.accept()}
    storage_state_path=None,  # path for persisted browser state
    url_regexes=None,  # request-interception patterns; list of regexes
    save_all=False,  # save every intercepted response (with url_regexes); False keeps only the last one
)

# Network timeout for plain `requests` downloads
REQUEST_TIMEOUT = 30  # float seconds, or a (connect timeout, read timeout) tuple

# Proxy settings
PROXY_EXTRACT_API = "http://proxy.spdata.jianyu360.com/proxy/getallip"  # proxy extraction API; proxies separated by \r\n
PROXY_ENABLE = True

# Randomized request headers
RANDOM_HEADERS = True
# UserAgent type: one of 'chrome', 'opera', 'firefox', 'internetexplorer', 'safari', 'mobile'; random when unset
USER_AGENT_TYPE = "chrome"
# Default User-Agent header
DEFAULT_USERAGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
# Use a requests Session for downloads
USE_SESSION = False

# Downloader implementations (dotted import paths)
DOWNLOADER = "network.downloader.RequestsDownloader"
SESSION_DOWNLOADER = "network.downloader.RequestsSessionDownloader"
RENDER_DOWNLOADER = "network.downloader.PlaywrightDownloader"
MAKE_ABSOLUTE_LINKS = True  # rewrite relative links to absolute ones

# WeChat Work alerting
WECHAT_WARNING_URL = ""  # WeChat Work robot api
WECHAT_WARNING_PHONE = ""  # who to @ in the group; a list may be given to mention several people
WECHAT_WARNING_ALL = False  # @everyone; default False
WARNING_INTERVAL = 3600  # seconds between identical warnings (anti-spam); 0 disables de-duplication

+ 8 - 0
A数据处理/site_monitor/utils/__init__.py

@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2023-05-10 
+---------
+@summary:  
+---------
+@author: Dzr
+"""

+ 147 - 0
A数据处理/site_monitor/utils/clean_html.py

@@ -0,0 +1,147 @@
+import re
+__all__ = ['cleaner']
+
# Standalone elements (keys are regexes applied to the page; values are replacements)
INDEPENDENT_TAGS = {
    '<head>[\s\S]*?</head>': '',
    '<html>|<html [^>]*>|</html>': '',
    '<body>|<body [^>]*>|</body>': '',
    '<meta[^<>]*>|<meta [^<>]*>|<meta[^<>]*>[\s\S]*?</meta>|</meta>': '',  # metadata
    '&(nbsp|e[mn]sp|thinsp|zwn?j|#13);': '',  # whitespace entities
    '\\xa0|\\u3000': '',  # non-breaking / ideographic spaces
    '<!--[\s\S]*?-->': '',  # HTML comments
    '<style[^<>]*>[\s\S]*?</style>': '',  # style sheets
    '<script[^<>]*>[\s\S]*?</script>': '',  # JavaScript
    '<input>': '',  # input boxes
    '<img[^>]*>': '<br>',  # images
}
# Inline elements
INLINE_TAGS = {
    '<a>|<a [^>]*>|</a>': '',  # hyperlinks
    '<link>|<link [^>]*>|</link>': '',  # link tags
    '<span>|<span [^>]*>|</span>': '',  # span
    '<label>|<label [^>]*>|</label>': '<br>',  # label
    '<font>|<font [^>]*>|</font>': '',  # font
    'data:image(.*?) ': '',            # base64-embedded images
}
# Block-level elements
BLOCK_TAGS = {
    '<div>\s*?</div>':'',
    '<h[1-6][^>]*>|</h[1-6]>': '',  # headings
    '<p>|<p [^>]*>': '<br>',  # paragraph (opening tag)
    '</p>': '',  # paragraph (closing tag)
    '<div>|<div [^>]*>': '<br>',  # division (opening tag)
    '</div>': '',  # division (closing tag)
    '<o:p>|<o:p [^>]*>|</o:p>': ''  # MS Office Word paragraphs
}
# Miscellaneous boilerplate (Chinese UI strings like "close"/"print"/"source" links)
OTHER = {
    '<?xml[^>]*>|<?xml [^>]*>|<?xml:.*?>': '',
    '<epointform>': '',
    '<!doctype html>|<!doctype html [^>]*>': '',
    '【关闭】|关闭': '',
    '【打印】|打印本页': '',
    '【字体:[\s\S]*】': '',
    '文章来源:[\u4e00-\u9fa5]+': '',
    '浏览次数:.*[<]+': '',
    '(责任编辑:.*?)': '',
    '分享到[:]': '',

}
# Presentation attributes stripped from remaining tags
CSS_STYLE = {
    'style="[\s\S]*?"|style ="[\s\S]*?"': '',
    'bgcolor="[\s\S]*?"|bgcolor ="[\s\S]*?"': '',
    'bordercolor="[\s\S]*?"|bordercolor ="[\s\S]*?"': '',
    'class="[\s\S]*?"|class ="[\s\S]*?"': '',
    'align="[\s\S]*?"|align ="[\s\S]*?"': '',
    'cellpadding="(\d+)"|cellspacing="(\d+)"': '',

}
# Whitespace normalization
BLANKS = {
    '\n\s*\n': '\n',
    '\s*\n\s*': '\n',
    '[^\S\n]': ' ',
    '\s+': ' ',
}
# Tag names used by _repair_tag to fix fused "tagattr" tokens
TAGS = {'table', 'tr', 'td', 'div', 'span', 'p'}
# Attribute names used by _repair_tag to fix fused "tagattr" tokens
ATTRS = {'id', 'class', 'style', 'width'}
+
+
+def _repair_tag():
+    """异常的标签组合,用来替换非标准页面的标签"""
+    _repairs = {}
+    for tag in TAGS:
+        for attr in ATTRS:
+            key = '{}{}'.format(tag, attr)
+            val = '{} {}'.format(tag, attr)
+            _repairs[key] = val
+    return _repairs
+
+
+def _escape_character(html):
+    """转义字符"""
+    html = html.replace('&lt;', '<')
+    html = html.replace('&gt;', '>')
+    html = html.replace('&quot;', '"')
+    html = html.replace('&amp;', '&')
+    # 不显示输入框边框
+    html = html.replace('<input', '<input style="border-color: transparent;"')
+    return html
+
+
def _lowercase_tag(html):
    """Normalize all tags to lowercase and repair fused tag/attribute tokens."""
    unique_tags = set(re.findall("<[^>]+>", html))

    if len(unique_tags) > 10000:
        # Too many distinct tags for per-tag string replacement;
        # let BeautifulSoup re-serialize the document instead.
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html, "lxml")
        html = str(soup.body.next_element)
    else:
        for tag in unique_tags:
            html = html.replace(tag, tag.lower())

    for broken, fixed in _repair_tag().items():
        html = html.replace(broken, fixed)

    return html
+
+
def cleaner(html, special=None, completely=False):
    """
    Clean a page's HTML.

    :param html: page source to clean
    :param special: extra per-call cleaning rules ({regex: replacement})
    :param completely: also strip canvas/iframe and garbage pseudo-tags
    :return: cleaned page source
    """
    if special is None:
        special = {}

    # Bug fix: the original did ``OTHER.update(special)``, permanently mutating
    # the module-level dict so one call's special rules leaked into every later
    # call. Merge into a fresh dict instead (special still overrides OTHER).
    remove_tags = {
        **INDEPENDENT_TAGS,
        **INLINE_TAGS,
        **BLOCK_TAGS,
        **OTHER,
        **special,
        **CSS_STYLE,
        **BLANKS,
    }
    html = _lowercase_tag(html)
    for tag, repl in remove_tags.items():
        html = re.sub(tag, repl, html)

    if completely:
        html = re.sub(r'<canvas[^<>]*>[\s\S]*?</canvas>', '', html)  # canvas
        html = re.sub(r'<iframe[^<>]*>[\s\S]*?</iframe>', '', html)  # inline frames
        html = re.sub('<([^<>\u4e00-\u9fa5]|微软雅黑|宋体|仿宋)+>', '', html)

    html = _escape_character(html)
    return html

文件差异内容过多而无法显示
+ 0 - 0
A数据处理/site_monitor/utils/js/intercept.js


文件差异内容过多而无法显示
+ 6 - 0
A数据处理/site_monitor/utils/js/stealth.min.js


+ 14 - 0
A数据处理/site_monitor/utils/log.py

@@ -0,0 +1,14 @@
from pathlib import Path

from loguru import logger

# Log files live in <project root>/logs, one file per day (loguru fills {time}).
_absolute = Path(__file__).absolute().parent.parent
_log_path = (_absolute / 'logs/log_{time:YYYY-MM-DD}.log').resolve()
logger.add(
    _log_path,
    format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {message}',
    level='INFO',
    rotation='00:00',  # rotate at midnight
    retention='1 week',  # keep one week of files
    encoding='utf-8',
)

+ 2438 - 0
A数据处理/site_monitor/utils/tools.py

@@ -0,0 +1,2438 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2018-09-06 14:21
+---------
+@summary: 工具
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import asyncio
+import calendar
+import codecs
+import configparser  # 读配置文件的
+import datetime
+import functools
+import hashlib
+import html
+import importlib
+import json
+import os
+import pickle
+import random
+import re
+import socket
+import ssl
+import string
+import sys
+import time
+import traceback
+import urllib
+import urllib.parse
+import uuid
+import weakref
+from functools import partial, wraps
+from hashlib import md5
+from pprint import pformat
+from pprint import pprint
+from urllib import request
+from urllib.parse import urljoin
+
+import bson
+import execjs  # pip install PyExecJS
+import redis
+import requests
+import six
+from requests.cookies import RequestsCookieJar
+from w3lib.url import canonicalize_url as _canonicalize_url
+
+import setting as setting
+from db.redisdb import RedisDB
+from utils.log import logger as log
+
os.environ["EXECJS_RUNTIME"] = "Node"  # make PyExecJS run JavaScript with Node

# Globally disable SSL certificate verification
ssl._create_default_https_context = ssl._create_unverified_context

TIME_OUT = 30  # default network timeout (seconds) for the request helpers below
TIMER_TIME = 5

redisdb = None  # lazily-created RedisDB singleton; see get_redisdb()
+
+
def get_redisdb():
    """Return the module-level RedisDB connection, creating it on first use."""
    global redisdb
    if not redisdb:
        redisdb = RedisDB()
    return redisdb
+
+
+# 装饰器
class Singleton(object):
    """Class decorator: cache and reuse a single instance of the wrapped class.

    Note: the decorated name is rebound to this Singleton object; calling it
    with arguments only matters on the first call.
    """

    def __init__(self, cls):
        self._cls = cls
        self._instance = {}

    def __call__(self, *args, **kwargs):
        instance = self._instance.get(self._cls)
        if instance is None:
            instance = self._cls(*args, **kwargs)
            self._instance[self._cls] = instance
        return instance
+
+
def log_function_time(func):
    """Decorator: log the wrapped function's wall-clock run time at DEBUG level."""
    try:

        @functools.wraps(func)  # copy the wrapped function's metadata onto the wrapper
        def calculate_time(*args, **kw):
            began_time = time.time()
            callfunc = func(*args, **kw)
            end_time = time.time()
            log.debug(func.__name__ + " run time  = " + str(end_time - began_time))
            return callfunc

        return calculate_time
    except:
        # defensive fallback: if wrapping fails, return the function unchanged
        log.debug("求取时间无效 因为函数参数不符")
        return func
+
+
def run_safe_model(module_name):
    """Decorator factory: wrap a function so any exception is logged (tagged
    with ``module_name``) and swallowed; the wrapper then returns None."""

    def inner_run_safe_model(func):
        try:

            @functools.wraps(func)  # copy the wrapped function's metadata onto the wrapper
            def run_func(*args, **kw):
                callfunc = None
                try:
                    callfunc = func(*args, **kw)
                except Exception as e:
                    log.error(module_name + ": " + func.__name__ + " - " + str(e))
                    traceback.print_exc()
                return callfunc

            return run_func
        except Exception as e:
            # defensive: if building the wrapper itself fails, log and fall
            # back to the undecorated function
            log.error(module_name + ": " + func.__name__ + " - " + str(e))
            traceback.print_exc()
            return func

    return inner_run_safe_model
+
+
def memoizemethod_noargs(method):
    """Decorator to cache the result of a method (without arguments) using a
    weak reference to its object, so cached entries die with the instance.
    """
    cache = weakref.WeakKeyDictionary()

    @functools.wraps(method)
    def new_method(self, *args, **kwargs):
        try:
            return cache[self]
        except KeyError:
            value = method(self, *args, **kwargs)
            cache[self] = value
            return value

    return new_method
+
+
+########################【网页解析相关】###############################
+
+
+# @log_function_time
def get_html_by_requests(
    url, headers=None, code="utf-8", data=None, proxies=None, with_response=False
):
    """
    Fetch a page and return its text (POST when ``data`` is given, else GET).

    :param url: target url
    :param headers: optional request headers
    :param code: encoding to force on the response; falsy keeps requests' detection
    :param data: POST body; its presence switches the request to POST
    :param proxies: requests-style proxies mapping (default: no proxy)
    :param with_response: also return the raw Response (may be None on failure)
    :return: html string, or (html, response)
    """
    # Bug fix: the original used ``proxies={}`` — a mutable default argument
    # shared across all calls.
    if proxies is None:
        proxies = {}
    html = ""
    r = None
    try:
        if data:
            r = requests.post(
                url, headers=headers, timeout=TIME_OUT, data=data, proxies=proxies
            )
        else:
            r = requests.get(url, headers=headers, timeout=TIME_OUT, proxies=proxies)

        if code:
            r.encoding = code
        html = r.text

    except Exception as e:
        log.error(e)
    finally:
        if r is not None:
            r.close()

    if with_response:
        return html, r
    else:
        return html
+
+
def get_json_by_requests(
    url,
    params=None,
    headers=None,
    data=None,
    proxies=None,
    with_response=False,
    cookies=None,
):
    """
    Fetch a url and parse the body as JSON (POST when ``data`` is given).

    Returns {} when the request or the JSON decode fails.

    :param url: target url
    :param params: query parameters
    :param headers: optional request headers
    :param data: POST body; its presence switches the request to POST
    :param proxies: requests-style proxies mapping (default: no proxy)
    :param with_response: also return the raw Response (may be None on failure)
    :param cookies: cookies to send
    :return: parsed json, or (json, response)
    """
    # Bug fixes: ``proxies={}`` was a shared mutable default argument, and the
    # local result was named ``json``, shadowing the imported json module.
    if proxies is None:
        proxies = {}
    result = {}
    response = None
    try:
        if data:
            response = requests.post(
                url,
                headers=headers,
                data=data,
                params=params,
                timeout=TIME_OUT,
                proxies=proxies,
                cookies=cookies,
            )
        else:
            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=TIME_OUT,
                proxies=proxies,
                cookies=cookies,
            )
        response.encoding = "utf-8"
        result = response.json()
    except Exception as e:
        log.error(e)
    finally:
        if response is not None:
            response.close()

    if with_response:
        return result, response
    else:
        return result
+
+
def get_cookies(response):
    """Return the response's cookies as a plain ``{name: value}`` dict."""
    cookies = requests.utils.dict_from_cookiejar(response.cookies)
    return cookies
+
+
def get_cookies_from_str(cookie_str):
    """
    Parse a 'k=v; k2=v2' cookie header string into a dict.

    >>> get_cookies_from_str("key=value; key2=value2; key3=; key4=; ")
    {'key': 'value', 'key2': 'value2', 'key3': '', 'key4': ''}

    Args:
        cookie_str: key=value; key2=value2; key3=; key4=

    Returns:
        dict of cookie name -> value
    """
    jar = {}
    for piece in cookie_str.split(";"):
        piece = piece.strip()
        if piece:
            name, value = piece.split("=", 1)
            jar[name.strip()] = value.strip()
    return jar
+
+
def get_cookies_jar(cookies):
    """
    @summary: convert selenium-style cookies into a requests CookieJar,
    usable as requests.get(xxx, cookies=jar)
    reference: https://www.cnblogs.com/small-bud/p/9064674.html

    ---------
    @param cookies: [{},{}] — list of dicts with "name" and "value" keys
    ---------
    @result: cookie jar
    """

    cookie_jar = RequestsCookieJar()
    for cookie in cookies:
        cookie_jar.set(cookie["name"], cookie["value"])

    return cookie_jar
+
+
def get_cookies_from_selenium_cookie(cookies):
    """
    Convert selenium-style cookies ([{"name": ..., "value": ...}, ...]) into a
    plain ``{name: value}`` dict, usable as requests.get(xxx, cookies=...).

    Entries without a truthy "name" are skipped.

    (Doc fix: the original docstring was copy-pasted from get_cookies_jar and
    wrongly claimed a CookieJar result.)

    ---------
    @param cookies: [{},{}]
    ---------
    @result: {name: value, ...}
    """

    cookie_dict = {}
    for cookie in cookies:
        if cookie.get("name"):
            cookie_dict[cookie["name"]] = cookie["value"]

    return cookie_dict
+
+
def cookiesjar2str(cookies):
    """Render a CookieJar as a 'k=v; ' header-style string (trailing '; ' kept)."""
    as_dict = requests.utils.dict_from_cookiejar(cookies)
    return "".join("{}={}; ".format(k, v) for k, v in as_dict.items())
+
+
def cookies2str(cookies):
    """Render a cookie dict as a 'k=v; ' string (trailing '; '; '' when empty)."""
    return "".join("{}={}; ".format(k, v) for k, v in cookies.items())
+
+
def get_urls(
    html,
    stop_urls=(
        "javascript",
        "+",
        ".css",
        ".js",
        ".rar",
        ".xls",
        ".exe",
        ".apk",
        ".doc",
        ".jpg",
        ".png",
        ".flv",
        ".mp4",
    ),
):
    """Extract <a> href targets from ``html``, de-duplicated in document order,
    dropping any url containing one of ``stop_urls`` (static assets etc.)."""
    # do not match urls like javascript:, '+', '#'
    regex = r'<a.*?href.*?=.*?["|\'](.*?)["|\']'

    urls = get_info(html, regex)
    urls = sorted(set(urls), key=urls.index)  # de-duplicate, keep first-seen order
    if stop_urls:
        stop_urls = isinstance(stop_urls, str) and [stop_urls] or stop_urls
        use_urls = []
        for url in urls:
            for stop_url in stop_urls:
                if stop_url in url:
                    break
            else:
                # no stop marker matched -> keep the url
                use_urls.append(url)

        urls = use_urls
    return urls
+
+
def get_full_url(root_url, sub_url):
    """
    Resolve a possibly-relative link against the page url.

    :param root_url: the page url used as base
    :param sub_url: relative (or absolute) link found on the page
    :return: the absolute url
    """
    return urljoin(root_url, sub_url)
+
+
def joint_url(url, params):
    """
    Append ``params`` to ``url`` as a query string, using '&' when the url
    already carries a query and '?' otherwise.

    :param url: base url
    :param params: dict of query parameters; empty/None returns url unchanged
    :return: url with the encoded query string appended
    """
    # Cleanup: removed the commented-out legacy implementation and call the
    # stdlib encoder directly instead of going through the local wrapper.
    if not params:
        return url

    query = urllib.parse.urlencode(params)
    separator = "?" if "?" not in url else "&"
    return url + separator + query
+
+
def canonicalize_url(url):
    """
    Normalize a url: sorts the query parameters and strips the fragment
    (delegates to w3lib's canonicalize_url).
    """
    return _canonicalize_url(url)
+
+
def get_url_md5(url):
    """Return the md5 of the normalized url, with an http:// scheme coerced to
    https:// so both variants hash identically (get_md5 is defined elsewhere
    in this module)."""
    url = canonicalize_url(url)
    url = re.sub("^http://", "https://", url)
    return get_md5(url)
+
+
def fit_url(urls, identis):
    """
    Return the de-duplicated urls that contain any of the given identifiers.

    Note: result order is unspecified (set-based de-duplication).
    """
    if isinstance(identis, str):
        identis = [identis]
    matched = {link for link in urls for identi in identis if identi in link}
    return list(matched)
+
+
def get_param(url, key):
    """Return the raw (undecoded) value of ``key`` in the url's query string,
    or None when the key is absent."""
    query = url.split("?")[-1]
    for pair in query.split("&"):
        parts = pair.split("=", 1)
        if parts[0] == key:
            return parts[1]
    return None
+
+
def urlencode(params):
    """
    Serialize a dict of parameters into a query string.

    @param params: e.g. {'a': 1, 'b': 2}
    @return: 'a=1&b=2'
    """
    return urllib.parse.urlencode(params)
+
+
def urldecode(url):
    """
    Parse a url's query string into a dict of decoded values.

    @param url: xxx?a=1&b=2
    @return: {'a': '1', 'b': '2'}
    """
    params_json = {}
    params = url.split("?")[-1].split("&")
    for param in params:
        # Bug fix: the original ``param.split("=")`` raised ValueError whenever
        # a value itself contained '=' (or the pair had none); partition is
        # robust to both and matches get_param's split("=", 1) behavior.
        key, _, value = param.partition("=")
        params_json[key] = urllib.parse.unquote(value, encoding="utf-8")

    return params_json
+
+
def unquote_url(url, encoding="utf-8"):
    """
    @summary: percent-decode a url (or url component)
    ---------
    @param url: text to decode
    @param encoding: charset used for the decoded bytes
    ---------
    @result: decoded string
    """
    return urllib.parse.unquote(url, encoding=encoding)
+
+
def quote_url(url, encoding="utf-8"):
    """
    @summary: percent-encode a url while keeping the characters that are
    structurally significant in urls (see
    http://www.w3school.com.cn/tags/html_ref_urlencode.html)
    ---------
    @param url: url to encode
    @param encoding: charset used to encode non-ASCII characters
    ---------
    @result: encoded url
    """
    return urllib.parse.quote(url, safe="%;/?:@&=+$,", encoding=encoding)
+
+
def quote_chinese_word(text, encoding="utf-8"):
    """Percent-encode only the CJK runs in ``text``, leaving everything else as is."""
    return re.sub(
        "([\u4e00-\u9fa5]+)",
        lambda match: urllib.parse.quote(match.group(0), encoding=encoding),
        text,
        flags=re.S,
    )
+
+
def unescape(str):
    """Unescape HTML entities, e.g. '&amp;' -> '&'.

    (Parameter name ``str`` kept for keyword-call compatibility, although it
    shadows the builtin.)
    """
    return html.unescape(str)
+
+
def excape(str):
    """Escape special characters into HTML-safe entities.

    (Function name spelling and parameter name kept for caller compatibility.)
    """
    return html.escape(str)
+
+
_regexs = {}  # cache of compiled patterns, keyed by the raw pattern text


# @log_function_time
def get_info(html, regexs, allow_repeat=True, fetch_one=False, split=None):
    """
    Try each regex in order against ``html`` and return the matches from the
    first pattern that hits.

    :param html: text to search (coerced to str for findall)
    :param regexs: one pattern or a list of patterns, tried in order
    :param allow_repeat: when False, de-duplicate matches preserving order
    :param fetch_one: use search() and return the match groups — a single
                      value when there is exactly one group, else the tuple;
                      "" / ("",) when nothing matched
    :param split: when given (and fetch_one is False), join the matches with it
    """
    regexs = isinstance(regexs, str) and [regexs] or regexs

    infos = []
    for regex in regexs:
        if regex == "":
            continue

        if regex not in _regexs.keys():
            _regexs[regex] = re.compile(regex, re.S)

        if fetch_one:
            # NOTE: ``infos`` temporarily holds a Match object here, then its groups
            infos = _regexs[regex].search(html)
            if infos:
                infos = infos.groups()
            else:
                continue
        else:
            infos = _regexs[regex].findall(str(html))

        if len(infos) > 0:
            # print(regex)
            break

    if fetch_one:
        infos = infos if infos else ("",)
        return infos if len(infos) > 1 else infos[0]
    else:
        infos = allow_repeat and infos or sorted(set(infos), key=infos.index)
        infos = split.join(infos) if split else infos
        return infos
+
+
def table_json(table, save_one_blank=True):
    """
    Convert a table into json; suited to tables whose rows hold key:value cells.

    @param table: an xpath-capable selector wrapping the <table>
    @param save_one_blank: collapse runs of spaces to one blank (else remove them)
    @return: {key-cell-text: value-cell-text, ...}
    """
    data = {}

    trs = table.xpath(".//tr")
    for tr in trs:
        tds = tr.xpath("./td|./th")

        # cells are consumed in (key, value) pairs, left to right
        for i in range(0, len(tds), 2):
            if i + 1 > len(tds) - 1:
                break

            key = tds[i].xpath("string(.)").extract_first(default="").strip()
            value = tds[i + 1].xpath("string(.)").extract_first(default="").strip()
            # replace_str is a helper defined elsewhere in this module
            value = replace_str(value, "[\f\n\r\t\v]", "")
            value = replace_str(value, " +", " " if save_one_blank else "")

            if key:
                data[key] = value

    return data
+
+
def get_table_row_data(table):
    """
    Collect the cell texts of every row in a table.

    @param table: an xpath-capable selector wrapping the <table>
    @return: [[cell, cell, ...], ...] one inner list per <tr>
    """

    datas = []
    rows = table.xpath(".//tr")
    for row in rows:
        cols = row.xpath("./td|./th")
        row_datas = []
        for col in cols:
            data = col.xpath("string(.)").extract_first(default="").strip()
            row_datas.append(data)
        datas.append(row_datas)

    return datas
+
+
def rows2json(rows, keys=None):
    """
    Turn row lists into a list of dicts.

    @param rows: list of rows, each a list of cell values
    @param keys: json keys; when falsy the first row supplies them
    @return: [{key: value, ...}, ...]
    """
    if keys:
        data_rows = rows
    else:
        keys, data_rows = rows[0], rows[1:]

    return [dict(zip(keys, values)) for values in data_rows]
+
+
def get_form_data(form):
    """
    Collect the name/value pairs of a form's <input> elements.

    :param form: an xpath-capable selector wrapping the <form> node
    :return: {input-name: input-value} (inputs without a name are skipped)
    """
    data = {}
    # ``input_el`` instead of ``input``: the original shadowed the builtin
    for input_el in form.xpath(".//input"):
        name = input_el.xpath("./@name").extract_first()
        value = input_el.xpath("./@value").extract_first()
        if name:
            data[name] = value

    return data
+
+
def get_domain(url):
    """Return the netloc (host[:port]) part of *url*."""
    parsed = urllib.parse.urlparse(url)
    return parsed.netloc
+
+
def get_index_url(url):
    """Return the site root, e.g. "https://host" for "https://host/path"."""
    scheme_and_host = url.split("/")[:3]
    return "/".join(scheme_and_host)
+
+
def get_ip(domain):
    """Resolve *domain* via DNS and return its first IP address (network I/O)."""
    # getaddrinfo returns a list of (family, type, proto, canonname, sockaddr)
    # tuples; sockaddr[0] is the IP string of the first resolution result.
    ip = socket.getaddrinfo(domain, "http")[0][4][0]
    return ip
+
+
def get_localhost_ip():
    """
    Determine this machine's outbound IP via the UDP trick: connecting a UDP
    socket merely selects a route and records the local address in the socket
    header - no packet is actually sent, so nothing shows up in a capture.
    :return: local IP string
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sk:
        # connect() on UDP only picks the route; nothing is transmitted
        sk.connect(("8.8.8.8", 80))
        ip = sk.getsockname()[0]

    return ip
+
+
def ip_to_num(ip):
    """Convert a dotted-quad IPv4 string to its integer value."""
    import struct

    packed = socket.inet_aton(str(ip))
    host_order = struct.unpack("I", packed)[0]
    return socket.ntohl(host_order)
+
+
def is_valid_proxy(proxy, check_url=None):
    """
    Check whether a proxy works.
    @param proxy: "ip:port"
    @param check_url: when given, validate by fetching this URL through the
        proxy; when None, only test that the proxy port accepts a TCP
        connection (cannot rule out "Connection closed by foreign host")
    @return: True / False
    """
    is_valid = False

    if check_url:
        proxies = {"http": f"http://{proxy}", "https": f"https://{proxy}"}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
        }
        response = None
        try:
            # stream=True avoids downloading the body; reaching the server
            # through the proxy is enough
            response = requests.get(
                check_url, headers=headers, proxies=proxies, stream=True, timeout=20
            )
            is_valid = True

        except Exception as e:
            log.error("check proxy failed: {} {}".format(e, proxy))

        finally:
            if response:
                response.close()

    else:
        ip, port = proxy.split(":")
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
            sk.settimeout(7)
            try:
                sk.connect((ip, int(port)))  # check that the proxy port is open
                is_valid = True

            except Exception as e:
                log.error("check proxy failed: {} {}:{}".format(e, ip, port))

    return is_valid
+
+
def is_valid_url(url):
    """
    Check whether *url* looks like a legal http(s)/ftp URL.
    :param url:
    :return: True / False
    """
    return bool(re.match(r"(^https?:/{2}\w.+$)|(ftp://)", url))
+
+
def get_text(soup, *args):
    """Return soup.get_text(); on any failure log the error and return ""."""
    try:
        text = soup.get_text()
    except Exception as e:
        log.error(e)
        return ""
    return text
+
+
def del_html_tag(content, except_line_break=False, save_img=False, white_replaced=""):
    """
    Strip html tags from *content*.
    @param content: html text
    @param except_line_break: keep </p> tags (paragraph structure)
    @param save_img: keep <img> tags
    @param white_replaced: replacement for whitespace in the default branch
    @return: cleaned text
    """
    content = replace_str(content, "(?i)<script(.|\n)*?</script>")  # (?i) = ignore case
    content = replace_str(content, "(?i)<style(.|\n)*?</style>")
    content = replace_str(content, "<!--(.|\n)*?-->")
    content = replace_str(
        content, "(?!&[a-z]+=)&[a-z]+;?"
    )  # drop entities like &nbsp; but keep "&xxx=" style query parameters
    if except_line_break:
        # temporarily mask </p> so the tag-stripping regex spares it
        content = content.replace("</p>", "/p")
        content = replace_str(content, "<[^p].*?>")
        content = content.replace("/p", "</p>")
        content = replace_str(content, "[ \f\r\t\v]")

    elif save_img:
        content = replace_str(content, "(?!<img.+?>)<.+?>")  # strip every tag except <img>
        content = replace_str(content, "(?! +)\s+", "\n")  # keep plain spaces, fold the rest to newlines
        content = content.strip()

    else:
        content = replace_str(content, "<(.|\n)*?>")
        content = replace_str(content, "\s", white_replaced)
        content = content.strip()

    return content
+
+
def del_html_js_css(content):
    """Remove <script>, <style> and html comment blocks from *content*."""
    for pattern in (
        "(?i)<script(.|\n)*?</script>",  # (?i) = ignore case
        "(?i)<style(.|\n)*?</style>",
        "<!--(.|\n)*?-->",
    ):
        content = re.sub(pattern, "", content)

    return content
+
+
def is_have_chinese(content):
    """Return True when *content* contains at least one Chinese character."""
    return bool(get_info(content, "[\u4e00-\u9fa5]+"))
+
+
def is_have_english(content):
    """Return True when *content* contains at least one ASCII letter."""
    return bool(get_info(content, "[a-zA-Z]+"))
+
+
def get_chinese_word(content):
    """Extract the Chinese-character runs from *content* via get_info."""
    return get_info(content, "[\u4e00-\u9fa5]+")
+
+
def get_english_words(content):
    """Extract ASCII-letter runs from *content*; "" when none are found."""
    return get_info(content, "[a-zA-Z]+") or ""
+
+
+##################################################
def get_json(json_str):
    """
    @summary: parse a json string, tolerating sloppy input
    ---------
    @param json_str: json-formatted string
    ---------
    @result: parsed object; {} for empty input or on failure
    """

    try:
        return json.loads(json_str) if json_str else {}
    except Exception as e1:
        # repair attempt: single quotes -> double quotes, then quote bare keys
        # like {key: 1}; NOTE the blanket str.replace may also touch values
        # that happen to contain the key text (best effort only)
        try:
            json_str = json_str.strip()
            json_str = json_str.replace("'", '"')
            keys = get_info(json_str, "(\w+):")
            for key in keys:
                json_str = json_str.replace(key, '"%s"' % key)

            return json.loads(json_str) if json_str else {}

        except Exception as e2:
            pass

        return {}
+
+
def jsonp2json(jsonp):
    """
    Unwrap a jsonp payload into a json object.
    @param jsonp: e.g. jQuery172013600082560040794_1553230569815({})
    @return: parsed object
    @raise ValueError: when no json body can be extracted
    """
    match = re.match(".*?({.*}).*", jsonp, re.S)
    try:
        body = match.group(1)
        return json.loads(body)
    except:
        raise ValueError("Invalid Input")
+
+
def dumps_json(data, indent=4, sort_keys=False):
    """
    @summary: pretty-format json for printing
    ---------
    @param data: json string or json-serializable object
    ---------
    @result: formatted string; falls back to pprint.pformat on failure
    """
    try:
        if isinstance(data, str):
            # accept a raw json string as input
            data = get_json(data)

        return json.dumps(
            data,
            ensure_ascii=False,
            indent=indent,
            skipkeys=True,
            sort_keys=sort_keys,
            default=str,
        )
    except Exception as e:
        return pformat(data)
+
+
def get_json_value(json_object, key):
    """
    @summary: read a (possibly nested) value out of a json object
    ---------
    @param json_object: json object or json-formatted string
    @param key: key path; nested levels joined by ".", e.g. "key1.key2"
        for {'key1': {'key2': 3}}
    ---------
    @result: the value, or '' when the path does not exist
    """
    current_key = ""
    value = ""
    try:
        # accept a raw json string as well
        json_object = (
            isinstance(json_object, str) and get_json(json_object) or json_object
        )

        current_key = key.split(".")[0]
        value = json_object[current_key]

        # strip the consumed level; with no "." left, key stays unchanged
        key = key[key.find(".") + 1 :]
    except Exception as e:
        return value

    if key == current_key:
        # last level reached
        return value
    else:
        return get_json_value(value, key)
+
+
def get_all_keys(datas, depth=None, current_depth=0):
    """
    @summary: collect every key in a json structure
    ---------
    @param datas: dict / list
    @param depth: max key depth (1-based); None means unlimited
    @param current_depth: internal recursion depth, do not pass
    ---------
    @result: list of keys
    """
    if depth and current_depth >= depth:
        return []

    collected = []
    if isinstance(datas, dict):
        for key, value in datas.items():
            collected.append(key)
            if isinstance(value, dict):
                collected += get_all_keys(value, depth, current_depth=current_depth + 1)
    elif isinstance(datas, list):
        # a list does not contribute keys itself but counts as one level
        for item in datas:
            collected += get_all_keys(item, depth, current_depth=current_depth + 1)

    return collected
+
+
def to_chinese(unicode_str):
    """Decode \\uXXXX escape sequences in *unicode_str* into real characters."""
    wrapped = '{"chinese":"%s"}' % unicode_str
    return json.loads(wrapped)["chinese"]
+
+
+##################################################
def replace_str(source_str, regex, replace_str=""):
    """
    @summary: regex-based replacement inside a string
    ---------
    @param source_str: original string
    @param regex: pattern
    @param replace_str: replacement text, defaults to ''
    ---------
    @result: the string with every match replaced
    """
    return re.sub(regex, replace_str, source_str)
+
+
def del_redundant_blank_character(text):
    """
    Collapse every run of whitespace in *text* into a single space.
    :param text:
    :return: normalized string
    """
    return re.sub("\s+", " ", text)
+
+
+##################################################
def get_conf_value(config_file, section, key):
    """Read one value from an ini-style config file (utf-8 encoded)."""
    parser = configparser.ConfigParser(allow_no_value=True)
    with codecs.open(config_file, "r", encoding="utf-8") as f:
        parser.read_file(f)
    return parser.get(section, key)
+
+
def mkdir(path):
    """
    Create *path* (including parents) if it is missing. Errors are swallowed,
    matching the original contract: the caller simply proceeds and fails
    later on the actual file operation if the directory is truly unusable.
    """
    try:
        # exist_ok avoids the check-then-create race of the original
        # `if not os.path.exists(path): os.makedirs(path)` pattern
        os.makedirs(path, exist_ok=True)
    except OSError:
        pass
+
+
def write_file(filename, content, mode="w", encoding="utf-8"):
    """
    @summary: write content to a file, creating parent directories as needed
    ---------
    @param filename: file name (with path)
    @param content: a string, or an iterable of strings (passed to writelines)
    @param mode: open mode, e.g. "w" to overwrite / "a" to append
    ---------
    @result:
    """

    directory = os.path.dirname(filename)
    mkdir(directory)
    with open(filename, mode, encoding=encoding) as file:
        file.writelines(content)
+
+
def read_file(filename, readlines=False, encoding="utf-8"):
    """
    @summary: read a file
    ---------
    @param filename: file name (with path)
    @param readlines: read line by line (default False)
    ---------
    @result: list of lines when readlines is True, otherwise the whole string;
        None when the file cannot be read (the error is logged, not raised)
    """

    content = None
    try:
        with open(filename, "r", encoding=encoding) as file:
            content = file.readlines() if readlines else file.read()
    except Exception as e:
        log.error(e)

    return content
+
+
def get_oss_file_list(oss_handler, prefix, date_range_min, date_range_max=None):
    """
    Yield object keys under date-partitioned OSS folders.
    @param oss_handler: OSS client exposing .list(prefix=...)
    @param prefix: path prefix, e.g. data/car_service_line/yiche/yiche_serial_zongshu_info
    @param date_range_min: range start, "/"-separated, e.g. 2019/03/01 or 2019/03/01/00/00/00
    @param date_range_max: range end, same format; defaults to the start value
    @return: generator of object keys, e.g. html/.../2019/03/22/15/53/15/8ca8b9e4-....json.snappy
    """

    # Derive both the strftime format and the iteration step from how many
    # "/"-separated fields the caller supplied (year ... down to seconds).
    date_range_max = date_range_max or date_range_min
    date_format = "/".join(
        ["%Y", "%m", "%d", "%H", "%M", "%S"][: date_range_min.count("/") + 1]
    )
    time_interval = [
        {"days": 365},
        {"days": 31},
        {"days": 1},
        {"hours": 1},
        {"minutes": 1},
        {"seconds": 1},
    ][date_range_min.count("/")]
    date_range = get_between_date(
        date_range_min, date_range_max, date_format=date_format, **time_interval
    )

    # One OSS list call per date partition folder.
    for date in date_range:
        file_folder_path = os.path.join(prefix, date)
        objs = oss_handler.list(prefix=file_folder_path)
        for obj in objs:
            filename = obj.key
            yield filename
+
+
def is_html(url):
    """
    Fetch *url* and report whether its Content-Type says text/html.
    Network I/O; any failure is logged and yields False.
    """
    if not url:
        return False

    try:
        # NOTE: urlopen performs a full GET just to read the headers
        content_type = request.urlopen(url).info().get("Content-Type", "")

        if "text/html" in content_type:
            return True
        else:
            return False
    except Exception as e:
        log.error(e)
        return False
+
+
def is_exist(file_path):
    """
    @summary: whether the given path exists on disk
    ---------
    @param file_path:
    ---------
    @result: True / False
    """
    return os.path.exists(file_path)
+
+
def download_file(url, file_path, *, call_func=None, proxies=None, data=None):
    """
    Download a file, creating the target directory automatically.
    Args:
        url: source url
        file_path: destination path
        call_func: callback invoked after a successful download
        proxies: proxy mapping; NOTE it is installed globally via
            request.install_opener and thus affects every later urllib
            request in this process
        data: request body forwarded to urlretrieve

    Returns:
        1 on success, 0 on failure or empty url
    """
    directory = os.path.dirname(file_path)
    mkdir(directory)

    # progress bar
    def progress_callfunc(blocknum, blocksize, totalsize):
        """urlretrieve report hook
        @blocknum : number of blocks downloaded so far
        @blocksize : size of one block
        @totalsize: total size of the remote file
        """
        # NOTE: raises ZeroDivisionError if the server reports size 0 - TODO confirm
        percent = 100.0 * blocknum * blocksize / totalsize
        if percent > 100:
            percent = 100
        # print ('进度条 %.2f%%' % percent, end = '\r')
        sys.stdout.write("进度条 %.2f%%" % percent + "\r")
        sys.stdout.flush()

    if url:
        try:
            if proxies:
                # create the object, assign it to a variable
                proxy = request.ProxyHandler(proxies)
                # construct a new opener using your proxy settings
                opener = request.build_opener(proxy)
                # install the opener at module level (global side effect)
                request.install_opener(opener)

            request.urlretrieve(url, file_path, progress_callfunc, data)

            if callable(call_func):
                call_func()
            return 1
        except Exception as e:
            log.error(e)
            return 0
    else:
        return 0
+
+
def get_file_list(path, ignore=()):
    """
    Recursively list files under *path*. A "*suffix" filter may be appended,
    e.g. "/data/*.json" returns only .json files.
    @param path: directory (optionally with a *suffix filter) or a single file
    @param ignore: file/directory names to skip
    @return: list of file paths; [path] itself when path is not a directory
    """
    parts = path.split("*")
    path = parts[0]
    file_type = parts[1] if len(parts) >= 2 else ""

    # recursive walk; `collected` is threaded explicitly instead of being a
    # mutable default argument as in the original
    def _walk(current, collected):
        for file_name in os.listdir(current):
            if file_name in ignore:
                continue

            file_path = os.path.join(current, file_name)
            if os.path.isdir(file_path):
                _walk(file_path, collected)
            elif not file_type or file_name.endswith(file_type):
                collected.append(file_path)

        return collected

    return _walk(path, []) if os.path.isdir(path) else [path]
+
+
def rename_file(old_name, new_name):
    """Rename (move) a file from *old_name* to *new_name*."""
    os.rename(old_name, new_name)
+
+
def del_file(path, ignore=()):
    """
    Delete every file under *path* (resolved via get_file_list, so the
    "*suffix" filter syntax works here too); failures are logged, not raised.
    """
    files = get_file_list(path, ignore)
    for file in files:
        try:
            os.remove(file)
        except Exception as e:
            log.error(
                """
                删除出错: %s
                Exception : %s
                """
                % (file, str(e))
            )
        finally:
            pass
+
+
def get_file_type(file_name):
    """
    @summary: return the file extension, including the dot
    ---------
    @param file_name:
    ---------
    @result: e.g. ".txt"; "" when there is no extension; None when splitext
        fails (the error is logged)
    """
    try:
        _, extension = os.path.splitext(file_name)
    except Exception as e:
        log.exception(e)
    else:
        return extension
+
+
def get_file_path(file_path):
    """
    @summary: return the directory part of a path
    ---------
    @param file_path: /root/a.py
    ---------
    @result: /root (None when os.path.split fails; the error is logged)
    """
    try:
        directory, _ = os.path.split(file_path)
    except Exception as e:
        log.exception(e)
    else:
        return directory
+
+
+#############################################
+
+
def exec_js(js_code):
    """
    @summary: evaluate a js expression via the execjs runtime
    ---------
    @param js_code: js source
    ---------
    @result: evaluation result
    """

    return execjs.eval(js_code)
+
+
def compile_js(js_func):
    """
    @summary: compile a js function with execjs
    ---------
    @param js_func: js function source
    ---------
    @result: a callable; invoke as fun('js_funName', param1, param2)
    """

    ctx = execjs.compile(js_func)
    return ctx.call
+
+
+###############################################
+
+#############################################
+
+
def date_to_timestamp(date, time_format="%Y-%m-%d %H:%M:%S"):
    """
    @summary: convert a date string like "2011-09-28 10:00:00" into a unix
        timestamp (interpreted in local time)
    ---------
    @param date: date string
    @param time_format: format of *date*
    ---------
    @result: int timestamp
    """
    struct = time.strptime(date, time_format)
    return int(time.mktime(struct))
+
+
def timestamp_to_date(timestamp, time_format="%Y-%m-%d %H:%M:%S"):
    """
    @summary: convert a unix timestamp into a local-time date string
    ---------
    @param timestamp: seconds since the epoch
    @param time_format: output format
    ---------
    @result: date string
    @raise ValueError: when timestamp is None
    """
    if timestamp is None:
        raise ValueError("timestamp is null")

    return time.strftime(time_format, time.localtime(timestamp))
+
+
def get_current_timestamp():
    """Current unix timestamp, truncated to whole seconds."""
    return int(time.time())
+
+
def get_current_date(date_format="%Y-%m-%d %H:%M:%S"):
    """Current local date/time formatted with *date_format*."""
    now = datetime.datetime.now()
    return now.strftime(date_format)
+
+
def get_date_number(year=None, month=None, day=None):
    """
    @summary: ISO calendar numbers for a date; defaults to today
    ---------
    @param year: 2010
    @param month: 6
    @param day: 16
    ---------
    @result: (iso year, iso week, iso weekday), e.g. (2010, 24, 3)
    """
    if year and month and day:
        return datetime.date(year, month, day).isocalendar()
    if not any([year, month, day]):
        return datetime.datetime.now().isocalendar()

    # partial argument sets are not supported
    assert year, "year 不能为空"
    assert month, "month 不能为空"
    assert day, "day 不能为空"
+
+
def get_between_date(
    begin_date, end_date=None, date_format="%Y-%m-%d", **time_interval
):
    """
    @summary: list the dates between two dates, stepping by *time_interval*
        (one day by default); the end date is always included
    ---------
    @param begin_date: start date string, e.g. 2018-10-01
    @param end_date: end date string, defaults to today
    @param date_format: format matching begin_date / end_date
    @param time_interval: step, any timedelta kwargs: days / seconds /
        microseconds / milliseconds / minutes / hours / weeks
    ---------
    @result: list of date strings
    """

    date_list = []

    begin_date = datetime.datetime.strptime(begin_date, date_format)
    end_date = (
        datetime.datetime.strptime(end_date, date_format)
        if end_date
        else datetime.datetime.strptime(
            time.strftime(date_format, time.localtime(time.time())), date_format
        )
    )
    time_interval = time_interval or dict(days=1)

    while begin_date <= end_date:
        date_str = begin_date.strftime(date_format)
        date_list.append(date_str)

        begin_date += datetime.timedelta(**time_interval)

    # make sure the right edge shows up even when the step overshoots it
    if end_date.strftime(date_format) not in date_list:
        date_list.append(end_date.strftime(date_format))

    return date_list
+
+
def get_between_months(begin_date, end_date=None):
    """
    @summary: list the months between two dates, stepping one calendar month
    ---------
    @param begin_date: start date, e.g. 2018-01-01
    @param end_date: end date, defaults to today
    ---------
    @result: list like ['2018-01', '2018-02']
    """

    def add_months(dt, months):
        # calendar-aware month addition; the day is clamped to the target
        # month's length (e.g. Jan 31 + 1 month -> Feb 28/29)
        month = dt.month - 1 + months
        year = dt.year + month // 12
        month = month % 12 + 1
        day = min(dt.day, calendar.monthrange(year, month)[1])
        return dt.replace(year=year, month=month, day=day)

    date_list = []
    begin_date = datetime.datetime.strptime(begin_date, "%Y-%m-%d")
    end_date = (
        datetime.datetime.strptime(end_date, "%Y-%m-%d")
        if end_date
        else datetime.datetime.strptime(
            time.strftime("%Y-%m-%d", time.localtime(time.time())), "%Y-%m-%d"
        )
    )
    while begin_date <= end_date:
        date_str = begin_date.strftime("%Y-%m")
        date_list.append(date_str)
        begin_date = add_months(begin_date, 1)
    return date_list
+
+
def get_today_of_day(day_offset=0):
    """Today shifted by *day_offset* days, as an ISO date string."""
    target = datetime.date.today() + datetime.timedelta(days=day_offset)
    return str(target)
+
+
def get_days_of_month(year, month):
    """Return the number of days in the given month."""
    _, day_count = calendar.monthrange(year, month)
    return day_count
+
+
def get_firstday_of_month(date):
    """
    First day of the month of *date* ("YYYY-MM-DD"), with the month
    zero-padded to two digits.
    """
    year, month, _ = date.split("-")
    return "%d-%02d-01" % (int(year), int(month))
+
+
def get_lastday_of_month(date):
    """
    Last day of the month of *date* ("YYYY-MM-DD"), with the month
    zero-padded to two digits.
    """
    year, month, _ = date.split("-")
    year, month = int(year), int(month)

    last_day = calendar.monthrange(year, month)[1]
    return "%s-%s-%s" % (year, add_zero(month), last_day)
+
+
def get_firstday_month(month_offset=0):
    """
    First day of the month *month_offset* months from today, "YYYY-MM-01".
    """
    year, month, _ = get_year_month_and_days(month_offset)
    return "%s-%s-01" % (year, month)
+
+
def get_lastday_month(month_offset=0):
    """
    Last day of the month *month_offset* months from today, "YYYY-MM-DD".
    """
    year, month, days = get_year_month_and_days(month_offset)
    return "%s-%s-%s" % (year, month, days)
+
+
def get_last_month(month_offset=0):
    """
    The month *month_offset* months from today, "YYYY-MM".
    """
    year, month, _ = get_year_month_and_days(month_offset)
    return "%s-%s" % (year, month)
+
+
def get_year_month_and_days(month_offset=0):
    """
    @summary: year, month and day-count of the month *month_offset* months
        away from the current month
    ---------
    @param month_offset: month offset (positive = future, negative = past)
    ---------
    @result: e.g. ('2019', '04', '30'); NOTE on the branches that stay in the
        current year the year element is returned as an int, not a str
    """

    today = datetime.datetime.now()
    year, month = today.year, today.month

    this_year = int(year)
    this_month = int(month)
    total_month = this_month + month_offset
    if month_offset >= 0:
        if total_month <= 12:
            # still inside the current year
            days = str(get_days_of_month(this_year, total_month))
            total_month = add_zero(total_month)
            return (year, total_month, days)
        else:
            # spilled over into a following year; j == 0 means December
            i = total_month // 12
            j = total_month % 12
            if j == 0:
                i -= 1
                j = 12
            this_year += i
            days = str(get_days_of_month(this_year, j))
            j = add_zero(j)
            return (str(this_year), str(j), days)
    else:
        if (total_month > 0) and (total_month < 12):
            # still inside the current year
            days = str(get_days_of_month(this_year, total_month))
            total_month = add_zero(total_month)
            return (year, total_month, days)
        else:
            # went back into a previous year; floor division handles the
            # negative total_month (e.g. 0 -> December of last year)
            i = total_month // 12
            j = total_month % 12
            if j == 0:
                i -= 1
                j = 12
            this_year += i
            days = str(get_days_of_month(this_year, j))
            j = add_zero(j)
            return (str(this_year), str(j), days)
+
+
def add_zero(n):
    """Zero-pad a number to two digits, e.g. 3 -> "03"."""
    return "{:02d}".format(n)
+
+
def get_month(month_offset=0):
    """
    Date *month_offset* months from today, "YYYY-MM-DD".
    When today's day number does not exist in the target month, the target
    month's last day is used instead (clamping).
    """
    today = datetime.datetime.now()
    day = add_zero(today.day)

    # d is the last day of the target month
    (y, m, d) = get_year_month_and_days(month_offset)
    arr = (y, m, d)
    if int(day) < int(d):
        # today's day fits in the target month -> keep it
        arr = (y, m, day)
    return "-".join("%s" % i for i in arr)
+
+
@run_safe_model("format_date")
def format_date(date, old_format="", new_format="%Y-%m-%d %H:%M:%S"):
    """
    @summary: normalize a date string into another format
    ---------
    @param date: date string, e.g. 2017年4月17日 3时27分12秒
    @param old_format: its format, e.g. '%Y年%m月%d日 %H时%M分%S秒';
        guessed from the digit runs when empty
        %y two-digit year (00-99)
        %Y four-digit year (0000-9999)
        %m month (01-12)
        %d day of month (0-31)
        %H hour, 24h clock (0-23)
        %I hour, 12h clock (01-12)
        %M minute (00-59)
        %S second (00-59)
    @param new_format: output format
    ---------
    @result: reformatted date string, e.g. 2017-4-17 03:27:12; the input is
        returned unchanged when parsing fails (the error is logged)
    """
    if not date:
        return ""

    if not old_format:
        # guess the format: substitute each digit run with %Y %m %d %H %M %S
        # in order, replacing only the first occurrence each time so e.g. the
        # "11" month in '2017年11月30日 11:49' does not clobber the hour
        regex = "(\d+)"
        numbers = get_info(date, regex, allow_repeat=True)
        formats = ["%Y", "%m", "%d", "%H", "%M", "%S"]
        old_format = date
        for i, number in enumerate(numbers[:6]):
            if i == 0 and len(number) == 2:  # a two-digit year needs %y
                old_format = old_format.replace(
                    number, formats[i].lower(), 1
                )  # replace the first occurrence only
            else:
                old_format = old_format.replace(number, formats[i], 1)  # first occurrence only

    try:
        date_obj = datetime.datetime.strptime(date, old_format)
        if "T" in date and "Z" in date:
            # ISO-8601 UTC input: shift to UTC+8 (assumes a +8 local zone -
            # TODO confirm) and ignore new_format on this branch
            date_obj += datetime.timedelta(hours=8)
            date_str = date_obj.strftime("%Y-%m-%d %H:%M:%S")
        else:
            date_str = datetime.datetime.strftime(date_obj, new_format)

    except Exception as e:
        log.error("日期格式化出错,old_format = %s 不符合 %s 格式" % (old_format, date))
        date_str = date

    return date_str
+
+
def transform_lower_num(data_str: str):
    """
    Replace Chinese numerals (一..九, 十, 零) in *data_str* with arabic digits.
    Strings containing no Chinese numeral are returned untouched.
    """
    num_map = {
        "一": "1",
        "二": "2",
        "三": "3",
        "四": "4",
        "五": "5",
        "六": "6",
        "七": "7",
        "八": "8",
        "九": "9",
        "十": "0",  # 十 maps to "0" first; positional fix-ups happen below
    }
    pattern = f'[{"|".join(num_map.keys())}|零]'
    res = re.search(pattern, data_str)
    if not res:
        # no Chinese numeral present - return the input unchanged
        return data_str

    # protect pre-existing arabic zeros, then substitute the numerals
    data_str = data_str.replace("0", "零")
    for n in num_map:
        data_str = data_str.replace(n, num_map[n])

    # repair the positional digits produced by 十 (ten), e.g.
    # 二十一 -> "201" -> "21", 十五 -> "05" -> "15", 十 -> "0" -> "10"
    re_data_str = re.findall("\d+", data_str)
    for i in re_data_str:
        if len(i) == 3:
            new_i = i.replace("0", "")
            data_str = data_str.replace(i, new_i, 1)
        elif len(i) == 4:
            new_i = i.replace("10", "")
            data_str = data_str.replace(i, new_i, 1)
        elif len(i) == 2 and int(i) < 10:
            new_i = int(i) + 10
            data_str = data_str.replace(i, str(new_i), 1)
        elif len(i) == 1 and int(i) == 0:
            new_i = int(i) + 10
            data_str = data_str.replace(i, str(new_i), 1)

    # restore the protected zeros
    return data_str.replace("零", "0")
+
+
@run_safe_model("format_time")
def format_time(release_time, date_format="%Y-%m-%d %H:%M:%S"):
    """
    Normalize a relative/colloquial Chinese release time ("2个月前",
    "昨天 12:00", "刚刚", "03-05" ...) into an absolute date string.
    NOTE: the doctest outputs below depend on the current clock.

    >>> format_time("2个月前")
    '2021-08-15 16:24:21'
    >>> format_time("2月前")
    '2021-08-15 16:24:36'
    """
    # normalize numerals and separators first (日 -> 天, / -> -)
    release_time = transform_lower_num(release_time)
    release_time = release_time.replace("日", "天").replace("/", "-")

    if "年前" in release_time:
        # N years ago (approximated as N * 365 days)
        years = re.compile("(\d+)\s*年前").findall(release_time)
        years_ago = datetime.datetime.now() - datetime.timedelta(
            days=int(years[0]) * 365
        )
        release_time = years_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "月前" in release_time:
        # N months ago (approximated as N * 30 days)
        months = re.compile("(\d+)[\s个]*月前").findall(release_time)
        months_ago = datetime.datetime.now() - datetime.timedelta(
            days=int(months[0]) * 30
        )
        release_time = months_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "周前" in release_time:
        # N weeks ago
        weeks = re.compile("(\d+)\s*周前").findall(release_time)
        weeks_ago = datetime.datetime.now() - datetime.timedelta(days=int(weeks[0]) * 7)
        release_time = weeks_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "天前" in release_time:
        # N days ago
        ndays = re.compile("(\d+)\s*天前").findall(release_time)
        days_ago = datetime.datetime.now() - datetime.timedelta(days=int(ndays[0]))
        release_time = days_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "小时前" in release_time:
        # N hours ago
        nhours = re.compile("(\d+)\s*小时前").findall(release_time)
        hours_ago = datetime.datetime.now() - datetime.timedelta(hours=int(nhours[0]))
        release_time = hours_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "分钟前" in release_time:
        # N minutes ago
        nminutes = re.compile("(\d+)\s*分钟前").findall(release_time)
        minutes_ago = datetime.datetime.now() - datetime.timedelta(
            minutes=int(nminutes[0])
        )
        release_time = minutes_ago.strftime("%Y-%m-%d %H:%M:%S")

    elif "前天" in release_time:
        # the day before yesterday; keeps any trailing clock time
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=2)
        release_time = release_time.replace("前天", str(yesterday))

    elif "昨天" in release_time:
        # yesterday; keeps any trailing clock time
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=1)
        release_time = release_time.replace("昨天", str(yesterday))

    elif "今天" in release_time:
        release_time = release_time.replace("今天", get_current_date("%Y-%m-%d"))

    elif "刚刚" in release_time:
        release_time = get_current_date()

    elif re.search("^\d\d:\d\d", release_time):
        # bare clock time -> assume today
        release_time = get_current_date("%Y-%m-%d") + " " + release_time

    elif not re.compile("\d{4}").findall(release_time):
        # no 4-digit year: prepend the current year, or the previous year
        # when the month would otherwise lie in the future
        month = re.compile("\d{1,2}").findall(release_time)
        if month and int(month[0]) <= int(get_current_date("%m")):
            release_time = get_current_date("%Y") + "-" + release_time
        else:
            release_time = str(int(get_current_date("%Y")) - 1) + "-" + release_time

    # split day and hour when they are glued together, e.g. "2021-01-0112"
    template = re.compile("(\d{4}-\d{1,2}-\d{2})(\d{1,2})")
    release_time = re.sub(template, r"\1 \2", release_time)
    release_time = format_date(release_time, new_format=date_format)

    return release_time
+
+
def to_date(date_str, date_format="%Y-%m-%d %H:%M:%S"):
    """Parse *date_str* into a datetime using *date_format*."""
    return datetime.datetime.strptime(date_str, date_format)
+
+
def get_before_date(
    current_date,
    days,
    current_date_format="%Y-%m-%d %H:%M:%S",
    return_date_format="%Y-%m-%d %H:%M:%S",
):
    """
    @summary: shift a date string by a number of days
    ---------
    @param current_date: base date, str
    @param days: offset; -1 = one day earlier, 1 = one day later
    @param current_date_format: format of *current_date*
    @param return_date_format: format of the returned date
    ---------
    @result: shifted date string
    """
    base = to_date(current_date, current_date_format)
    shifted = base + datetime.timedelta(days=days)
    return shifted.strftime(return_date_format)
+
+
def get_utcnow():
    """Naive datetime holding the current UTC time."""
    return datetime.datetime.utcnow()
+
+
def delay_time(sleep_time=60):
    """
    @summary: sleep, one minute by default
    ---------
    @param sleep_time: seconds
    ---------
    @result:
    """
    time.sleep(sleep_time)
+
+
def format_seconds(seconds):
    """
    @summary: render a second count as Chinese days/hours/minutes/seconds
    ---------
    @param seconds:
    ---------
    @result: e.g. 2天3小时2分49秒; zero components are omitted, so 0 yields ""
    """

    seconds = int(seconds + 0.5)  # round to the nearest whole second

    minutes, sec = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)

    parts = []
    if days:
        parts.append("{}天".format(days))
    if hours:
        parts.append("{}小时".format(hours))
    if minutes:
        parts.append("{}分".format(minutes))
    if sec:
        parts.append("{}秒".format(sec))

    return "".join(parts)
+
+
################################################
def get_md5(*args):
    """
    @summary: 32-char md5 hex digest over the concatenated str() of all args
    ---------
    @param *args: values combined into the hash (joint dedup key)
    ---------
    @result: e.g. 7c8684bcbdfcea6697650aa53d7b1405
    """
    digest = hashlib.md5()
    for value in args:
        digest.update(str(value).encode())
    return digest.hexdigest()
+
+
def get_sha1(*args):
    """
    @summary: 40-char sha1 hex digest over the concatenated str() of all
        args, usable as a unique id
    ---------
    @param *args: values combined into the hash
    ---------
    @result: e.g. ba4868b3f277c8e387b55d9e3d0be7c045cdd89e
    """
    digest = hashlib.sha1()
    for value in args:
        digest.update(str(value).encode())
    return digest.hexdigest()  # 40 hex chars
+
+
def get_base64(secret, message):
    """
    @summary: sign *message* with HMAC-SHA256 and return the base64 digest
              see: https://www.jokecamp.com/blog/examples-of-creating-base64-hashes-using-hmac-sha256-in-different-languages/
    ---------
    @param secret: signing key
    @param message: payload
    ---------
    @result: base64-encoded signature string
    """

    import hashlib
    import hmac
    import base64

    mac = hmac.new(
        secret.encode("utf-8"), message.encode("utf-8"), digestmod=hashlib.sha256
    )
    return base64.b64encode(mac.digest()).decode("utf8")
+
+
def get_uuid(key1="", key2=""):
    """
    @summary: build a uuid string
    With no arguments a time-based uuid1 is returned; otherwise the two keys
    are hashed together into a deterministic uuid, e.g. domain + title as a
    compound index.
    ---------
    @param key1: str
    @param key2: str
    ---------
    @result: uuid string
    """
    if not key1 and not key2:
        return str(uuid.uuid1())

    digest = md5(bytes(key1, "utf-8") + bytes(key2, "utf-8")).digest()
    return str(uuid.UUID(bytes=digest[:16], version=3))
+
+
def get_hash(text):
    """Python builtin hash() of *text* (str hashes are salted per process)."""
    return hash(text)
+
+
+##################################################
+
+
def cut_string(text, length):
    """
    @summary: split text into fixed-length chunks
    ---------
    @param text: source text
    @param length: chunk size; the final chunk may be shorter
    ---------
    @result: list of chunks
    """
    return [text[pos : pos + length] for pos in range(0, len(text), length)]
+
+
def get_random_string(length=1):
    # random.sample draws WITHOUT replacement from letters+digits, so the
    # result never repeats a character and length must be <= 62 or a
    # ValueError is raised.
    random_string = "".join(random.sample(string.ascii_letters + string.digits, length))
    return random_string
+
+
def get_random_password(length=8, special_characters=""):
    """
    @summary: Generate a random password. Default length 8, guaranteed to
              contain at least one uppercase letter, one lowercase letter
              and one digit (plus one special character when provided).
    ---------
    @param length: password length, default 8
    @param special_characters: extra characters to draw from
    ---------
    @result: password of the requested length
    """

    # NOTE(review): random.sample draws without replacement, and the loop
    # retries until digit+upper+lower are all present - with length < 3 this
    # can never succeed and the loop spins forever. Also, `random` is not a
    # CSPRNG; use `secrets` for security-sensitive passwords.
    while True:
        random_password = "".join(
            random.sample(
                string.ascii_letters + string.digits + special_characters, length
            )
        )
        if (
            re.search("[0-9]", random_password)
            and re.search("[A-Z]", random_password)
            and re.search("[a-z]", random_password)
        ):
            if not special_characters:
                break
            elif set(random_password).intersection(special_characters):
                break

    return random_password
+
+
def get_random_email(length=None, email_types: list = None, special_characters=""):
    """
    Generate a random email address.
    :param length: length of the local part (default: random 4-12; values < 3
        would make get_random_password loop forever)
    :param email_types: candidate mail domains (default: common CN providers)
    :param special_characters: extra characters allowed in the local part
    :return: e.g. "aB3kZ9@qq.com"
    """
    if not length:
        length = random.randint(4, 12)
    if not email_types:
        email_types = [
            "qq.com",
            "163.com",
            "gmail.com",
            "yahoo.com",
            "hotmail.com",
            "yeah.net",
            "126.com",
            "139.com",
            "sohu.com",
        ]

    # Local part reuses the password generator, so it always mixes cases+digits.
    email_body = get_random_password(length, special_characters)
    email_type = random.choice(email_types)

    email = email_body + "@" + email_type
    return email
+
+
+#################################
+
+
def dumps_obj(obj):
    # Serialize an arbitrary Python object to bytes via pickle.
    return pickle.dumps(obj)


def loads_obj(obj_str):
    # SECURITY: pickle.loads executes arbitrary code during deserialization -
    # never call this on data from an untrusted source.
    return pickle.loads(obj_str)
+
+
def get_method(obj, name):
    """Look up attribute *name* on *obj*; log and return None when it is missing."""
    attr_name = str(name)
    method = getattr(obj, attr_name, None)
    if method is None and not hasattr(obj, attr_name):
        log.error("Method %r not found in: %s" % (attr_name, obj))
        return None
    return method
+
+
def witch_workspace(project_path):
    """
    @summary: Change the current working directory to *project_path*.
              (Name looks like a typo for "switch_workspace"; kept for
              backward compatibility with existing callers.)
    ---------
    @param project_path: directory to switch into
    ---------
    @result: None; affects the whole process (os.chdir is global)
    """

    os.chdir(project_path)  # switch working directory
+
+
+############### 数据库相关 #######################
def format_sql_value(value):
    """Normalize a Python value for interpolation into a SQL statement."""
    if isinstance(value, str):
        value = value.strip()

    elif isinstance(value, (list, dict)):
        # containers are stored as compact JSON text
        value = dumps_json(value, indent=None)

    elif isinstance(value, (datetime.date, datetime.time)):
        # covers datetime.datetime too (subclass of date)
        value = str(value)

    elif isinstance(value, bool):
        # True/False -> 1/0; checked here since bool never matches the branches above
        value = int(value)

    return value
+
+
def list2str(datas):
    """
    Render a list as a SQL-style tuple literal.
    :param datas: [1, 2]
    :return: "(1, 2)"; single-element lists render without Python's
        trailing comma, i.e. [1] -> "(1)"
    """
    data_str = str(tuple(datas))
    # Raw string: ",\)" in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python). Strips the trailing comma Python
    # adds to one-element tuples: "(1,)" -> "(1)".
    data_str = re.sub(r",\)$", ")", data_str)
    return data_str
+
+
def make_insert_sql(
    table, data, auto_update=False, update_columns=(), insert_ignore=False
):
    """
    @summary: Build a MySQL INSERT statement (Oracle would need to_date handling - TODO)
    ---------
    @param table: table name
    @param data: row data as a dict (column -> value)
    @param auto_update: use REPLACE INTO, fully overwriting an existing row
    @param update_columns: columns to update on duplicate key; when given,
        auto_update is ignored and only these columns are updated
    @param insert_ignore: skip rows that already exist (INSERT IGNORE)
    ---------
    @result: the SQL string
    """

    # Backtick-quote column names; list2str renders ("'`a`'", ...) and the
    # replace() strips the quotes, leaving (`a`, `b`).
    keys = ["`{}`".format(key) for key in data.keys()]
    keys = list2str(keys).replace("'", "")

    # NOTE(review): values are interpolated directly into the SQL text, not
    # parameterized - do not feed untrusted data through this helper.
    values = [format_sql_value(value) for value in data.values()]
    values = list2str(values)

    if update_columns:
        if not isinstance(update_columns, (tuple, list)):
            update_columns = [update_columns]
        update_columns_ = ", ".join(
            ["{key}=values({key})".format(key=key) for key in update_columns]
        )
        sql = (
            "insert%s into `{table}` {keys} values {values} on duplicate key update %s"
            % (" ignore" if insert_ignore else "", update_columns_)
        )

    elif auto_update:
        sql = "replace into `{table}` {keys} values {values}"
    else:
        sql = "insert%s into `{table}` {keys} values {values}" % (
            " ignore" if insert_ignore else ""
        )

    # NOTE(review): this textual replace rewrites every "None" in the final
    # SQL - including one occurring inside a legitimate string value.
    sql = sql.format(table=table, keys=keys, values=values).replace("None", "null")
    return sql
+
+
def make_update_sql(table, data, condition):
    """
    @summary: Build a MySQL UPDATE statement (Oracle would need to_date handling - TODO)
    ---------
    @param table: table name
    @param data: column -> new value mapping
    @param condition: raw WHERE clause text (interpolated verbatim -
        caller is responsible for escaping / injection safety)
    ---------
    @result: the SQL string
    """
    key_values = []

    for key, value in data.items():
        value = format_sql_value(value)
        if isinstance(value, str):
            # repr() adds quotes and escapes embedded quotes/backslashes
            key_values.append("`{}`={}".format(key, repr(value)))
        elif value is None:
            key_values.append("`{}`={}".format(key, "null"))
        else:
            key_values.append("`{}`={}".format(key, value))

    key_values = ", ".join(key_values)

    sql = "update `{table}` set {key_values} where {condition}"
    sql = sql.format(table=table, key_values=key_values, condition=condition)
    return sql
+
+
def make_batch_sql(
    table, datas, auto_update=False, update_columns=(), update_columns_value=()
):
    """
    @summary: Build a parameterized batch INSERT for executemany().
    ---------
    @param table: table name
    @param datas: rows as a list of dicts [{...}]; column set is taken from
        the FIRST row - later rows missing a key contribute None
    @param auto_update: use REPLACE INTO, fully overwriting existing rows
    @param update_columns: columns to update on duplicate key; when given,
        auto_update is ignored
    @param update_columns_value: literal values for update_columns (string
        literals must include their own quotes, e.g. ("'test'",))
    ---------
    @result: (sql, values) where sql uses %s placeholders, or None when
        datas is empty
    """
    if not datas:
        return

    keys = list(datas[0].keys())
    values_placeholder = ["%s"] * len(keys)

    # Build the per-row value lists in the key order of the first row.
    values = []
    for data in datas:
        value = []
        for key in keys:
            current_data = data.get(key)
            current_data = format_sql_value(current_data)

            value.append(current_data)

        values.append(value)

    keys = ["`{}`".format(key) for key in keys]
    keys = list2str(keys).replace("'", "")

    # "('%s', '%s')" -> "(%s, %s)" so the driver sees bare placeholders
    values_placeholder = list2str(values_placeholder).replace("'", "")

    if update_columns:
        if not isinstance(update_columns, (tuple, list)):
            update_columns = [update_columns]
        if update_columns_value:
            update_columns_ = ", ".join(
                [
                    "`{key}`={value}".format(key=key, value=value)
                    for key, value in zip(update_columns, update_columns_value)
                ]
            )
        else:
            update_columns_ = ", ".join(
                ["`{key}`=values(`{key}`)".format(key=key) for key in update_columns]
            )
        sql = "insert into `{table}` {keys} values {values_placeholder} on duplicate key update {update_columns}".format(
            table=table,
            keys=keys,
            values_placeholder=values_placeholder,
            update_columns=update_columns_,
        )
    elif auto_update:
        sql = "replace into `{table}` {keys} values {values_placeholder}".format(
            table=table, keys=keys, values_placeholder=values_placeholder
        )
    else:
        sql = "insert ignore into `{table}` {keys} values {values_placeholder}".format(
            table=table, keys=keys, values_placeholder=values_placeholder
        )

    return sql, values
+
+
+############### json相关 #######################
+
+
def key2underline(key: str, strict=True):
    """
    Convert a CamelCase key to snake_case.

    strict=True splits every capital letter; strict=False keeps runs of
    capitals (acronyms) together.

    >>> key2underline("HelloWord")
    'hello_word'
    >>> key2underline("SHData", strict=True)
    's_h_data'
    >>> key2underline("SHData", strict=False)
    'sh_data'
    >>> key2underline("SHDataHi", strict=False)
    'sh_data_hi'
    >>> key2underline("SHDataHi", strict=True)
    's_h_data_hi'
    >>> key2underline("dataHi", strict=True)
    'data_hi'
    """
    # strict: each single capital is a boundary; non-strict: a whole run of
    # capitals is treated as one unit (last capital joins the next word).
    regex = "[A-Z]*" if not strict else "[A-Z]"
    capitals = re.findall(regex, key)

    if capitals:
        for capital in capitals:
            if not capital:
                continue
            # Replacements are applied one occurrence at a time (count=1) in
            # match order, so earlier rewrites shift what later ones see.
            if key.startswith(capital):
                if len(capital) > 1:
                    key = key.replace(
                        capital, capital[:-1].lower() + "_" + capital[-1].lower(), 1
                    )
                else:
                    key = key.replace(capital, capital.lower(), 1)
            else:
                if len(capital) > 1:
                    key = key.replace(capital, "_" + capital.lower() + "_", 1)
                else:
                    key = key.replace(capital, "_" + capital.lower(), 1)

    return key.strip("_")
+
+
def key2hump(key):
    """Convert snake_case to CamelCase, e.g. "foo_bar" -> "FooBar"."""
    return "".join(key.title().split("_"))
+
+
def format_json_key(json_data):
    """Return a copy of *json_data* with every key converted to snake_case."""
    return {key2underline(key): value for key, value in json_data.items()}
+
+
def quick_to_json(text):
    """
    @summary: Quickly convert header text copied from a browser into a dict.
    ---------
    @param text: raw header block, one "Key: value" pair per line
    ---------
    @result: dict of header names to (possibly eval'ed) values
    """

    contents = text.split("\n")
    json = {}
    for content in contents:
        # note: str.split("\n") never yields "\n" itself; blank lines arrive
        # as "" and fall through to the regexes below
        if content == "\n":
            continue

        content = content.strip()
        regex = ["(:?.*?):(.*)", "(.*?):? +(.*)", "([^:]*)"]

        result = get_info(content, regex)
        result = result[0] if isinstance(result[0], tuple) else result
        try:
            # SECURITY: eval() executes arbitrary expressions. This helper is
            # meant for developer-pasted headers only; never feed it untrusted
            # input (ast.literal_eval would be the safe alternative).
            json[result[0]] = eval(result[1].strip())
        except Exception:
            # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed; non-evaluable values stay as strings
            json[result[0]] = result[1].strip()

    return json
+
+
+##############################
+
+
def print_pretty(object):
    # Thin wrapper over pprint; note the parameter name shadows the builtin
    # `object` (kept for interface compatibility).
    pprint(object)
+
+
def print_params2json(url):
    """
    Print the query-string parameters of *url* as pretty JSON.

    :param url: full URL; everything after the last "?" is parsed as
        "&"-separated k=v pairs.
    """
    params_json = {}
    params = url.split("?")[-1].split("&")
    for param in params:
        key_value = param.split("=", 1)
        # skip malformed fragments without "=" instead of raising IndexError
        if len(key_value) != 2:
            continue
        params_json[key_value[0]] = key_value[1]

    print(dumps_json(params_json))
+
+
def print_cookie2json(cookie_str_or_list):
    """
    Print cookies as pretty JSON.

    :param cookie_str_or_list: either a raw "k=v; k2=v2" cookie header
        string or a selenium-style list of cookie dicts.
    """
    if isinstance(cookie_str_or_list, str):
        cookie_json = {}
        cookies = cookie_str_or_list.split("; ")
        for cookie in cookies:
            # Split on the FIRST "=" only: cookie values (e.g. base64) may
            # themselves contain "=", which previously raised
            # "too many values to unpack" here.
            name, value = cookie.split("=", 1)
            cookie_json[name] = value
    else:
        cookie_json = get_cookies_from_selenium_cookie(cookie_str_or_list)

    print(dumps_json(cookie_json))
+
+
+###############################
+
+
def flatten(x):
    """flatten(sequence) -> list
    Returns a single, flat list which contains all elements retrieved
    from the sequence and all recursively contained sub-sequences
    (iterables). Strings and bytes are treated as atoms, not sequences.
    Examples:
    >>> flatten([1, 2, [3,4], (5,6)])
    [1, 2, 3, 4, 5, 6]
    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
    >>> flatten(["foo", "bar"])
    ['foo', 'bar']
    >>> flatten(["foo", ["baz", 42], "bar"])
    ['foo', 'baz', 42, 'bar']
    """
    return list(iflatten(x))


def iflatten(x):
    """iflatten(sequence) -> iterator
    Similar to ``.flatten()``, but returns iterator instead"""
    # mutually recursive with flatten(); each nested list-like is fully
    # flattened before its elements are yielded
    for el in x:
        if _is_listlike(el):
            for el_ in flatten(el):
                yield el_
        else:
            yield el


def _is_listlike(x):
    """
    True for any iterable except text/bytes (which iterate per character).

    >>> _is_listlike("foo")
    False
    >>> _is_listlike(5)
    False
    >>> _is_listlike(b"foo")
    False
    >>> _is_listlike([b"foo"])
    True
    >>> _is_listlike((b"foo",))
    True
    >>> _is_listlike({})
    True
    >>> _is_listlike(set())
    True
    >>> _is_listlike((x for x in range(3)))
    True
    >>> _is_listlike(six.moves.xrange(5))
    True
    """
    return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
+
+
+###################
+
+
def re_def_supper_class(obj, supper_class):
    """
    Re-assign the base class of a class at runtime.
    @param obj: a class (e.g. A) or an instance's class (a.__class__)
    @param supper_class: the new (single) base class
    @return: None; mutates obj in place
    """
    # NOTE(review): assigning __bases__ replaces ALL existing bases with this
    # one, and CPython rejects it for some layouts (e.g. classes inheriting
    # directly from object with incompatible slots) - use with care.
    obj.__bases__ = (supper_class,)
+
+
+###################
# In-memory fallback store used when redis is unreachable: key -> last-hit time
freq_limit_record = {}


def reach_freq_limit(rate_limit, *key):
    """
    Rate limiting backed by redis, with an in-process fallback.
    :param rate_limit: window length in seconds; 0 disables limiting
    :param key: values joined (via md5) into the rate-limit key
    :return: True when the key was hit within the window, else False
        (and the hit is recorded as a side effect)
    """
    if rate_limit == 0:
        return False

    msg_md5 = get_md5(*key)
    key = "rate_limit:{}".format(msg_md5)
    try:
        if get_redisdb().get(key):
            return True

        # redis TTL expires the key automatically after rate_limit seconds
        get_redisdb().set(key, time.time(), ex=rate_limit)
    except redis.exceptions.ConnectionError as e:
        # redis unavailable: fall back to the process-local dict above
        # (per-process only, and entries are never purged)
        global freq_limit_record

        if key not in freq_limit_record:
            freq_limit_record[key] = time.time()
            return False

        if time.time() - freq_limit_record.get(key) < rate_limit:
            return True
        else:
            freq_limit_record[key] = time.time()

    return False
+
+
def wechat_warning(
    message,
    message_prefix=None,
    rate_limit=None,
    url=None,
    user_phone=None,
    all_users: bool = None,
):
    """Send an alert to an Enterprise WeChat (WeCom) group-bot webhook.

    :param message: alert text
    :param message_prefix: key used for rate limiting (defaults to message)
    :param rate_limit: min seconds between identical alerts
    :param url: webhook URL (defaults to setting.WECHAT_WARNING_URL)
    :param user_phone: phone number(s) to @-mention
    :param all_users: mention @all when True or when no phones are given
    :return: True on success, False on send failure, None when skipped
    """

    # Resolve defaults at call time so the latest settings are picked up.
    rate_limit = rate_limit if rate_limit is not None else setting.WARNING_INTERVAL
    url = url or setting.WECHAT_WARNING_URL
    user_phone = user_phone or setting.WECHAT_WARNING_PHONE
    all_users = all_users if all_users is not None else setting.WECHAT_WARNING_ALL

    if isinstance(user_phone, str):
        user_phone = [user_phone] if user_phone else []

    if all_users is True or not user_phone:
        user_phone = ["@all"]

    if not all([url, message]):
        return

    if reach_freq_limit(rate_limit, url, user_phone, message_prefix or message):
        log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
        return

    data = {
        "msgtype": "text",
        "text": {"content": message, "mentioned_mobile_list": user_phone},
    }

    headers = {"Content-Type": "application/json"}

    try:
        response = requests.post(
            url, headers=headers, data=json.dumps(data).encode("utf8")
        )
        result = response.json()
        response.close()
        # WeCom webhook responds {"errcode": 0, "errmsg": "ok"} on success
        if result.get("errcode") == 0:
            return True
        else:
            raise Exception(result.get("errmsg"))
    except Exception as e:
        log.error("报警发送失败。 报警内容 {}, error: {}".format(message, e))
        return False
+
+
+###################
+
+
def make_item(cls, data: dict):
    """Instantiate *cls* and copy every entry of *data* onto it as attributes.

    :param cls: the Item class to instantiate (no-arg constructor)
    :param data: attribute values keyed by attribute name
    :return: the populated instance
    """
    instance = cls()
    for field, value in data.items():
        setattr(instance, field, value)
    return instance
+
+
+###################
+
+
def aio_wrap(loop=None, executor=None):
    """
    Decorator factory: wrap a normal sync function into an async version
    that runs the call in an executor.

    :param loop: event loop to use (default: the current one at call time)
    :param executor: executor to run in (default: the loop's default executor)
    """
    outer_loop = loop
    outer_executor = executor

    def wrap(fn):
        @wraps(fn)
        async def run(*args, loop=None, executor=None, **kwargs):
            # per-call loop/executor override the factory-level ones
            if loop is None:
                if outer_loop is None:
                    # NOTE(review): get_event_loop() is deprecated outside a
                    # running loop on Python 3.10+ - confirm call context
                    loop = asyncio.get_event_loop()
                else:
                    loop = outer_loop
            if executor is None:
                executor = outer_executor
            # bind args now; run_in_executor only accepts a plain callable
            pfunc = partial(fn, *args, **kwargs)
            return await loop.run_in_executor(executor, pfunc)

        return run

    return wrap
+
+
+######### number ##########
+
+
def ensure_int(n):
    """
    Coerce *n* to int, mapping falsy values (None, False, "", 0) to 0.

    >>> ensure_int(None)
    0
    >>> ensure_int(False)
    0
    >>> ensure_int(12)
    12
    >>> ensure_int("72")
    72
    >>> ensure_int('')
    0
    >>> ensure_int('1')
    1
    """
    return int(n) if n else 0
+
+
def ensure_float(n):
    """
    Coerce *n* to float, mapping falsy values (None, False, "", 0) to 0.0.

    >>> ensure_float(None)
    0.0
    >>> ensure_float(False)
    0.0
    >>> ensure_float(12)
    12.0
    >>> ensure_float("72")
    72.0
    """
    return float(n) if n else 0.0
+
+
def ensure_int64(n):
    """
    Coerce *n* to a BSON Int64, mapping falsy values to Int64(0).
    (Doctests previously called ensure_float by mistake; fixed.)

    >>> ensure_int64(None)
    0
    >>> ensure_int64(False)
    0
    >>> ensure_int64(12)
    12
    >>> ensure_int64("72")
    72
    """
    if not n:
        return bson.int64.Int64(0)
    return bson.int64.Int64(n)
+
+
def import_cls(cls_info):
    """Import and return an attribute by dotted path, e.g. "pkg.mod.Class"."""
    module_path, attr_name = cls_info.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), attr_name)

+ 12 - 0
A数据处理/site_monitor/utils/webdriver/__init__.py

@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022/9/7 4:39 PM
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+from .playwright_driver import PlaywrightDriver
+from .webdirver import InterceptRequest, InterceptResponse
+from .webdriver_pool import WebDriverPool

+ 300 - 0
A数据处理/site_monitor/utils/webdriver/playwright_driver.py

@@ -0,0 +1,300 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022/9/7 4:11 PM
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import json
+import os
+import re
+from collections import defaultdict
+from typing import Union, List
+
+try:
+    from typing import Literal  # python >= 3.8
+except ImportError:  # python <3.8
+    from typing_extensions import Literal
+
+
+from playwright.sync_api import Page, BrowserContext, ViewportSize, ProxySettings
+from playwright.sync_api import Playwright, Browser
+from playwright.sync_api import Response
+from playwright.sync_api import sync_playwright
+
+from utils import tools
+from utils.log import logger as log
+from utils.webdriver.webdirver import *
+
+
class PlaywrightDriver(WebDriver):
    """Sync-Playwright implementation of the WebDriver base class, with
    optional response interception keyed by URL regexes."""

    def __init__(
        self,
        *,
        page_on_event_callback: dict = None,
        storage_state_path: str = None,
        driver_type: Literal["chromium", "firefox", "webkit"] = "webkit",
        url_regexes: list = None,
        save_all: bool = False,
        **kwargs
    ):
        """

        Args:
            page_on_event_callback: page.on() event callbacks, e.g. page_on_event_callback={"dialog": lambda dialog: dialog.accept()}
            storage_state_path: path where browser storage state (cookies etc.) is persisted
            driver_type: browser engine: chromium, firefox or webkit
            url_regexes: URL patterns (regex list) whose responses are intercepted
            save_all: keep every intercepted response per pattern; default keeps only the last one
            **kwargs: forwarded to the WebDriver base class
        """
        super(PlaywrightDriver, self).__init__(**kwargs)
        self.driver: Playwright = None
        self.browser: Browser = None
        self.context: BrowserContext = None
        self.page: Page = None
        self.url = None
        self.storage_state_path = storage_state_path

        self._driver_type = driver_type
        self._page_on_event_callback = page_on_event_callback
        self._url_regexes = url_regexes
        self._save_all = save_all

        if self._save_all and self._url_regexes:
            log.warning(
                "获取完拦截的数据后, 请主动调用PlaywrightDriver的clear_cache()方法清空拦截的数据,否则数据会一直累加,导致内存溢出"
            )
            # save_all mode: regex -> list[InterceptResponse]
            self._cache_data = defaultdict(list)
        else:
            # default mode: regex -> last InterceptResponse only
            self._cache_data = {}

        self._setup()

    def _setup(self):
        # Normalize constructor parameters (proxy / user_agent may be callables)
        if self._proxy:
            proxy = self._proxy() if callable(self._proxy) else self._proxy
            proxy = self.format_context_proxy(proxy)
        else:
            proxy = None

        user_agent = (
            self._user_agent() if callable(self._user_agent) else self._user_agent
        )

        view_size = ViewportSize(
            width=self._window_size[0], height=self._window_size[1]
        )

        # Create the browser / context / page chain
        self.driver = sync_playwright().start()
        self.browser = getattr(self.driver, self._driver_type).launch(
            headless=self._headless,
            args=["--no-sandbox"],
            proxy=proxy,
            executable_path=self._executable_path,
            downloads_path=self._download_path,
        )

        # Reuse a previously saved storage state when the file exists
        if self.storage_state_path and os.path.exists(self.storage_state_path):
            self.context = self.browser.new_context(
                user_agent=user_agent,
                screen=view_size,
                viewport=view_size,
                proxy=proxy,
                storage_state=self.storage_state_path,
                ignore_https_errors=True
            )
        else:
            self.context = self.browser.new_context(
                user_agent=user_agent,
                screen=view_size,
                viewport=view_size,
                proxy=proxy,
                ignore_https_errors=True
            )

        if self._use_stealth_js:
            # stealth.min.js masks common headless-automation fingerprints
            path = os.path.join(os.path.dirname(__file__), "../js/stealth.min.js")
            self.context.add_init_script(path=path)

        self.page = self.context.new_page()
        # Playwright timeouts are in milliseconds
        self.page.set_default_timeout(self._timeout * 1000)

        if self._page_on_event_callback:
            for event, callback in self._page_on_event_callback.items():
                self.page.on(event, callback)

        if self._url_regexes:
            self.page.on("response", self.on_response)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_val:
            log.error(exc_val)

        self.quit()
        # NOTE(review): returning True suppresses ANY exception raised inside
        # the with-block - callers only ever see the log line above.
        return True

    def format_context_proxy(self, proxy) -> ProxySettings:
        """
        Args:
            proxy: username:password@ip:port / ip:port
        Returns:
            {
                "server": "ip:port"
                "username": username,
                "password": password,
            }
            server: http://ip:port or socks5://ip:port. Short form ip:port is considered an HTTP proxy.
        """

        if "@" in proxy:
            certification, _proxy = proxy.split("@")
            username, password = certification.split(":")

            context_proxy = ProxySettings(
                server=_proxy,
                username=username,
                password=password,
            )
        else:
            context_proxy = ProxySettings(server=proxy)

        return context_proxy

    def save_storage_stage(self):
        # Persist cookies/localStorage to storage_state_path (no-op when the
        # path was not configured). Name keeps the original "stage" typo for
        # backward compatibility with existing callers.
        if self.storage_state_path:
            os.makedirs(os.path.dirname(self.storage_state_path), exist_ok=True)
            self.context.storage_state(path=self.storage_state_path)

    def quit(self):
        # NOTE(review): not wrapped in try/finally - if page.close() raises,
        # the browser process and playwright driver are left running.
        self.page.close()
        self.context.close()
        self.browser.close()
        self.driver.stop()

    @property
    def domain(self):
        # Domain of the explicitly tracked url, falling back to the page's url
        return tools.get_domain(self.url or self.page.url)

    @property
    def cookies(self):
        # Flatten the context's cookie list into a simple {name: value} dict
        cookies_json = {}
        for cookie in self.page.context.cookies():
            cookies_json[cookie["name"]] = cookie["value"]

        return cookies_json

    @cookies.setter
    def cookies(self, val: Union[dict, List[dict]]):
        """
        Set cookies on the browser context.
        Args:
            val: List[{name: str, value: str, url: Union[str, NoneType], domain: Union[str, NoneType], path: Union[str, NoneType], expires: Union[float, NoneType], httpOnly: Union[bool, NoneType], secure: Union[bool, NoneType], sameSite: Union["Lax", "None", "Strict", NoneType]}]
                or a plain {name: value} dict, scoped to the current url.

        Returns:

        """
        if isinstance(val, list):
            self.page.context.add_cookies(val)
        else:
            cookies = []
            for key, value in val.items():
                cookies.append(
                    {"name": key, "value": value, "url": self.url or self.page.url}
                )
            self.page.context.add_cookies(cookies)

    @property
    def user_agent(self):
        # Actual UA as seen by the page's JS environment
        return self.page.evaluate("() => navigator.userAgent")

    def on_response(self, response: Response):
        # Called for EVERY response; records those whose request URL matches
        # one of the configured regexes.
        for regex in self._url_regexes:
            if re.search(regex, response.request.url):
                intercept_request = InterceptRequest(
                    url=response.request.url,
                    headers=response.request.headers,
                    data=response.request.post_data,
                )

                intercept_response = InterceptResponse(
                    request=intercept_request,
                    url=response.url,
                    headers=response.headers,
                    content=response.body(),
                    status_code=response.status,
                )
                if self._save_all:
                    self._cache_data[regex].append(intercept_response)
                else:
                    self._cache_data[regex] = intercept_response

    def get_response(self, url_regex) -> InterceptResponse:
        # Last intercepted response for the given pattern (or None)
        if self._save_all:
            response_list = self._cache_data.get(url_regex)
            if response_list:
                return response_list[-1]
        return self._cache_data.get(url_regex)

    def get_all_response(self, url_regex) -> List[InterceptResponse]:
        """
        Return every matched response; only meaningful when save_all=True
        (otherwise the single cached response is wrapped in a list).
        Args:
            url_regex:

        Returns:

        """
        response_list = self._cache_data.get(url_regex, [])
        if not isinstance(response_list, list):
            return [response_list]
        return response_list

    def get_text(self, url_regex):
        # Decoded body of the last matched response, or None
        return (
            self.get_response(url_regex).content.decode()
            if self.get_response(url_regex)
            else None
        )

    def get_all_text(self, url_regex):
        """
        Decoded bodies of every matched response; only meaningful when
        save_all=True.
        Args:
            url_regex:

        Returns:

        """
        return [
            response.content.decode() for response in self.get_all_response(url_regex)
        ]

    def get_json(self, url_regex):
        # JSON-parsed body of the last matched response, or None
        return (
            json.loads(self.get_text(url_regex))
            if self.get_response(url_regex)
            else None
        )

    def get_all_json(self, url_regex):
        """
        JSON-parsed bodies of every matched response; only meaningful when
        save_all=True.
        Args:
            url_regex:

        Returns:

        """
        return [json.loads(text) for text in self.get_all_text(url_regex)]

    def clear_cache(self):
        # Drop all intercepted data. Always resets to a defaultdict(list);
        # harmless in non-save_all mode since access goes through .get().
        self._cache_data = defaultdict(list)

+ 81 - 0
A数据处理/site_monitor/utils/webdriver/webdirver.py

@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2022/9/7 4:27 PM
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+import abc
+
+import setting as setting
+
+
class InterceptRequest:
    """Immutable-by-convention snapshot of an intercepted outgoing request."""

    def __init__(self, url, data, headers):
        self.url = url
        # request post body (None for GET requests)
        self.data = data
        self.headers = headers


class InterceptResponse:
    """Snapshot of an intercepted response, paired with its request."""

    def __init__(self, request: InterceptRequest, url, headers, content, status_code):
        # the InterceptRequest that produced this response
        self.request = request
        self.url = url
        self.headers = headers
        # raw response body (bytes)
        self.content = content
        self.status_code = status_code
+
+
class WebDriver:
    # NOTE(review): @abc.abstractmethod below is NOT enforced because this
    # class does not use abc.ABCMeta / inherit abc.ABC - subclasses are
    # expected, but instantiation is not blocked.
    def __init__(
        self,
        load_images=True,
        user_agent=None,
        proxy=None,
        headless=False,
        driver_type=None,
        timeout=16,
        window_size=(1024, 800),
        executable_path=None,
        custom_argument=None,
        download_path=None,
        auto_install_driver=True,
        use_stealth_js=True,
        **kwargs,
    ):
        """
        Webdriver wrapper base class (chrome, phantomjs and firefox).
        Args:
            load_images: whether to load images
            user_agent: a string, or a no-arg callable returning the user agent
            proxy: xxx.xxx.xxx.xxx:xxxx, or a no-arg callable returning a proxy address
            headless: run the browser headless
            driver_type: CHROME, PHANTOMJS or FIREFOX
            timeout: request timeout in seconds
            window_size: browser window size
            executable_path: browser binary path (default: system default)
            custom_argument: extra args for webdriver.Chrome(options=chrome_options, **kwargs)
            download_path: download directory; when set, suppresses the keep/discard
                prompt (Chrome only)
            auto_install_driver: auto-download the browser driver (chrome and firefox)
            use_stealth_js: inject stealth.min.js to hide automation fingerprints
            **kwargs: stored for subclasses; not interpreted here
        """
        self._load_images = load_images
        self._user_agent = user_agent or setting.DEFAULT_USERAGENT
        self._proxy = proxy
        self._headless = headless
        self._timeout = timeout
        self._window_size = window_size
        self._executable_path = executable_path
        self._custom_argument = custom_argument
        self._download_path = download_path
        self._auto_install_driver = auto_install_driver
        self._use_stealth_js = use_stealth_js
        self._driver_type = driver_type
        self._kwargs = kwargs

    @abc.abstractmethod
    def quit(self):
        # Subclasses must release all browser resources here.
        pass

+ 115 - 0
A数据处理/site_monitor/utils/webdriver/webdriver_pool.py

@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2021/3/18 4:59 下午
+---------
+@summary:
+---------
+@author: Boris
+@email: boris_liu@foxmail.com
+"""
+
+import queue
+import threading
+
+from utils.log import logger as log
+from utils.tools import Singleton
+from utils.webdriver.playwright_driver import PlaywrightDriver
+
+
+@Singleton
+class WebDriverPool:
+    """Process-wide (Singleton-decorated) pool of browser drivers.
+
+    Two modes:
+      * pooled (thread_safe=False): up to ``pool_size`` drivers shared via a
+        blocking queue;
+      * thread-local (thread_safe=True): one driver per thread, ``pool_size``
+        is ignored.
+    """
+
+    def __init__(
+        self, pool_size=5, driver_cls=PlaywrightDriver, thread_safe=False, **kwargs
+    ):
+        """
+
+        Args:
+            pool_size: size of the driver pool
+            driver_cls: driver class to instantiate
+            thread_safe: whether to be thread safe
+                If True, each thread owns its own driver; pool_size is ignored
+                and the driver count equals the number of threads.
+                If False, threads borrow drivers from the shared pool.
+            **kwargs: default construction arguments for driver_cls
+        """
+        self.pool_size = pool_size
+        self.driver_cls = driver_cls
+        self.thread_safe = thread_safe
+        self.kwargs = kwargs
+
+        # Shared pool for the non-thread-safe mode; get() blocks when empty.
+        self.queue = queue.Queue(maxsize=pool_size)
+        self.lock = threading.RLock()
+        # Total drivers ever created and not yet removed (both modes).
+        self.driver_count = 0
+        # Thread-local storage for the thread_safe mode.
+        self.ctx = threading.local()
+
+    @property
+    def driver(self):
+        # Lazily initialize the thread-local slot so first access never raises.
+        if not hasattr(self.ctx, "driver"):
+            self.ctx.driver = None
+        return self.ctx.driver
+
+    @driver.setter
+    def driver(self, driver):
+        self.ctx.driver = driver
+
+    @property
+    def is_full(self):
+        # True once the pool has reached its configured capacity.
+        return self.driver_count >= self.pool_size
+
+    def create_driver(self, user_agent: str = None, proxy: str = None):
+        # Copy so per-call overrides never mutate the pool-wide defaults.
+        kwargs = self.kwargs.copy()
+        if user_agent:
+            kwargs["user_agent"] = user_agent
+        if proxy:
+            kwargs["proxy"] = proxy
+        return self.driver_cls(**kwargs)
+
+    def get(self, user_agent: str = None, proxy: str = None):
+        """
+        Fetch a webdriver.
+        user_agent and proxy are only applied when a brand-new driver has to
+        be created; a reused driver keeps its original settings.
+        Args:
+            user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
+            proxy: xxx.xxx.xxx.xxx
+        Returns:
+            A driver instance (from the pool, or thread-local when thread_safe).
+        """
+        # Double-checked locking: re-test is_full under the lock so only one
+        # thread creates a driver when the pool is near capacity.
+        if not self.is_full and not self.thread_safe:
+            with self.lock:
+                if not self.is_full:
+                    driver = self.create_driver(user_agent, proxy)
+                    self.queue.put(driver)
+                    self.driver_count += 1
+        elif self.thread_safe:
+            if not self.driver:
+                driver = self.create_driver(user_agent, proxy)
+                self.driver = driver
+                self.driver_count += 1
+
+        if self.thread_safe:
+            driver = self.driver
+        else:
+            # Blocks until a driver is available (returned via put()).
+            driver = self.queue.get()
+
+        return driver
+
+    def put(self, driver):
+        # Return a borrowed driver to the pool; no-op for thread-local drivers.
+        if not self.thread_safe:
+            self.queue.put(driver)
+
+    def remove(self, driver):
+        # Quit a driver and drop it from the pool's bookkeeping.
+        # NOTE(review): in thread_safe mode driver_count is decremented even
+        # when the current thread has no driver — confirm this is intended.
+        if self.thread_safe:
+            if self.driver:
+                self.driver.quit()
+                self.driver = None
+        else:
+            driver.quit()
+        self.driver_count -= 1
+
+    def close(self):
+        # Drain the shared queue and quit every pooled driver. In thread_safe
+        # mode the queue is unused (empty), so only the log line takes effect.
+        if self.thread_safe:
+            log.info("Closing thread-safe (per-thread) drivers is not supported yet")
+
+        while not self.queue.empty():
+            driver = self.queue.get()
+            driver.quit()
+            self.driver_count -= 1

部分文件因为文件数量过多而无法显示