
Delete temporary test configuration

dzr 1 month ago
parent commit fa88ac7f16
1 changed file with 0 additions and 124 deletions

setting.py (+0 −124)

@@ -1,124 +0,0 @@
-# -*- coding: utf-8 -*-
-"""爬虫配置文件"""
-import datetime
-import os
-import sys
-
-# Table for items that failed to save
-TAB_FAILED_ITEMS = "pyspider:s_failed_items"
-# Table for failed requests
-TAB_FAILED_REQUESTS = "pyspider:z_failed_requests"
-# Production table for collection tasks
-TASK_REQUEST_PRODUCE = "pyspider_listdata"
-# Record table for failed tasks
-TASK_REQUEST_FAILED = "pyspider_listdata_err"
-# Summary table for spider collection metrics
-SPIDER_HEARTBEAT_RECORD = "pyspider_heartbeat"
-
-# MONGO
-MONGO_IP = "172.20.45.130"
-MONGO_PORT = 27017
-MONGO_DB = "py_spider"
-
-# REDIS
-REDISDB_IP_PORTS = "172.20.45.129:3379"
-REDISDB_USER_PASS = "jianyu@python"
-REDISDB_DB = 3
-
-# RabbitMQ
-RABBITMQ_IP_PORT = '172.31.31.204:5672'
-RABBITMQ_USER = 'root'
-RABBITMQ_USER_PASS = '123123'
-RABBITMQ_EXCHANGE = 'py_spider'
-RABBITMQ_EXCHANGE_TYPE = 'direct'
-RABBITMQ_VIRTUAL_HOST = '/'
-RABBITMQ_SOCKET_TIMEOUT = 60
-RABBITMQ_HEARTBEAT = 600
-
-# Pipelines for writing data to storage
-ITEM_PIPELINES = [
-    "feapder.pipelines.mongo_pipeline.MongoPipeline",
-    # "feapder.pipelines.redis_pipeline.RedisPipeline",
-    # "feapder.pipelines.rabbitmq_pipeline.RabbitMqPipeline",
-]
-# Maximum number of failures (saves and updates) when exporting data; an alert is raised beyond this count
-EXPORT_DATA_MAX_FAILED_TIMES = 5
-# Maximum number of retries (saves and updates) when exporting data; retrying is abandoned beyond this count
-EXPORT_DATA_MAX_RETRY_TIMES = 5
-
-COLLECTOR_TASK_COUNT = 100  # Number of tasks fetched per batch
-
-# Spider
-SPIDER_THREAD_COUNT = 1  # Spider concurrency; 32 is recommended when speed matters
-SPIDER_MAX_RETRY_TIMES = 3  # Maximum retries per request
-
-# Browser rendering
-WEBDRIVER = dict(
-    pool_size=1,  # Number of browsers in the pool
-    load_images=False,  # Whether to load images
-    user_agent=None,  # String, or zero-argument function that returns a user agent
-    proxy=None,  # xxx.xxx.xx.xxx:xxxx, or zero-argument function that returns a proxy address
-    headless=True,  # Whether to run headless
-    driver_type="FIREFOX",  # CHROME or FIREFOX
-    timeout=3,  # Request timeout
-    window_size=(1280, 800),  # Window size
-    executable_path='/Users/dongzhaorui/Desktop/dzr/pymain/py-tools/settings/geckodriver',  # Driver path; falls back to the default location
-    render_time=0,  # Render time: wait this long after the page opens before grabbing the source
-    custom_argument=["--ignore-certificate-errors"],  # Custom browser arguments
-    usages_local_driver=False,  # Whether to use a local driver
-    server_addr="http://172.31.31.204:8899/wd/hub",  # Selenium remote server address
-    version="",  # Remote browser version
-    service_log_path=os.devnull  # Log path
-)
-
-SAVE_FAILED_REQUEST = False
-RETRY_FAILED_REQUESTS = False
-
-# Network timeout for requests
-REQUEST_TIMEOUT = 60
-
-# Proxy settings: proxy extraction API; returned proxies are separated by \r\n (see the parsing sketch after the diff)
-PROXY_EXTRACT_API = "http://proxy.spdata.jianyu360.com/proxy/getallip"
-PROXY_ENABLE = True
-# Self-hosted proxy pool
-JY_PROXY_URL = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
-JY_PROXY_AUTHOR = "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
-
-# Task center
-JY_TASK_URL = "http://pytask.spdata.jianyu360.com"
-
-# Splash rendering service
-SPLASH_API = "http://splash.spdata.jianyu360.com/render.json"
-
-# Captcha service
-CAPTCHA_URL = "http://pycaptcha.spdata.jianyu360.com"
-
-# OSS configuration
-ALI_BUCKET_CONFIG = {
-    "key_id": "LTAI4G5x9aoZx8dDamQ7vfZi",
-    "key_secret": "Bk98FsbPYXcJe72n1bG3Ssf73acuNh",
-    "endpoint": "oss-cn-beijing.aliyuncs.com",
-    "bucket_name": "jy-datafile"
-}
-
-# Item deduplication
-ITEM_FILTER_ENABLE = False
-ITEM_FILTER_SETTING = dict(
-    filter_type=5,  # Redis-based deduplication
-    expire_time=86400,  # Expiry time: 1 day
-    redis_url="redis://default:jianyu%40python@172.20.45.129:3379/2"  # '@' in the password percent-encoded
-)
-
-# Logging settings
-DTIME = datetime.datetime.now().strftime("%Y-%m-%d")
-LOG_NAME = os.path.split(sys.argv[0])[-1].split(".")[0]
-LOG_PATH = "log/%s/%s.log" % (DTIME, LOG_NAME)  # Log file path
-LOG_LEVEL = "DEBUG"
-LOG_COLOR = True  # Whether to colorize output
-LOG_IS_WRITE_TO_CONSOLE = True  # Whether to print to the console
-LOG_IS_WRITE_TO_FILE = True  # Whether to write to a file
-LOG_MODE = "w"  # File write mode
-LOG_MAX_BYTES = 10 * 1024 * 1024  # Maximum bytes per log file
-LOG_BACKUP_COUNT = 1  # Number of log files to keep
-LOG_ENCODING = "utf8"  # Log file encoding
-OTHERS_LOG_LEVAL = "ERROR"  # Log level for third-party libraries; rarely needed
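
The proxy comment above notes that PROXY_EXTRACT_API returns proxy addresses separated by \r\n. A minimal sketch of how a consumer might split that response; the parse_proxies helper is hypothetical and assumes the endpoint returns plain text:

import requests

PROXY_EXTRACT_API = "http://proxy.spdata.jianyu360.com/proxy/getallip"


def parse_proxies(raw: str) -> list[str]:
    # Per the deleted comment, proxies are separated by "\r\n";
    # strip whitespace and drop empty fragments defensively.
    return [line.strip() for line in raw.split("\r\n") if line.strip()]


if __name__ == "__main__":
    resp = requests.get(PROXY_EXTRACT_API, timeout=10)
    resp.raise_for_status()
    for proxy in parse_proxies(resp.text):
        print(proxy)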
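
For context on how a file like this is consumed: feapder picks up a setting.py found on the project path and applies it to every spider in the project, which is why a temporary test copy is worth deleting. A minimal sketch of a spider that would load these settings, assuming feapder is installed; the class name and seed URL are placeholders:

import feapder


class DemoSpider(feapder.AirSpider):
    # AirSpider is feapder's lightweight single-process spider; it reads
    # setting.py from the project root when one is importable.
    def start_requests(self):
        yield feapder.Request("https://example.com")  # placeholder seed URL

    def parse(self, request, response):
        # response.text holds the fetched page source
        print(response.text[:200])


if __name__ == "__main__":
    # thread_count mirrors SPIDER_THREAD_COUNT in the deleted config
    DemoSpider(thread_count=1).start()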