|
@@ -10,15 +10,27 @@ MONGO_PORT = 27080
|
|
|
MONGO_DB = "py_spider"
|
|
|
|
|
|
# REDIS
|
|
|
-# ip:port 多个可写为列表或者逗号隔开 如 ip1:port1,ip2:port2 或 ["ip1:port1", "ip2:port2"]
|
|
|
REDISDB_IP_PORTS = "172.17.4.232:7361"
|
|
|
REDISDB_USER_PASS = "k5ZJR5KV4q7DRZ92DQ"
|
|
|
-REDISDB_DB = 10
|
|
|
-
|
|
|
-# 数据入库的pipeline,可自定义,默认RedisPipeline
|
|
|
+REDISDB_DB = 12
|
|
|
+# 调度器,存放item与request的根目录 (redis key 前缀)
|
|
|
+REDIS_KEY = "py_spider"
|
|
|
+
|
|
|
+# RabbitMQ
|
|
|
+RABBITMQ_IP_PORT = '172.17.4.232:5672'
|
|
|
+RABBITMQ_USER = 'root'
|
|
|
+RABBITMQ_USER_PASS = 'V0O0049qBI2rV1554jLZPiBZ8H3Bo4'
|
|
|
+RABBITMQ_EXCHANGE = 'py_spider'
|
|
|
+RABBITMQ_EXCHANGE_TYPE = 'direct'
|
|
|
+RABBITMQ_VIRTUAL_HOST = '/'
|
|
|
+RABBITMQ_SOCKET_TIMEOUT = 60
|
|
|
+RABBITMQ_HEARTBEAT = 600
|
|
|
+
|
|
|
+# 数据入库的pipeline
|
|
|
ITEM_PIPELINES = [
|
|
|
# "feapder.pipelines.mongo_pipeline.MongoPipeline",
|
|
|
- "feapder.pipelines.swordfish.redis_pipeline.RedisPipeline"
|
|
|
+ # "feapder.pipelines.redis_pipeline.RedisPipeline",
|
|
|
+ "feapder.pipelines.rabbitmq_pipeline.RabbitMqPipeline",
|
|
|
]
|
|
|
# 导出数据时最大的失败次数,包括保存和更新,超过这个次数报警
|
|
|
EXPORT_DATA_MAX_FAILED_TIMES = 5
|
|
@@ -28,6 +40,7 @@ EXPORT_DATA_MAX_RETRY_TIMES = 5
|
|
|
COLLECTOR_TASK_COUNT = 100 # 每次获取任务数量
|
|
|
|
|
|
# 爬虫
|
|
|
+SPIDER_HEARTBEAT = "spider_heartbeat" # 爬虫心跳
|
|
|
SPIDER_THREAD_COUNT = 1 # 爬虫并发数,追求速度推荐32
|
|
|
SPIDER_MAX_RETRY_TIMES = 3 # 每个请求最大重试次数
|
|
|
|
|
@@ -58,9 +71,6 @@ SAVE_FAILED_REQUEST = False
|
|
|
# request网络请求超时时间
|
|
|
REQUEST_TIMEOUT = 60
|
|
|
|
|
|
-# 调度器,存放item与request的根目录
|
|
|
-REDIS_KEY = "fwork"
|
|
|
-
|
|
|
# 设置代理,代理提取API ,返回的代理分割符为\r\n
|
|
|
PROXY_EXTRACT_API = "http://proxy.spdata.jianyu360.com/proxy/getallip"
|
|
|
PROXY_ENABLE = True
|
|
@@ -84,15 +94,6 @@ ITEM_FILTER_SETTING = dict(
|
|
|
expire_time=63072000, # 过期时间2年
|
|
|
)
|
|
|
|
|
|
-# 企业微信报警
|
|
|
-WECHAT_WARNING_URL = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=-4e26-a563-cd6b07b9db14" # 企业微信机器人api
|
|
|
-WECHAT_WARNING_PHONE = "swordFish" # 报警人 将会在群内@此人, 支持列表,可指定多人
|
|
|
-WECHAT_WARNING_ALL = True # 是否提示所有人, 默认为False
|
|
|
-# 时间间隔
|
|
|
-WARNING_INTERVAL = 360 # 相同报警的报警时间间隔,防止刷屏; 0表示不去重
|
|
|
-WARNING_LEVEL = "ERROR" # 报警级别, DEBUG / ERROR
|
|
|
-WARNING_FAILED_COUNT = 2 # 任务失败数 超过WARNING_FAILED_COUNT则报警
|
|
|
-
|
|
|
# 日志设置
|
|
|
DTIME = datetime.datetime.now().strftime("%Y-%m-%d")
|
|
|
LOG_NAME = os.path.split(sys.argv[0])[-1].split(".")[0]
|
|
@@ -106,10 +107,6 @@ LOG_MAX_BYTES = 10 * 1024 * 1024 # 每个日志文件的最大字节数
|
|
|
LOG_BACKUP_COUNT = 20 # 日志文件保留数量
|
|
|
LOG_ENCODING = "utf8" # 日志文件编码
|
|
|
OTHERS_LOG_LEVAL = "ERROR" # 第三方库的log等级 一般用不到
|
|
|
-# elk服务
|
|
|
-LOG_IS_SEND_TO_LOGSTASH = False
|
|
|
-LOGSTASH_IP = "47.95.151.156" # 已失效("47.95.151.156")
|
|
|
-LOGSTASH_PORT = 5044
|
|
|
|
|
|
# 自建代理池
|
|
|
SWORDFISH_PROXY_URL = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
|
|
@@ -119,7 +116,7 @@ SWORDFISH_PROXY_AUTHOR = "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
|
|
|
SWORDFISH_RENDER_URL = "http://splash.spdata.jianyu360.com/render.json"
|
|
|
|
|
|
# 爬虫心跳
|
|
|
-HEARTBEAT_TABLE = "spider_heartbeat" # 爬虫采集心跳记录表名
|
|
|
+RECORD_SPIDER_HEARTBEAT = "spider_heartbeat" # 爬虫采集心跳记录表名
|
|
|
|
|
|
# 远程bucket配置
|
|
|
ALI_BUCKET_CONFIG = {
|