123456789101112131415161718192021222324252627282930313233 |
- # coding: utf-8
- import logging
- import requests
- from fake_useragent import UserAgent
- from pymongo import MongoClient
- from db import RedisFilter
- # Shared fake_useragent UserAgent instance, created once at import time for generating User-Agent strings
- ua = UserAgent()
- # Project-local RedisFilter (from db) built from a Redis URL; presumably used for de-duplication, judging by the name (confirm). NOTE(review): the password is hard-coded in the URL; consider loading it from configuration
- dedup = RedisFilter('redis://:k5ZJR5KV4q7DRZ92DQ@172.17.189.142:7361/4')
- # MongoDB: handle to the hp_news database, followed by handles to its news_Keywords, news_list and news_detail collections
- dbm = MongoClient("172.17.4.87", 27080).hp_news
- news_keyword_coll = dbm.news_Keywords
- news_list_coll = dbm.news_list
- news_detail_coll = dbm.news_detail
def get_proxy():
    """Fetch a proxy address from the proxy service.

    Sends a GET request (3 second timeout) to the proxy-fetch endpoint,
    reads the ``data`` object from the JSON reply and returns the value
    stored under its ``"http"`` key.

    Returns:
        The non-empty value found under ``data["http"]`` in the reply.

    Raises:
        ValueError: If the reply is not a JSON object, has no usable
            ``data`` object, or that object has no ``"http"`` entry.
        requests.RequestException: Propagated from ``requests.get`` when
            the request itself fails (connection error, timeout, ...).
    """
    # NOTE(review): the Basic-auth credential is hard-coded in source;
    # consider loading it from configuration instead.
    headers = {"Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"}
    url = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
    payload = requests.get(url, headers=headers, timeout=3).json()

    # A reply that is not a JSON object is treated the same as a reply
    # without "data", so callers see ValueError rather than AttributeError.
    proxy = payload.get("data") if isinstance(payload, dict) else None
    logging.info("切换代理:%s", proxy)
    if not proxy or not isinstance(proxy, dict):
        raise ValueError('未获取代理ip')

    # Never hand back None: a caller would silently continue without a
    # proxy if the "http" entry were missing or empty.
    address = proxy.get("http")
    if not address:
        raise ValueError('未获取代理ip')
    return address
|