# tools.py
# coding: utf-8
import logging

import requests
from fake_useragent import UserAgent
from pymongo import MongoClient

from db import RedisFilter
  7. # UA
  8. ua = UserAgent()
  9. # redis
  10. dedup = RedisFilter('redis://:k5ZJR5KV4q7DRZ92DQ@172.17.189.142:7361/4')
  11. # mgo
  12. dbm = MongoClient("172.17.4.87", 27080).hp_news
  13. news_keyword_coll = dbm.news_Keywords
  14. news_list_coll = dbm.news_list
  15. news_detail_coll = dbm.news_detail
  16. def get_proxy():
  17. headers = {"Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"}
  18. url = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
  19. proxy = requests.get(url, headers=headers, timeout=3).json()
  20. proxy = proxy.get("data")
  21. logging.info("切换代理:{}".format(proxy))
  22. if not proxy:
  23. raise ValueError('未获取代理ip')
  24. return proxy.get("http")