routes.py

from flask import Flask, request, jsonify, abort
from flask_httpauth import HTTPBasicAuth
from werkzeug.security import generate_password_hash, check_password_hash

from common.databases import mongo_table
from common.log import logger
from services import (
    accountManagePool,
    get_base_url,
    socks5ProxyPool,
    httpProxyPool,
)

# The following modules are loaded into the global namespace dynamically; do not remove them.
try:
    from services import zbytb
    from services import ybw
    # from services import nmpa
    from services import site_monitor
except ImportError as e:
    print(f"Missing global module, reason: {e.args}")

app = Flask(__name__)

# HTTP Basic authentication
auth = HTTPBasicAuth()

# Chrome proxy status records, keyed by client id
ChromeUser: dict = {}

# User table
Users = mongo_table('py_spider', 'spider_scheduler_auth')


@auth.verify_password
def verify_password(username, password):
    item = Users.find_one({'username': username})
    if item is not None:
        # The stored password is plaintext; hash it, then compare it
        # with the submitted password.
        user = {
            item['username']: generate_password_hash(item['password'])
        }
        if username in user and check_password_hash(user.get(username), password):
            return username
    return None
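
# A sketch of the assumed Users document shape (hypothetical values):
#   {'username': 'spider_admin', 'password': 'secret'}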


@app.route('/')
@auth.login_required
def index():
    return 'Hello, {}!<br><br><a href="{}">Proxy pool usage</a>'.format(
        auth.username(),
        get_base_url() + '/crawl/proxy/query'
    )


@app.route('/proxy', methods=['GET'])
def chrome_proxy_plugin():
    global ChromeUser
    client = request.args.get('clientid')
    ip = request.remote_addr
    if client is None:
        return jsonify(data={})
    if client not in ChromeUser:
        ChromeUser.setdefault(client, {'chrome_use_proxy': True, 'ip': ip})
    else:
        config: dict = ChromeUser.get(client)
        config.update({'chrome_use_proxy': True})
        ChromeUser.update({client: config})
    logger.info(f"ChromeUser: {ChromeUser}")
    return jsonify(data=ChromeUser.get(client))


@app.route('/proxy/test', methods=['GET'])
def chrome_proxy_plugin_check():
    global ChromeUser
    client = request.args.get('clientid')
    if client is None or client not in ChromeUser:
        return 'false'
    else:
        config: dict = ChromeUser.get(client)
        if config.get('chrome_use_proxy'):
            config.update({'chrome_use_proxy': False})
            ChromeUser.update({client: config})
            return 'true'
        else:
            return 'false'
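
# One-shot handshake between the two endpoints above (assumed client flow;
# 'client-01' is a hypothetical clientid):
#   GET /proxy?clientid=client-01       -> marks chrome_use_proxy=True for the client
#   GET /proxy/test?clientid=client-01  -> returns 'true' once and flips the flag off
#   GET /proxy/test?clientid=client-01  -> returns 'false' until /proxy is called again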


@app.route('/proxy/user/show', methods=['GET'])
@auth.login_required
def show_chrome_proxy_plugin_user():
    return jsonify(data=ChromeUser)


@app.route('/crawl/proxy/<scheme>/fetch', methods=['GET'])
@auth.login_required
def get_proxy(scheme):
    # logger.info(f'[request ip]{request.remote_addr}')
    pk = request.args.get('pk')  # proxy type: 1 = lua downloader deployment nodes, 2 = python-only proxies
    result = {}
    try:
        if scheme == 'http':
            proxies = httpProxyPool.proxies(pk=pk)
        elif scheme == 'socks5':
            proxies = socks5ProxyPool.proxies(pk=pk)
        else:
            abort(404)
        # logger.debug(f'[fetched {scheme} proxy]{proxies}')
        if proxies is not None:
            result.update(proxies)
    except (KeyError, IndexError):
        pass
    return jsonify(data=result)
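
# Example fetch (a sketch; host, port, and credentials are hypothetical):
#   curl -u user:pass 'http://127.0.0.1:5000/crawl/proxy/http/fetch?pk=2'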


@app.route('/crawl/proxy/query', methods=['GET'])
@auth.login_required
def show_proxy():
    pk = request.args.get('pk')
    socks_pool = socks5ProxyPool.get_proxy_pool(pk=pk)
    http_pool = httpProxyPool.get_proxy_pool(pk=pk)
    pool = [*socks_pool, *http_pool]
    return jsonify(data=pool)


@app.route('/crawl/proxy/getips', methods=['GET'])
@auth.login_required
def show_proxy_ips():
    pk = request.args.get('pk')
    socks_ips = socks5ProxyPool.get_all_proxy_ip('socks5', pk=pk)
    http_ips = httpProxyPool.get_all_proxy_ip('http', pk=pk)
    return jsonify(data={'socks': socks_ips, 'http': http_ips})


@app.route('/upload/data/<scheduler_class_name>/<table>', methods=['POST'])
@auth.login_required
def upload_data(scheduler_class_name, table):
    data_json = request.json
    logger.info(f"[received data]{data_json}")
    try:
        scheduler_class = globals()[scheduler_class_name]
        scheduler_class.save_data(table, data_json)
        return 'success'
    except KeyError:
        return 'failure'
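
# Example upload (a sketch; 'zbytb' must be one of the dynamically loaded
# modules above, and the table name and payload are hypothetical):
#   curl -u user:pass -H 'Content-Type: application/json' \
#        -d '{"title": "..."}' \
#        'http://127.0.0.1:5000/upload/data/zbytb/list_items'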


@app.route('/crawl/<scheduler_class_name>/task/fetch', methods=['GET'])
def get_crawl_task(scheduler_class_name):
    task = {}
    try:
        scheduler_class = globals()[scheduler_class_name]
        result = scheduler_class.get_crawl_task()
        if result is not None:
            task = result
    except KeyError:
        pass
    return jsonify(data=task)


@app.route('/crawl/<scheduler_class_name>/task/total', methods=['GET'])
def get_crawl_task_total(scheduler_class_name):
    total = {'total': 0}
    try:
        scheduler_class = globals()[scheduler_class_name]
        total.update({'total': scheduler_class.task_total})
    except KeyError:
        pass
    return jsonify(data=total)


@app.route('/competing_goods/account/fetch', methods=['GET'])
@auth.login_required
def competing_goods_account_lock():
    req_ip = request.remote_addr
    site = request.args.get('site')
    crawl_type = request.args.get('crawl_type')
    result = accountManagePool.lock_account(site, crawl_type, req_ip)
    return jsonify(data=result)
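
# Assumed lock/release pairing (query values are hypothetical; whether the
# lock result carries a uid depends on accountManagePool.lock_account):
#   GET /competing_goods/account/fetch?site=zbytb&crawl_type=list
#   GET /competing_goods/account/release?uid=<uid>&crawl_type=list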


@app.route('/competing_goods/account/release', methods=['GET'])
@auth.login_required
def competing_goods_account_release():
    req_ip = request.remote_addr
    uid = request.args.get('uid')
    crawl_type = request.args.get('crawl_type')
    if uid in [None, '']:
        abort(401)  # Unauthorized: a uid is required
    res = accountManagePool.release_account(uid, crawl_type, req_ip)
    return jsonify(data=res)
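

# Entry-point sketch (an assumption: deployment may run this module under a
# WSGI server instead; host and port here are hypothetical):
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)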