routes.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. from flask import Flask, request, jsonify, abort
  2. from flask_httpauth import HTTPBasicAuth
  3. from werkzeug.security import generate_password_hash, check_password_hash
  4. from common.databases import mongo_table
  5. from common.log import logger
  6. from services import (
  7. accountManagePool,
  8. get_base_url
  9. )
  # The scheduler modules imported below are looked up by name through
  # globals() in the route handlers of this file, so these imports must stay
  # even though nothing references them directly.
  try:
      from services import zbytb
      from services import ybw
      # from services import nmpa
  except ImportError as e:
      print(f"缺少全局变量, 原因:{e.args}")

  # Flask application on which every route in this module is registered.
  app = Flask(__name__)

  # Authentication scheme: HTTP Basic.
  auth = HTTPBasicAuth()

  # Chrome proxy state, keyed by the client id sent by the browser plugin.
  ChromeUser: dict = {}

  # User table consulted when verifying credentials.
  Users = mongo_table('py_spider', 'spider_scheduler_auth')
  @auth.verify_password
  def verify_password(username, password):
      """Validate HTTP Basic credentials against the user table.

      The user document is fetched by ``username`` and its ``password`` field
      is compared with the supplied password in constant time.

      Previously the stored value was run through ``generate_password_hash``
      on every request and the supplied password was then checked against
      that fresh hash. That is only an (expensive) equality test, so it is
      replaced by a direct constant-time comparison with the same outcome.

      NOTE(review): this implies the table holds plaintext passwords; storing
      hashes and calling ``check_password_hash`` on them would be preferable.
      Confirm against how the table is populated.

      Args:
          username: User name taken from the Authorization header.
          password: Password taken from the Authorization header.

      Returns:
          ``username`` when the credentials match, otherwise ``None``.
      """
      import hmac  # function-scope import keeps this block self-contained

      item = Users.find_one({'username': username})
      if item is None:
          return None

      try:
          stored_name = item['username']
          stored_password = item['password']
      except KeyError:
          # A document without the expected fields cannot authenticate anyone.
          return None

      if stored_name != username:
          return None
      if not isinstance(stored_password, str) or not isinstance(password, str):
          return None

      # compare_digest only accepts non-ASCII text as bytes, so encode first.
      if hmac.compare_digest(
          stored_password.encode('utf-8'), password.encode('utf-8')
      ):
          return username
      return None
  @app.route('/')
  @auth.login_required
  def index():
      """Greet the authenticated user and link to the proxy-pool usage page.

      Returns:
          An HTML snippet containing the current user name and a link built
          from ``get_base_url()`` plus ``/crawl/proxy/query``.
      """
      template = ' Hello, {}!<br><br> <a href="{}">代理池使用情况</a>'
      current_user = auth.username()
      proxy_usage_url = get_base_url() + '/crawl/proxy/query'
      return template.format(current_user, proxy_usage_url)
  @app.route('/proxy', methods=['GET'])
  def chrome_proxy_plugin():
      """Switch the proxy flag on for the client named by ``clientid``.

      A client seen for the first time is registered together with the
      address of the request; a known client only has its
      ``chrome_use_proxy`` flag set back to ``True`` (its recorded ``ip`` is
      left as it was).

      Returns:
          JSON ``{"data": {}}`` when ``clientid`` is missing, otherwise JSON
          whose ``data`` is the client's current state.
      """
      client_id = request.args.get('clientid')
      remote_ip = request.remote_addr
      if client_id is None:
          return jsonify(data={})

      # setdefault only stores the new record when the client is unknown.
      state = ChromeUser.setdefault(
          client_id, {'chrome_use_proxy': True, 'ip': remote_ip}
      )
      state['chrome_use_proxy'] = True

      logger.info(f"ChromeUser: {ChromeUser}")
      return jsonify(data=ChromeUser.get(client_id))
  @app.route('/proxy/test', methods=['GET'])
  def chrome_proxy_plugin_check():
      """Report once whether the client's proxy flag is set, then clear it.

      Returns:
          ``'true'`` if the client named by ``clientid`` is registered and its
          ``chrome_use_proxy`` flag was set (the flag is reset to ``False`` as
          part of this call); ``'false'`` in every other case.
      """
      client_id = request.args.get('clientid')
      if client_id is None or client_id not in ChromeUser:
          return 'false'

      state = ChromeUser[client_id]
      if not state.get('chrome_use_proxy'):
          return 'false'

      # Consume the flag so the next check answers 'false' until it is re-armed.
      state['chrome_use_proxy'] = False
      return 'true'
  @app.route('/proxy/user/show', methods=['GET'])
  @auth.login_required
  def show_chrome_proxy_plugin_user():
      """Return the complete ``ChromeUser`` registry as JSON (login required)."""
      registry = ChromeUser
      return jsonify(data=registry)
  @app.route('/upload/data/<scheduler_class_name>/<table>', methods=['POST'])
  @auth.login_required
  def upload_data(scheduler_class_name, table):
      """Hand the posted JSON payload to the named scheduler's ``save_data``.

      Args:
          scheduler_class_name: URL segment naming a module-level global of
              this file that provides ``save_data``.
          table: URL segment passed through to ``save_data`` as its first
              argument.

      Returns:
          ``'success'`` after ``save_data`` returns; ``'failure'`` when the
          name does not resolve to an object with a callable ``save_data`` or
          when ``save_data`` raises ``KeyError``.
      """
      data_json = request.json
      logger.info(f"[接收数据]{data_json}")

      # SECURITY NOTE: the scheduler name comes straight from the URL and is
      # used to index globals(). Globals that lack a callable ``save_data``
      # are rejected here instead of failing with an AttributeError (HTTP 500).
      # An explicit allow-list of scheduler names would be stricter still.
      scheduler = globals().get(scheduler_class_name)
      save_data = getattr(scheduler, 'save_data', None)
      if not callable(save_data):
          return 'failure'

      try:
          save_data(table, data_json)
      except KeyError:
          return 'failure'
      return 'success'
  @app.route('/task/<scheduler_class_name>/fetch', methods=['GET'])
  def get_crawl_task(scheduler_class_name):
      """Fetch one crawl task from the named scheduler.

      Args:
          scheduler_class_name: URL segment naming a module-level global of
              this file that provides ``get_crawl_task``.

      Returns:
          JSON whose ``data`` is the value returned by the scheduler's
          ``get_crawl_task()``, or ``{}`` when that value is ``None``, when
          the call raises ``KeyError``, or when the name does not resolve to
          an object with a callable ``get_crawl_task``.
      """
      task = {}

      # SECURITY NOTE: the scheduler name comes straight from the URL and is
      # used to index globals(). Globals without a callable
      # ``get_crawl_task`` yield the empty task rather than an AttributeError
      # (HTTP 500).
      scheduler = globals().get(scheduler_class_name)
      fetch_task = getattr(scheduler, 'get_crawl_task', None)
      if callable(fetch_task):
          try:
              result = fetch_task()
          except KeyError:
              result = None
          if result is not None:
              task = result

      return jsonify(data=task)
  @app.route('/task/<scheduler_class_name>/total', methods=['GET'])
  def get_crawl_task_total(scheduler_class_name):
      """Report the ``task_total`` attribute of the named scheduler.

      Args:
          scheduler_class_name: URL segment naming a module-level global of
              this file that exposes ``task_total``.

      Returns:
          JSON ``{"data": {"total": <value>}}``; the value is ``0`` when the
          name does not resolve, when the resolved object has no
          ``task_total`` attribute, or when reading it raises ``KeyError``.
      """
      total = {'total': 0}

      # SECURITY NOTE: the scheduler name comes straight from the URL and is
      # used to index globals(). A global without ``task_total`` now leaves
      # the total at 0 rather than failing with an AttributeError (HTTP 500).
      scheduler = globals().get(scheduler_class_name)
      try:
          total['total'] = scheduler.task_total
      except (AttributeError, KeyError):
          pass

      return jsonify(data=total)
  @app.route('/competing_goods/account/fetch', methods=['GET'])
  @auth.login_required
  def competing_goods_account_lock():
      """Lock an account through ``accountManagePool`` for the calling address.

      Reads ``site`` and ``crawl_type`` from the query string and passes them,
      together with the address of the request, to
      ``accountManagePool.lock_account``.

      Returns:
          JSON whose ``data`` is whatever ``lock_account`` returned.
      """
      query = request.args
      locked = accountManagePool.lock_account(
          query.get('site'),
          query.get('crawl_type'),
          request.remote_addr,
      )
      return jsonify(data=locked)
  @app.route('/competing_goods/account/release', methods=['GET'])
  @auth.login_required
  def competing_goods_account_release():
      """Release an account through ``accountManagePool``.

      Reads ``uid`` and ``crawl_type`` from the query string and passes them,
      together with the address of the request, to
      ``accountManagePool.release_account``.

      Returns:
          JSON whose ``data`` is whatever ``release_account`` returned.

      Raises:
          An HTTP 404 abort when ``uid`` is missing or empty.
      """
      query = request.args
      caller_ip = request.remote_addr
      uid = query.get('uid')
      crawl_type = query.get('crawl_type')

      if uid is None or uid == '':
          # A missing or empty uid is answered with 404 (Not Found); note that
          # this status does not mean "Unauthorized".
          abort(404)

      released = accountManagePool.release_account(uid, crawl_type, caller_ip)
      return jsonify(data=released)