build_tools.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. from flask import Flask, request, jsonify, abort
  2. from flask_httpauth import HTTPBasicAuth
  3. from werkzeug.security import generate_password_hash, check_password_hash
  4. from common.databases import mongo_table
  5. from common.log import logger
  6. from services import (
  7. accountManagePool,
  8. get_base_url,
  9. socks5ProxyPool,
  10. httpProxyPool,
  11. )
# The modules imported below are bound as module globals so that the request
# handlers in this file can resolve them by name through globals().
# Do not remove these imports, even though they look unused.
'''以下模块以动态方式加载入全局变量,请勿删除'''
try:
    from services import zbytb
    from services import ybw
    # from services import nmpa
    from services import site_monitor
except ImportError as e:
    # Best effort: the failure is only printed, and the module keeps loading
    # without whichever names could not be imported.
    print(f"缺少全局变量, 原因:{e.args}")
# Flask application object that every route in this file is registered on.
app = Flask(__name__)
'''认证方式'''
# Authentication method: HTTP Basic auth, used via @auth.login_required.
auth = HTTPBasicAuth()
'''chrome代理状态记录'''
# Chrome proxy state record: in-memory dict keyed by the `clientid` query
# parameter; each value holds the 'chrome_use_proxy' flag and the client 'ip'.
ChromeUser: dict = {}
'''用户表'''
# User table queried by verify_password().
# NOTE(review): presumably database 'py_spider', collection
# 'spider_scheduler_auth' - confirm against common.databases.mongo_table.
Users = mongo_table('py_spider', 'spider_scheduler_auth')
  27. @auth.verify_password
  28. def verify_password(username, password):
  29. item = Users.find_one({'username': username})
  30. if item is not None:
  31. user = {
  32. item['username']: generate_password_hash(item['password'])
  33. }
  34. if username in user and check_password_hash(user.get(username), password):
  35. return username
  36. return None
  37. @app.route('/')
  38. @auth.login_required
  39. def index():
  40. return ' Hello, {}!<br><br> <a href="{}">代理池使用情况</a>'.format(
  41. auth.username(),
  42. get_base_url() + '/crawl/proxy/query'
  43. )
  44. @app.route('/proxy', methods=['GET'])
  45. def chrome_proxy_plugin():
  46. global ChromeUser
  47. client = request.args.get('clientid')
  48. ip = request.remote_addr
  49. if client is None:
  50. return jsonify(data={})
  51. if client not in ChromeUser:
  52. ChromeUser.setdefault(client, {'chrome_use_proxy': True, 'ip': ip})
  53. else:
  54. config: dict = ChromeUser.get(client)
  55. config.update({'chrome_use_proxy': True})
  56. ChromeUser.update({client: config})
  57. logger.info(f"ChromeUser: {ChromeUser}")
  58. return jsonify(data=ChromeUser.get(client))
  59. @app.route('/proxy/test', methods=['GET'])
  60. def chrome_proxy_plugin_check():
  61. global ChromeUser
  62. client = request.args.get('clientid')
  63. if client is None or client not in ChromeUser:
  64. return 'false'
  65. else:
  66. config: dict = ChromeUser.get(client)
  67. if config.get('chrome_use_proxy'):
  68. config.update({'chrome_use_proxy': False})
  69. ChromeUser.update({client: config})
  70. return 'true'
  71. else:
  72. return 'false'
  73. @app.route('/proxy/user/show', methods=['GET'])
  74. @auth.login_required
  75. def show_chrome_proxy_plugin_user():
  76. return jsonify(data=ChromeUser)
  77. @app.route('/crawl/proxy/<scheme>/fetch', methods=['GET'])
  78. @auth.login_required
  79. def get_proxy(scheme):
  80. # logger.info(f'[访问ip]{request.remote_addr}, class:{scheduler_class_name}')
  81. result = {}
  82. try:
  83. proxies = None
  84. if scheme == 'http':
  85. proxies = httpProxyPool.proxies()
  86. elif scheme == 'socks5':
  87. proxies = socks5ProxyPool.proxies()
  88. else:
  89. abort(404)
  90. logger.info(f'[调用{scheme}代理]{proxies}')
  91. if proxies is not None:
  92. result.update(proxies)
  93. except KeyError:
  94. pass
  95. return jsonify(data=result)
  96. @app.route('/crawl/proxy/query', methods=['GET'])
  97. @auth.login_required
  98. def show_proxy():
  99. socks_pool = socks5ProxyPool.get_proxy_pool()
  100. http_pool = httpProxyPool.get_proxy_pool()
  101. pool = [*socks_pool, *http_pool]
  102. return jsonify(data=pool)
  103. @app.route('/crawl/proxy/getips', methods=['GET'])
  104. @auth.login_required
  105. def show_proxy_ips():
  106. socks_ips = socks5ProxyPool.get_all_proxy_ip('socks5')
  107. http_ips = httpProxyPool.get_all_proxy_ip('http')
  108. ip_dict = {'socks': socks_ips, 'http': http_ips}
  109. return jsonify(data=ip_dict)
  110. @app.route('/upload/data/<scheduler_class_name>/<table>', methods=['POST'])
  111. @auth.login_required
  112. def upload_data(scheduler_class_name, table):
  113. data_json = request.json
  114. logger.info(f"[接收数据]{data_json}")
  115. try:
  116. scheduler_class = globals()[scheduler_class_name]
  117. scheduler_class.save_data(table, data_json)
  118. return 'success'
  119. except KeyError:
  120. return 'failure'
  121. @app.route('/crawl/<scheduler_class_name>/task/fetch', methods=['GET'])
  122. def get_crawl_task(scheduler_class_name):
  123. task = {}
  124. try:
  125. scheduler_class = globals()[scheduler_class_name]
  126. result = scheduler_class.get_crawl_task()
  127. if result is not None:
  128. task = result
  129. except KeyError:
  130. pass
  131. return jsonify(data=task)
  132. @app.route('/crawl/<scheduler_class_name>/task/total', methods=['GET'])
  133. def get_crawl_task_total(scheduler_class_name):
  134. total = {'total': 0}
  135. try:
  136. scheduler_class = globals()[scheduler_class_name]
  137. total.update({'total': scheduler_class.task_total})
  138. except KeyError:
  139. pass
  140. return jsonify(data=total)
  141. @app.route('/competing_goods/account/fetch', methods=['GET'])
  142. @auth.login_required
  143. def competing_goods_account_lock():
  144. req_ip = request.remote_addr
  145. site = request.args.get('site')
  146. crawl_type = request.args.get('crawl_type')
  147. result = accountManagePool.lock_account(site, crawl_type, req_ip)
  148. return jsonify(data=result)
  149. @app.route('/competing_goods/account/release', methods=['GET'])
  150. @auth.login_required
  151. def competing_goods_account_release():
  152. req_ip = request.remote_addr
  153. uid = request.args.get('uid')
  154. crawl_type = request.args.get('crawl_type')
  155. if uid in [None, '']:
  156. abort(404) # Unauthorized 未授权
  157. res = accountManagePool.release_account(uid, crawl_type, req_ip)
  158. return jsonify(data=res)
  159. # if __name__ == '__main__':
  160. # app.run(host='0.0.0.0', port=1405, debug=True, use_reloader=False)