build_tools.py

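"""Flask service for the spider scheduler: HTTP Basic authentication backed by
MongoDB, http/socks5 proxy pool distribution, Chrome proxy-plugin state
tracking, crawl task dispatch, and competing-goods account lock/release."""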
from flask import Flask, request, jsonify, abort
from flask_httpauth import HTTPBasicAuth
from werkzeug.security import generate_password_hash, check_password_hash

from common.databases import mongo_table
from common.log import logger
from services import (
    accountManagePool,
    get_base_url,
    socks5ProxyPool,
    httpProxyPool,
)
# The modules below are loaded into the global namespace dynamically; do not remove them.
try:
    from services import zbytb
    from services import ybw
    # from services import nmpa
    from services import site_monitor
except ImportError as e:
    print(f"Missing global module, reason: {e.args}")

app = Flask(__name__)

# Authentication handler
auth = HTTPBasicAuth()

# Per-client Chrome proxy usage state
ChromeUser: dict = {}

# User collection
Users = mongo_table('py_spider', 'spider_scheduler_auth')


@auth.verify_password
def verify_password(username, password):
    item = Users.find_one({'username': username})
    if item is not None:
        # Passwords are stored in plaintext; hash on the fly and compare.
        if check_password_hash(generate_password_hash(item['password']), password):
            return username
    return None
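
# All @auth.login_required routes expect HTTP Basic credentials from the
# 'spider_scheduler_auth' collection, e.g. (hypothetical user/password):
#   curl -u spider:secret 'http://127.0.0.1:1405/crawl/proxy/query'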


@app.route('/')
@auth.login_required
def index():
    return 'Hello, {}!<br><br> <a href="{}">Proxy pool status</a>'.format(
        auth.username(),
        get_base_url() + '/crawl/proxy/query'
    )


@app.route('/proxy', methods=['GET'])
def chrome_proxy_plugin():
    client = request.args.get('clientid')
    ip = request.remote_addr
    if client is None:
        return jsonify(data={})
    if client not in ChromeUser:
        ChromeUser.setdefault(client, {'chrome_use_proxy': True, 'ip': ip})
    else:
        config: dict = ChromeUser.get(client)
        config.update({'chrome_use_proxy': True})
        ChromeUser.update({client: config})
    logger.info(f"ChromeUser: {ChromeUser}")
    return jsonify(data=ChromeUser.get(client))
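
# Flow: the Chrome proxy plugin calls /proxy?clientid=<id> to register and
# (re-)enable proxying for that client, then polls /proxy/test?clientid=<id>,
# which answers 'true' exactly once per enable (the flag is cleared on read).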


@app.route('/proxy/test', methods=['GET'])
def chrome_proxy_plugin_check():
    client = request.args.get('clientid')
    if client is None or client not in ChromeUser:
        return 'false'
    config: dict = ChromeUser.get(client)
    if config.get('chrome_use_proxy'):
        config.update({'chrome_use_proxy': False})
        ChromeUser.update({client: config})
        return 'true'
    return 'false'


@app.route('/proxy/user/show', methods=['GET'])
@auth.login_required
def show_chrome_proxy_plugin_user():
    return jsonify(data=ChromeUser)


@app.route('/crawl/proxy/<scheme>/fetch', methods=['GET'])
@auth.login_required
def get_proxy(scheme):
    # logger.info(f'[client ip]{request.remote_addr}, class:{scheduler_class_name}')
    pk = request.args.get('pk')  # proxy type: 1 = lua downloader deployment nodes, 2 = python-only proxies
    result = {}
    proxies = None
    try:
        if scheme == 'http':
            proxies = httpProxyPool.proxies(pk=pk)
        elif scheme == 'socks5':
            proxies = socks5ProxyPool.proxies(pk=pk)
        else:
            abort(404)
        logger.info(f'[fetch {scheme} proxy]{proxies}')
        if proxies is not None:
            result.update(proxies)
    except (KeyError, IndexError):
        pass
    return jsonify(data=result)
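
# Example: GET /crawl/proxy/socks5/fetch?pk=2 returns whatever
# socks5ProxyPool.proxies(pk=pk) yields, wrapped as {"data": {...}}; the exact
# mapping shape depends on the proxy pool implementation (assumption).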


@app.route('/crawl/proxy/query', methods=['GET'])
@auth.login_required
def show_proxy():
    pk = request.args.get('pk')
    socks_pool = socks5ProxyPool.get_proxy_pool(pk=pk)
    http_pool = httpProxyPool.get_proxy_pool(pk=pk)
    pool = [*socks_pool, *http_pool]
    return jsonify(data=pool)


@app.route('/crawl/proxy/getips', methods=['GET'])
@auth.login_required
def show_proxy_ips():
    pk = request.args.get('pk')
    socks_ips = socks5ProxyPool.get_all_proxy_ip('socks5', pk=pk)
    http_ips = httpProxyPool.get_all_proxy_ip('http', pk=pk)
    return jsonify(data={'socks': socks_ips, 'http': http_ips})


@app.route('/upload/data/<scheduler_class_name>/<table>', methods=['POST'])
@auth.login_required
def upload_data(scheduler_class_name, table):
    data_json = request.json
    logger.info(f"[received data]{data_json}")
    try:
        scheduler_class = globals()[scheduler_class_name]
        scheduler_class.save_data(table, data_json)
        return 'success'
    except KeyError:
        return 'failure'
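
# Example: POST /upload/data/zbytb/<table> with a JSON body stores the payload
# via zbytb.save_data(); the class name in the URL must match one of the
# modules imported into globals() above, otherwise 'failure' is returned.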


@app.route('/crawl/<scheduler_class_name>/task/fetch', methods=['GET'])
def get_crawl_task(scheduler_class_name):
    task = {}
    try:
        scheduler_class = globals()[scheduler_class_name]
        result = scheduler_class.get_crawl_task()
        if result is not None:
            task = result
    except KeyError:
        pass
    return jsonify(data=task)
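
# Example: GET /crawl/site_monitor/task/fetch resolves the 'site_monitor'
# module loaded above and returns its next task, or an empty dict when no
# task is available.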


@app.route('/crawl/<scheduler_class_name>/task/total', methods=['GET'])
def get_crawl_task_total(scheduler_class_name):
    total = {'total': 0}
    try:
        scheduler_class = globals()[scheduler_class_name]
        total.update({'total': scheduler_class.task_total})
    except KeyError:
        pass
    return jsonify(data=total)


@app.route('/competing_goods/account/fetch', methods=['GET'])
@auth.login_required
def competing_goods_account_lock():
    req_ip = request.remote_addr
    site = request.args.get('site')
    crawl_type = request.args.get('crawl_type')
    result = accountManagePool.lock_account(site, crawl_type, req_ip)
    return jsonify(data=result)


@app.route('/competing_goods/account/release', methods=['GET'])
@auth.login_required
def competing_goods_account_release():
    req_ip = request.remote_addr
    uid = request.args.get('uid')
    crawl_type = request.args.get('crawl_type')
    if uid in [None, '']:
        abort(404)  # reject release requests that carry no uid
    res = accountManagePool.release_account(uid, crawl_type, req_ip)
    return jsonify(data=res)
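
# Typical flow (parameter values are illustrative): a worker locks an account
# with GET /competing_goods/account/fetch?site=ybw&crawl_type=list, then frees
# it with GET /competing_goods/account/release?uid=<uid>&crawl_type=list.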


# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=1405, debug=True, use_reloader=False)