123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- from flask import Flask, request, jsonify, abort
- from flask_httpauth import HTTPBasicAuth
- from werkzeug.security import generate_password_hash, check_password_hash
- from common.databases import mongo_table
- from common.log import logger
- from services import (
- accountManagePool,
- get_base_url
- )
# The modules imported below are resolved by name through globals() in the
# request handlers of this file, so they must stay imported even though no
# code references them directly. Do not remove them.
try:
    from services import zbytb
    from services import ybw
    # from services import nmpa
except ImportError as exc:
    # Best effort: report the missing scheduler and keep starting up.
    print(f"缺少全局变量, 原因:{exc.args}")
# Flask application object that all routes in this file are registered on.
app = Flask(__name__)

# Authentication scheme: HTTP Basic.
auth = HTTPBasicAuth()

# Chrome proxy state, keyed by the client id sent by each caller.
ChromeUser: dict = {}

# User collection consulted when verifying credentials.
Users = mongo_table('py_spider', 'spider_scheduler_auth')
@auth.verify_password
def verify_password(username, password):
    """Verify HTTP Basic credentials against the user collection.

    Returns ``username`` when a record with that username exists and the
    supplied password matches the password stored on the record; returns
    ``None`` otherwise.
    """
    record = Users.find_one({'username': username})
    if record is None:
        return None

    stored_name = record['username']
    # The stored password is hashed on the fly and the supplied password is
    # then checked against that fresh hash.
    # NOTE(review): this only succeeds when the stored value equals the
    # supplied password, so the collection presumably holds plaintext
    # passwords -- confirm and consider storing hashes instead.
    stored_hash = generate_password_hash(record['password'])

    if stored_name == username and check_password_hash(stored_hash, password):
        return username
    return None
@app.route('/')
@auth.login_required
def index():
    """Landing page: greet the authenticated user and link to the proxy-pool usage page."""
    template = ' Hello, {}!<br><br> <a href="{}">代理池使用情况</a>'
    current_user = auth.username()
    usage_url = get_base_url() + '/crawl/proxy/query'
    return template.format(current_user, usage_url)
@app.route('/proxy', methods=['GET'])
def chrome_proxy_plugin():
    """Mark the calling Chrome client as using the proxy and return its state.

    Reads the ``clientid`` query parameter. Without it an empty ``data``
    object is returned. A client seen for the first time is recorded with
    its remote address; a known client only has its ``chrome_use_proxy``
    flag set back to ``True`` (the recorded ip is left as it was).
    """
    client_id = request.args.get('clientid')
    remote_ip = request.remote_addr
    if client_id is None:
        return jsonify(data={})

    if client_id in ChromeUser:
        ChromeUser[client_id]['chrome_use_proxy'] = True
    else:
        ChromeUser[client_id] = {'chrome_use_proxy': True, 'ip': remote_ip}

    logger.info(f"ChromeUser: {ChromeUser}")
    return jsonify(data=ChromeUser[client_id])
@app.route('/proxy/test', methods=['GET'])
def chrome_proxy_plugin_check():
    """Report once whether the given client has a pending proxy flag.

    Returns the string ``'true'`` when the client's ``chrome_use_proxy``
    flag is set, clearing the flag in the same call; returns ``'false'``
    for a missing or unknown ``clientid`` or when the flag is already clear.
    """
    client_id = request.args.get('clientid')
    if client_id is None or client_id not in ChromeUser:
        return 'false'

    state = ChromeUser[client_id]
    if not state.get('chrome_use_proxy'):
        return 'false'

    # Consume the flag so the next check reports 'false' until it is set again.
    state['chrome_use_proxy'] = False
    return 'true'
@app.route('/proxy/user/show', methods=['GET'])
@auth.login_required
def show_chrome_proxy_plugin_user():
    """Return the whole Chrome proxy state table as JSON (authenticated)."""
    all_clients = ChromeUser
    return jsonify(data=all_clients)
@app.route('/upload/data/<scheduler_class_name>/<table>', methods=['POST'])
@auth.login_required
def upload_data(scheduler_class_name, table):
    """Hand an uploaded JSON payload to the named scheduler for storage.

    Args:
        scheduler_class_name: Name of a module-level object, resolved
            through ``globals()``, that exposes ``save_data``.
        table: Table name passed through to ``save_data`` unchanged.

    Returns:
        ``'success'`` after ``save_data`` returns, or ``'failure'`` when the
        name does not resolve to an object exposing ``save_data`` or when
        ``save_data`` raises ``KeyError``.
    """
    data_json = request.json
    logger.info(f"[接收数据]{data_json}")

    # NOTE(review): the URL segment can name ANY module-level object, not
    # only schedulers; an explicit allow-list of scheduler names would be
    # safer than a globals() lookup.
    scheduler = globals().get(scheduler_class_name)
    # A global without save_data (e.g. `app` or a view function) used to
    # raise an uncaught AttributeError and surface as HTTP 500; treat it the
    # same as an unknown name.
    save_data = getattr(scheduler, 'save_data', None)
    if save_data is None:
        return 'failure'

    try:
        save_data(table, data_json)
    except KeyError:
        # Kept from the original handler: a KeyError raised while saving is
        # reported as a failure rather than an error response.
        return 'failure'
    return 'success'
@app.route('/task/<scheduler_class_name>/fetch', methods=['GET'])
def get_crawl_task(scheduler_class_name):
    """Fetch the next crawl task from the named scheduler.

    Args:
        scheduler_class_name: Name of a module-level object, resolved
            through ``globals()``, that exposes ``get_crawl_task()``.

    Returns:
        JSON ``{"data": task}``. ``task`` is an empty object when the name
        does not resolve to an object exposing ``get_crawl_task``, when the
        call raises ``KeyError``, or when it returns ``None``.
    """
    # NOTE(review): the URL segment can name ANY module-level object, not
    # only schedulers; an explicit allow-list of scheduler names would be
    # safer than a globals() lookup.
    scheduler = globals().get(scheduler_class_name)
    # A global without get_crawl_task (this view function itself is one such
    # global) used to raise an uncaught AttributeError and surface as HTTP
    # 500; treat it the same as an unknown name.
    fetch_task = getattr(scheduler, 'get_crawl_task', None)
    if fetch_task is None:
        return jsonify(data={})

    try:
        result = fetch_task()
    except KeyError:
        # Kept from the original handler: a KeyError during the fetch yields
        # the empty task instead of an error response.
        result = None

    task = {} if result is None else result
    return jsonify(data=task)
@app.route('/task/<scheduler_class_name>/total', methods=['GET'])
def get_crawl_task_total(scheduler_class_name):
    """Report the ``task_total`` value of the named scheduler.

    Args:
        scheduler_class_name: Name of a module-level object, resolved
            through ``globals()``, that exposes ``task_total``.

    Returns:
        JSON ``{"data": {"total": n}}``. ``n`` is ``0`` when the name does
        not resolve to an object exposing ``task_total`` or when reading the
        attribute raises ``KeyError``.
    """
    # NOTE(review): the URL segment can name ANY module-level object, not
    # only schedulers; an explicit allow-list of scheduler names would be
    # safer than a globals() lookup.
    scheduler = globals().get(scheduler_class_name)
    try:
        count = scheduler.task_total
    except (AttributeError, KeyError):
        # AttributeError: unknown name (None) or a global that is not a
        # scheduler; previously this escaped as HTTP 500.
        # KeyError: kept from the original handler.
        count = 0
    return jsonify(data={'total': count})
@app.route('/competing_goods/account/fetch', methods=['GET'])
@auth.login_required
def competing_goods_account_lock():
    """Ask the account pool to lock an account for the calling client.

    Forwards the ``site`` and ``crawl_type`` query parameters together with
    the caller's remote address to ``accountManagePool.lock_account`` and
    returns whatever it yields as JSON ``data``.
    """
    caller_ip = request.remote_addr
    params = request.args
    locked = accountManagePool.lock_account(
        params.get('site'),
        params.get('crawl_type'),
        caller_ip,
    )
    return jsonify(data=locked)
@app.route('/competing_goods/account/release', methods=['GET'])
@auth.login_required
def competing_goods_account_release():
    """Ask the account pool to release a previously locked account.

    Requires a non-empty ``uid`` query parameter; without it the request is
    aborted with HTTP 404. Otherwise ``uid``, ``crawl_type`` and the caller's
    remote address are forwarded to ``accountManagePool.release_account``
    and its result is returned as JSON ``data``.
    """
    caller_ip = request.remote_addr
    params = request.args
    uid = params.get('uid')
    crawl_type = params.get('crawl_type')

    if not uid:
        # Missing or empty uid: respond with 404 (Not Found).
        abort(404)

    released = accountManagePool.release_account(uid, crawl_type, caller_ip)
    return jsonify(data=released)
|