|
@@ -0,0 +1,427 @@
|
|
|
+import io
|
|
|
+import json
|
|
|
+import threading
|
|
|
+import time
|
|
|
+import uuid
|
|
|
+from collections import namedtuple
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
+import execjs
|
|
|
+import requests
|
|
|
+from requests import Session
|
|
|
+from requests.utils import dict_from_cookiejar
|
|
|
+
|
|
|
+from utils.execptions import CrawlError
|
|
|
+from loguru import logger
|
|
|
+import setting
|
|
|
+
|
|
|
# Re-entrant lock held while the login-cookie JSON file is rewritten.
LOCK = threading.RLock()

# Working directory handed to execjs so that require('crypto-js') resolves.
_node_modules = (setting.ROOT_PATH / 'node_modules').resolve()
# JSON file that stores one cookie dict per account name.
JSON_LOGIN_COOKIE = (setting.ROOT_PATH / 'login_cookie.json').resolve()

# Lightweight record holding an account's credentials.
User = namedtuple('User', 'phone passwd')
|
|
|
+
|
|
|
+
|
|
|
def _open_file():
    """Open the login-cookie JSON file and return the text handle.

    The file is opened for reading. When it does not exist yet it is created
    empty (mode ``w+``) and that handle is returned instead. The caller owns
    the returned handle and is responsible for closing it.
    """
    try:
        return open(JSON_LOGIN_COOKIE, encoding='utf-8')
    except FileNotFoundError:
        # First use: create the file so that later reads find it.
        return open(JSON_LOGIN_COOKIE, 'w+', encoding='utf-8')
|
|
|
+
|
|
|
+
|
|
|
def load_login_cookies(user_name: str):
    """Return the stored cookies for one account.

    Args:
        user_name: account name used as the key in the cookie file.

    Returns:
        The cookie dict saved for ``user_name``, or ``None`` when the account
        has no entry or the file does not contain valid JSON (for example
        because it was just created empty).
    """
    # The context manager closes the handle on every path; the previous
    # version returned from inside ``try`` and never reached ``fp.close()``.
    with _open_file() as fp:
        try:
            user_maps = json.load(fp)
        except json.decoder.JSONDecodeError:
            return None
    return user_maps.get(user_name)
|
|
|
+
|
|
|
+
|
|
|
def save_login_cookies(user_name: str, login_cookie: dict):
    """Store (or replace) the cookies of one account in the cookie file.

    Args:
        user_name: account name used as the key in the cookie file.
        login_cookie: cookie dict to persist for that account.
    """
    with LOCK:
        # Read the current content; a missing file is created, and an empty
        # or invalid file is treated as "no accounts stored yet".
        with _open_file() as fp:
            try:
                user_maps: dict = json.load(fp)
            except json.decoder.JSONDecodeError:
                user_maps = {}

        # Insert and overwrite are the same operation for a dict key.
        user_maps[user_name] = login_cookie

        # Serialize before opening for writing: opening truncates the file,
        # so a serialization error must not be able to wipe existing data.
        serialized = json.dumps(user_maps, indent=4)
        with open(JSON_LOGIN_COOKIE, 'w', encoding='utf-8') as wp:
            wp.write(serialized)
|
|
|
+
|
|
|
+
|
|
|
def update_login_cookies(user_name: str, update_val: dict):
    """
    Merge new cookie values into the stored cookies of one account.

    Nothing is written when ``update_val`` is empty, when the cookie file is
    missing or does not contain valid JSON, or when the account has no stored
    cookies yet.

    Args:
        user_name: account name used as the key in the cookie file.
        update_val: cookie values to merge into the stored cookies.

    """
    if not update_val:
        return

    with LOCK:
        try:
            with open(JSON_LOGIN_COOKIE, encoding='utf-8') as fp:
                user_maps: dict = json.load(fp)
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            # No readable cookie store means there is nothing to update.
            return

        login_cookies = user_maps.get(user_name)
        if login_cookies is None:
            return

        # ``login_cookies`` is the dict held by ``user_maps``, so updating it
        # in place already updates the mapping that gets written back.
        login_cookies.update(update_val)

        # Serialize before truncating the file so a failure cannot wipe it.
        serialized = json.dumps(user_maps, indent=4)
        with open(JSON_LOGIN_COOKIE, 'w', encoding='utf-8') as wp:
            wp.write(serialized)
|
|
|
+
|
|
|
+
|
|
|
def convert1(plaintext):
    """
    Encrypt text with AES (CBC mode, PKCS#7 padding) through CryptoJS.

    The key and IV are fixed hex constants embedded in the JavaScript
    snippet, which is executed with execjs using the project's
    ``node_modules`` directory as working directory.

    @param plaintext: text to encrypt
    @return: the value of ``enc.ciphertext.toString()`` from the snippet
    """
    script = '''
    const CryptoJS = require('crypto-js');
    function convert1(txt) {
        var a = '434D643932666D644B454E304E646C616535334D6435666E';
        a = CryptoJS.enc.Hex.parse(a)
        b = CryptoJS.enc.Hex.parse("30393138313633304D4D474C435A5059")
        var enc = CryptoJS.AES.encrypt(txt, a, {
            iv: b,
            mode: CryptoJS.mode.CBC,
            padding: CryptoJS.pad.Pkcs7
        })
        return enc.ciphertext.toString()
    }
    '''
    runtime = execjs.compile(script, cwd=_node_modules)
    return runtime.call('convert1', plaintext)
|
|
|
+
|
|
|
+
|
|
|
def recognition_captcha(image_stream, proxies=None, timeout=None):
    """
    Send a captcha image to the recognition service and return its text.

    @param image_stream: captcha image content, uploaded as the ``file`` field
    @param proxies: proxies forwarded to ``requests.post``
    @param timeout: timeout forwarded to ``requests.post``
    @return: the recognised code in upper case, or ``None`` when the service
             does not answer with ``msg == "success"``
    """
    response = requests.post(
        "http://pycaptcha.spdata.jianyu360.com/v1/images/verify",
        headers={'accept': 'application/json'},
        files={'file': image_stream},
        stream=True,
        proxies=proxies,
        timeout=timeout,
    )
    result = response.json()
    # Guard clause: anything but an explicit success yields no code.
    if "msg" not in result or result["msg"] != "success":
        return None
    return str(result["r"]["code"]).upper()
|
|
|
+
|
|
|
+
|
|
|
def download_captcha(image, session: Session, save_to_local=False, proxies=None, timeout=None):
    """Download a captcha image through ``session`` and return it in memory.

    The captcha URL (random id plus a date/time string) is produced by a
    small JavaScript function evaluated with execjs.

    @param image: file path used when ``save_to_local`` is true
    @param session: session used for the download, so the captcha is tied
                    to the same cookies as the later login request
    @param save_to_local: also write the image bytes to ``image``
    @param proxies: proxies forwarded to the request
    @param timeout: timeout forwarded to the request
    @return: ``io.BytesIO`` holding the image bytes (position at the end)
    """
    url_script = '''
    function changeYzmL() {
        var randomNum = ('000000' + Math.floor(Math.random() * 999999)).slice(-6);
        var time = new Date();
        var nowTime = String(time.getFullYear()) + String(time.getMonth() + 1) + String(time.getDate()) + String(
        time.getHours()) + String(time.getMinutes()) + String(time.getSeconds());
        return "https://www.chinabidding.cn/cblcn/member.Login/captcha?randomID=" + randomNum + "&t=" + nowTime
    }
    '''
    captcha_url = execjs.compile(url_script).call('changeYzmL')
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
        'Referer': 'https://www.chinabidding.cn/public/2020/html/login.html?source=1',
    }
    response = session.get(
        captcha_url,
        headers=request_headers,
        stream=True,
        proxies=proxies,
        timeout=timeout,
    )
    image_bytes = response.content

    buffer = io.BytesIO()
    buffer.write(image_bytes)
    if save_to_local:
        with open(image, 'wb') as local_file:
            local_file.write(image_bytes)
    logger.info(f'[验证码]下载成功')
    return buffer
|
|
|
+
|
|
|
+
|
|
|
def captcha(session, phone, proxies=None, timeout=None):
    """
    Download a captcha, have it recognised, and return the encrypted code.

    @param session: requests session used to download the captcha
    @param phone: used to build the captcha image name ``<phone>.jpg``
    @param proxies: proxies forwarded to both requests
    @param timeout: timeout forwarded to both requests
    @return: the recognised code passed through ``convert1``
    """
    network_options = dict(proxies=proxies, timeout=timeout)
    image_name = f'{phone}.jpg'
    image_bytes = download_captcha(image_name, session, **network_options).getvalue()
    code = recognition_captcha(image_bytes, **network_options)
    logger.info(f'[验证码识别]{code}')
    return convert1(code)
|
|
|
+
|
|
|
+
|
|
|
def login_session(phone: str, password: str, proxies=None, timeout=None):
    """
    Submit the login form in a fresh session.

    The session is seeded with generated browser-identity cookies, a captcha
    is downloaded and solved in that same session, and the encrypted
    credentials are posted to the login endpoint.

    @param phone: login phone number
    @param password: login password
    @param proxies: proxies forwarded to every request
    @param timeout: timeout forwarded to every request
    @return: tuple ``(response, session)`` of the login POST
    """
    logger.info('账号登录:{phone}', phone=phone)
    session = requests.session()

    # Generate the browser identity ids and visit timestamps.
    gr_user_id = uuid.uuid4()
    gr_session_id = uuid.uuid4()
    login_ts = str(int(time.time()))
    browser_cookies = {
        'gr_user_id': str(gr_user_id),
        'b5897e326c6777f3_gr_session_id': str(gr_session_id),
        f'b5897e326c6777f3_gr_session_id_{gr_session_id}': 'true',
        'Hm_lvt_0bf7d2e4ce4104fa77e95b012f750771': login_ts,
        'Hm_lpvt_0bf7d2e4ce4104fa77e95b012f750771': login_ts,
    }
    for cookie_name, cookie_value in browser_cookies.items():
        session.cookies[cookie_name] = cookie_value

    # Captcha download and recognition.
    yzm = captcha(session, phone, proxies=proxies, timeout=timeout)
    # 1. Downloading the captcha makes the server record this client's
    #    identity id; per the original note the server needs more than ~1s
    #    to store it, hence the pause.
    time.sleep(2)

    timestamp_script = '''
    function t() {
        var time = new Date();
        var nowTime = String(time.getFullYear()) + String(time.getMonth() + 1) + String(time.getDate()) + String(
        time.getHours()) + String(time.getMinutes()) + String(time.getSeconds());
        return nowTime
    }
    '''
    now_time = execjs.compile(timestamp_script).call('t')

    form = {
        'phone': convert1(phone),
        'password': convert1(password),
        'yzm': yzm,
        't': now_time
    }
    # 2. The credentials must be submitted in the same session that
    #    downloaded the captcha, otherwise the captcha check fails.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
        'Referer': 'https://www.chinabidding.cn/public/2020/html/login.html?source=1',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
    }
    response = session.post(
        'https://www.chinabidding.cn/yuan/login/loginnew/login',
        headers=request_headers,
        data=form,
        proxies=proxies,
        timeout=timeout,
    )
    assert response.status_code == 200
    logger.info(f'登录信息: {response.json()}')
    return response, session
|
|
|
+
|
|
|
+
|
|
|
def login_session_by_cookies(cookies, url, headers, data=None, proxies=None, timeout=None):
    """
    POST to ``url`` with previously saved cookies in a new session.

    @param dict cookies: cookies saved after an earlier login
    @param str url: login-check address
    @param dict headers: request headers
    @param data: request body (the visible caller passes ``t=<ms timestamp>``)
    @param proxies: proxies forwarded to the request
    @param timeout: timeout forwarded to the request
    @return: tuple ``(response, session)``
    """
    new_session = requests.session()
    request_options = dict(
        headers=headers,
        data=data,
        cookies=cookies,
        proxies=proxies,
        timeout=timeout,
    )
    response = new_session.post(url, **request_options)
    assert response.status_code == 200
    return response, new_session
|
|
|
+
|
|
|
+
|
|
|
def login_check_and_get_meta(session=None, allow_output_log=True, proxies=None, timeout=None):
    """
    Query the login state endpoint and return the account's member data.

    @param Session session: session of a logged-in account
    @param allow_output_log: whether to log the member data
    @param proxies: proxies forwarded to the request
    @param timeout: timeout forwarded to the request
    @return: tuple ``(member, response)``; ``member`` is the ``member`` field
             of the JSON reply, or ``None`` when that field is absent
    """
    millis = int(round(time.time() * 1000))
    request_headers = {
        'Host': 'www.chinabidding.cn',
        'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'Origin': 'https://www.chinabidding.cn',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://www.chinabidding.cn/public/2020/html/login.html?source=1',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    }
    response = session.post(
        "https://www.chinabidding.cn/cblcn/Home/logincheckAndGetMeta",
        headers=request_headers,
        data=f"t={millis}",
        proxies=proxies,
        timeout=timeout,
    )
    assert response.status_code == 200
    member: dict = response.json().get('member')
    if allow_output_log:
        logger.info("账号信息:{}", json.dumps(member, indent=4, ensure_ascii=False))
    return member, response
|
|
|
+
|
|
|
+
|
|
|
def login_check(account=None, refer=None, allow_output_log=True, proxies=None, timeout=None):
    """
    Check whether the locally stored cookies of an account are still valid.

    The stored cookies are sent to the login-check endpoint; cookies returned
    by the server plus refreshed visit timestamps are merged back into the
    local cookie file.

    Args:
        account(str): account name
        refer: optional ``Referer`` header value
        allow_output_log: whether to log the reply
        proxies: proxies forwarded to the request
        timeout: timeout forwarded to the request

    Returns:
        False when the login is still valid, True when it is invalid or no
        cookies are stored for the account.

    Raises:
        CrawlError: when the reply body is not JSON.
    """
    check_url = "https://www.chinabidding.cn/cblcn/Home/newLoginCheck"
    # Whole seconds expressed in milliseconds.
    millis = int(round(int(time.time()) * 1000))
    payload = f"t={millis}"
    request_headers = {
        'Host': 'www.chinabidding.cn',
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="97", "Chromium";v="97"',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'sec-ch-ua-mobile': '?0',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'Origin': 'https://www.chinabidding.cn',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    }
    if refer is not None:
        request_headers['Referer'] = refer

    stored_cookies = load_login_cookies(account)
    if stored_cookies is None:
        # No cookies for this account; check the login_cookie.json file.
        return True

    visit_ts = int(time.time())
    response, _session = login_session_by_cookies(
        stored_cookies,
        check_url,
        request_headers,
        data=payload,
        proxies=proxies,
        timeout=timeout,
    )

    try:
        member = response.json()
    except json.decoder.JSONDecodeError:
        raise CrawlError(code=10021, reason="系统繁忙,请等待一会儿,自动刷新。")

    if allow_output_log:
        logger.info("账号信息:{}", json.dumps(member, indent=4, ensure_ascii=False))

    # Refresh the local cookies with what the server returned.
    refreshed: dict = dict_from_cookiejar(response.cookies)
    # Previous visit timestamp (seconds): the old "current visit" value.
    refreshed['Hm_lvt_0bf7d2e4ce4104fa77e95b012f750771'] = stored_cookies.get(
        "Hm_lpvt_0bf7d2e4ce4104fa77e95b012f750771", "")
    # Current visit timestamp (seconds).
    refreshed['Hm_lpvt_0bf7d2e4ce4104fa77e95b012f750771'] = str(visit_ts)
    update_login_cookies(account, refreshed)

    login_is_valid = member is not None and len(member) > 1
    return not login_is_valid
|
|
|
+
|
|
|
+
|
|
|
def login_session_check(session, account, allow_output_log=True, proxies=None, timeout=None):
    """
    Check the login state of a live session.

    When member data is returned, the cookies from the reply are merged into
    the account's stored cookies.

    @param Session session: session of the logged-in user
    @param str account: account name
    @param allow_output_log: whether to log the member data
    @param proxies: proxies forwarded to the request
    @param timeout: timeout forwarded to the request
    @return: False when the identity is valid, True when it is invalid
    """
    member, response = login_check_and_get_meta(
        session,
        allow_output_log,
        proxies=proxies,
        timeout=timeout,
    )
    if member is None or len(member) <= 0:
        # Identity invalid.
        return True

    # Identity valid: persist whatever cookies the server refreshed.
    update_login_cookies(account, dict_from_cookiejar(response.cookies))
    return False
|
|
|
+
|
|
|
+
|
|
|
def login(phone, password, proxies=None, timeout=None):
    """
    Log in and persist the resulting cookies.

    On success the member data is fetched, the tracking cookies derived from
    the member's ``record_id`` are added, and all cookies are saved under
    ``phone``. On failure the error is logged; for code 514 (IP restricted)
    the call sleeps 300 seconds before returning.

    @param str phone: account
    @param str password: password
    @param dict proxies: proxies forwarded to every request
    @param int|tuple timeout: timeout forwarded to every request
    @return: ``(session, 200)`` on success, otherwise a fresh unauthenticated
             session and the integer error code from the login reply
    """
    r, session = login_session(phone, password, proxies=proxies, timeout=timeout)
    # Parse the reply once instead of re-decoding it for every field access.
    result = r.json()
    code = result['code']

    if code == 200:
        # Forward proxies/timeout: previously this follow-up request ignored
        # them, so it bypassed the proxy and could block without a timeout.
        member, _ = login_check_and_get_meta(session, proxies=proxies, timeout=timeout)
        login_cookies: dict = dict_from_cookiejar(session.cookies)
        if member is not None:
            record_id = str(member['record_id'])
            login_cookies.update(
                b5897e326c6777f3_gr_cs1=record_id,
                b5897e326c6777f3_gr_last_sent_cs1=record_id,
                b5897e326c6777f3_gr_last_sent_sid_with_cs1=login_cookies['b5897e326c6777f3_gr_session_id'],
            )
        save_login_cookies(phone, login_cookies)
        return session, 200

    logger.error(f'[登录失败]{code}-{result.get("msg")},账号:{phone}')
    if code == 514:
        # 514 = IP restricted; back off before the caller retries.
        time.sleep(300)
    return requests.session(), int(code)
|