浏览代码

添加账号池

dongzhaorui 1 年之前
父节点
当前提交
7ce5e99264
共有 2 个文件被更改,包括 61 次插入和 46 次删除
  1. 61 21
      qlm/source_qianlima.py
  2. 0 25
      qlm/utils/config_parms.py

+ 61 - 21
qlm/source_qianlima.py

@@ -8,21 +8,19 @@ import time
 
 import requests
 
-from utils.config_parms import *
+from utils.config_parms import (
+    account_pool,
+    area_dict,
+    city_dict,
+    province_dict,
+    channel_dict,
+    REQUEST_DATA_MAP
+)
 from utils.databases import mongo_table, redis_client
 from utils.log import logger
 from utils.sessions_521 import http_session_521
 from utils.tools import sha1
 
-qlm = mongo_table('qlm', 'data_merge')
-r = redis_client()
-redis_key = 'qianlima_2024'
-
-session = requests.session()
-proxies = {
-    'http': 'socks5://119.3.159.234:8860',
-    'https': 'socks5://119.3.159.234:8860',
-}
 
 '''
 https://search.vip.qianlima.com/index.html#?sortType=6&isSearchWord=1&tab_index=0
@@ -33,6 +31,20 @@ https://search.vip.qianlima.com/index.html#?sortType=6&isSearchWord=1&tab_index=
 4 = 审批项目
 '''
 
+qlm = mongo_table('qlm', 'data_merge')
+r = redis_client()
+redis_key = 'qianlima_2024'
+
+proxies = {
+    'http': 'socks5://119.3.159.234:8860',
+    'https': 'socks5://119.3.159.234:8860',
+}
+session = requests.session()
+
+account_id = 1  # 账号标识
+captcha_appear_times = 0  # 图形验证出现次数
+stop_use_account = False
+
 
 class AccountViolationRiskError(Exception):
     pass
@@ -59,48 +71,74 @@ def get_today_of_day(offset, fmt='%Y-%m-%d'):
     return date.strftime(fmt)
 
 
+def switch_account():
+    global account_id, stop_use_account
+
+    logger.info(f'切换账号...{account_id}')
+    if account_id < len(account_pool):
+        account_id += 1  # 切换账号
+    else:
+        account_id = 1  # 重置账号
+        stop_use_account = True
+
+
 def request(url, data, retries=5):
-    global session, cookies, proxies
+    global session, proxies, account_id, stop_use_account
+
     resp, msg = None, ''
     usages, usages_521 = 0, 1
     while usages < retries:
+        (_, account), = account_pool[account_id].items()
         request_params = {}
         request_params.setdefault('data', data)
-        request_params.setdefault('headers', headers)
-        request_params.setdefault('cookies', cookies)
+        request_params.setdefault('headers', account['headers'])
+        request_params.setdefault('cookies', account['cookies'])
         request_params.setdefault('proxies', proxies)
         request_params.setdefault('timeout', 60)
         try:
             resp = session.post(url, **request_params)
             if resp.status_code == 521:
                 while usages_521 < retries:
-                    success, _, cookies = http_session_521(session, url, headers, cookies, data=data, proxies=proxies)
+                    success, _, cookies = http_session_521(session, url,
+                                                           headers=account['headers'],
+                                                           cookies=account['cookies'],
+                                                           data=data,
+                                                           proxies=proxies)
                     if success:
                         break
                     msg = f'反爬破解失败,次数:{usages_521}'
                     logger.warning(msg)
                     time.sleep(1)
                     usages_521 += 1
+
                 usages += 1
+
+            elif resp.status_code == 429:
+                if stop_use_account:
+                    msg = f'访问频繁,图形验证,异常状态码:{resp.status_code}'
+                    logger.error(msg)
+                    logger.warning(resp.content.decode())
+                    break
+                else:
+                    switch_account()
+
             elif resp.status_code in [401, 403, 404]:
                 msg = f'账号登录已失效或封停,异常状态码:{resp.status_code}'
                 logger.error(msg)
                 break
-            elif resp.status_code in [429]:
-                msg = f'图形验证,异常状态码:{resp.status_code}'
-                logger.error(msg)
-                logger.warning(resp.content.decode())
-                break
+
             elif str(resp.status_code).startswith('4'):
                 msg = f'公网IP被封禁,异常状态码:{resp.status_code}'
                 logger.error(msg)
                 break
+
             else:
                 break
         except requests.RequestException as e:
             msg = f'访问失败,原因:{e.__class__.__name__}'
             logger.error(msg)
             usages += 1
+
     return resp, msg
 
 
@@ -125,8 +163,10 @@ def downloader(begin_date, end_date, category, address, page, page_size):
     data['numPerPage'] = page_size
     data = json.dumps(data)
 
+    # 请求资源响应自定义状态, 成功=success 失败=failure 停止=stop IP封停=disable等
+    request_status = 'failure'
+
     response, err = request(url, data)
-    request_status = 'failure'  # 资源请求结果状态, 成功=success 失败=failure 停止=stop 封停=disable
     if response is None:
         request_status = 'server_error'
         return request_status, err
@@ -192,7 +232,7 @@ def downloader(begin_date, end_date, category, address, page, page_size):
 
     if request_status in ['stop', 'success']:
         if page == 1:
-            logger.info(f'千里马 {begin_date} 发布 {row_count} 条数据')
+            logger.info(f'千里马 {begin_date} 网站发布 {row_count} 条数据')
         logger.info(f'入库 {len(results)} 条')
 
     return request_status, err

文件差异内容过多而无法显示
+ 0 - 25
qlm/utils/config_parms.py


部分文件因为文件数量过多而无法显示