浏览代码

浙移集成

dzr 8 月之前
父节点
当前提交
013ab9cebc
共有 7 个文件被更改,包括 578 次插入0 次删除
  1. 二进制
      zyjc/rgg/.DS_Store
  2. 8 0
      zyjc/rgg/__init__.py
  3. 109 0
      zyjc/rgg/account.py
  4. 二进制
      zyjc/rgg/backup/bloomfilter.f
  5. 177 0
      zyjc/rgg/clean_html.py
  6. 14 0
      zyjc/rgg/log.py
  7. 270 0
      zyjc/rgg/net.py

二进制
zyjc/rgg/.DS_Store


+ 8 - 0
zyjc/rgg/__init__.py

@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-10-17 
+---------
+@summary:  
+---------
+
+"""

+ 109 - 0
zyjc/rgg/account.py

@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-10-10 
+---------
+@summary:  
+---------
+
+"""
+
+import json
+from pathlib import Path
+
+from DrissionPage import ChromiumPage, ChromiumOptions
+
+account_pool = [
+    ('DUDUDU101613', 'Admin330022'),
+]
+
+
def auto_login(username, password, headless=False, proxy=False, auto_quit=False):
    """Log in to vip.qianlima.com with a real Chrome session and dump the
    captured cookies/headers/proxies to ``account/<username>.json``.

    :param username: account login name
    :param password: account password
    :param headless: run Chrome headless with a desktop UA when True
    :param proxy: route traffic through the hard-coded socks5 proxy when True
    :param auto_quit: when False, block on console input before closing so the
                      live session can be inspected manually
    """
    co = ChromiumOptions()

    # One debug port / profile dir per account so parallel sessions don't collide.
    co.auto_port(tmp_path=f'./download/{username}')
    co.set_user_data_path(f'./chrome/{username}')

    # Disable the password-save prompt and other Chrome UI noise.
    co.set_argument('--disable-infobars')
    co.set_argument('--disable-extensions')
    co.set_argument('--disable-popup-blocking')

    if proxy:
        proxies = {
            'https': 'socks5://27.54.248.242:8860',
            'http': 'socks5://27.54.248.242:8860'
        }
        co.set_argument('--proxy-server', value=proxies['https'])
    else:
        proxies = None

    if headless:
        # Headless Chrome needs an explicit desktop UA to avoid bot detection.
        co.set_user_agent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36')
        co.set_argument('--headless', value='new')
        co.set_argument('--incognito')
        co.set_argument('--no-sandbox')
        co.set_argument('--disable-gpu')
        co.set_argument('--disable-dev-shm-usage')

    page = ChromiumPage(addr_or_opts=co)
    try:
        # API endpoints whose request headers we want to capture for replay.
        targets = [
            'website-seo/v2/cm/getcatid',
            'rest/detail/alltypesdetail/detail',
            'rest/account/companySpace/checkNewUser'
        ]
        page.listen.start(targets=targets, res_type=['Document', 'XHR'])  # start network capture

        success = page.get('https://vip.qianlima.com/')  # open the member portal
        if not success:
            return

        # If the login button is still visible we are not logged in yet.
        login = page.wait.ele_displayed('x://span[text()="登录"]', timeout=5)
        if login:
            page.ele('x://input[@name="username"]').input(username, clear=True)
            page.ele('x://input[@name="password"]').input(password, clear=True)
            page.ele('x://span[text()="登录"]/parent::*').click()

        # Wait until the page shows the username, i.e. login completed.
        loaded = page.wait.ele_displayed(f'x://p[contains(text(), "{username}")]')
        if not loaded:
            print(f'登录失败>{username}')
            return

        # page.get('http://www.qianlima.com/zb/detail/20241016_454396207.html')

        packet = page.listen.wait()
        root = Path(__file__).parent
        if not (root / 'account').exists():
            (root / 'account').mkdir(exist_ok=True)

        # Persist the session (cookies + request headers + proxy config) as JSON.
        file = (root / f'account/{username}.json').absolute()
        with open(file, 'w') as f:
            print(packet.url)  # print captured packet url
            # print(packet.response.body)
            headers = dict(packet.request.headers)
            print(f'** headers ** \n{json.dumps(headers, indent=4)}')
            cookies = page.cookies(as_dict=True)
            print(f'** cookies ** \n{json.dumps(cookies, indent=4)}')
            user = {
                'cookies': cookies,
                'headers': headers,
                'proxies': proxies
            }
            f.write(json.dumps(user, indent=4))
            if not auto_quit:
                # Keep the browser open until the operator confirms.
                f.flush()
                while True:
                    if input("退出>"):
                        break

    except KeyboardInterrupt:
        pass

    finally:
        page.quit()
        print('关闭浏览器')
+
+
# Capture a fresh session for every account in the pool (headless, via proxy).
if __name__ == '__main__':
    for username, password in account_pool:
        auto_login(username, password, proxy=True, auto_quit=True, headless=True)

二进制
zyjc/rgg/backup/bloomfilter.f


+ 177 - 0
zyjc/rgg/clean_html.py

@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+import re
+
__all__ = ['cleaner']

# Standalone elements removed outright (head/meta/script/style/...); images become <br>.
'''独立元素'''
INDEPENDENT_TAGS = {
    '<head>[\s\S]*?</head>': '',
    '<html>|<html [^>]*>|</html>': '',
    '<body>|<body [^>]*>|</body>': '',
    '<meta[^<>]*>|<meta [^<>]*>|<meta[^<>]*>[\s\S]*?</meta>|</meta>': '',  # metadata
    '&(nbsp|e[mn]sp|thinsp|zwn?j|#13);': '',  # whitespace entities
    '\\xa0|\\u3000': '',  # non-breaking / full-width spaces
    '<!--[\s\S]*?-->': '',  # comments
    '<style[^<>]*>[\s\S]*?</style>': '',  # styles
    '<script[^<>]*>[\s\S]*?</script>': '',  # JavaScript
    '<input>': '',  # input box
    '</input>': '',  # input box
    '<img[^>]*>': '<br>',  # images
}
# Inline elements: links/spans/fonts dropped, labels collapse to a line break.
'''行内元素'''
INLINE_TAGS = {
    '<a>|<a [^>]*>|</a>': '',  # hyperlinks
    '<link>|<link [^>]*>|</link>': '',  # link tags
    '<span>|<span [^>]*>|</span>': '',  # span
    '<label>|<label [^>]*>|</label>': '<br>',  # label
    '<font>|<font [^>]*>|</font>': '',  # font
    'data:image(.*?) ': '',  # base64-encoded images
}
# Block-level elements: opening <p>/<div> become <br>, closings and headings dropped.
'''块级元素'''
BLOCK_TAGS = {
    '<div>\s*?</div>': '',
    '<h[1-6][^>]*>|</h[1-6]>': '',  # headings
    '<p>|<p [^>]*>': '<br>',  # paragraph (open)
    '</p>': '',  # paragraph (close)
    '<div>|<div [^>]*>': '<br>',  # division (open)
    '</div>': '',  # division (close)
    '<o:p>|<o:p [^>]*>|</o:p>': ''  # MS Office WORD paragraph
}
# Site boilerplate fragments (close/print buttons, source credits, share links).
'''其他'''
OTHER = {
    '<?xml[^>]*>|<?xml [^>]*>|<?xml:.*?>': '',
    '<epointform>': '',
    '<!doctype html>|<!doctype html [^>]*>': '',
    '【关闭】|关闭': '',
    '【打印】|打印本页': '',
    '【字体:[\s\S]*】': '',
    '文章来源:[\u4e00-\u9fa5]+': '',
    '浏览次数:.*[<]+': '',
    '(责任编辑:.*?)': '',
    '分享到[:]': '',
}
# Presentation attributes stripped from the tags that survive.
'''样式'''
CSS_STYLE = {
    'style="[\s\S]*?"|style ="[\s\S]*?"': '',
    'bgcolor="[\s\S]*?"|bgcolor ="[\s\S]*?"': '',
    'bordercolor="[\s\S]*?"|bordercolor ="[\s\S]*?"': '',
    'class="[\s\S]*?"|class ="[\s\S]*?"': '',
    'align="[\s\S]*?"|align ="[\s\S]*?"': '',
    'cellpadding="(\d+)"|cellspacing="(\d+)"': '',
}
# Whitespace normalization rules (applied together with the removal rules).
'''空白符'''
BLANKS = {
    '\n\s*\n': '\n',
    '\s*\n\s*': '\n',
    '[^\S\n]': ' ',
    '\s+': ' ',
}
# Tag/attribute names used by _repair_tag to fix glued "tagattr" sequences.
'''css标签集合'''
TAGS = {'table', 'tr', 'td', 'div', 'span', 'p'}
'''css属性集合'''
ATTRS = {'id', 'class', 'style', 'width'}
# Pre-compiled patterns for tags that should be dropped when del_tag=True.
'''特殊样式的标签'''
SPECIAL_TAGS = {
    re.compile('(?i)<[^>]+style="display: none".*[^>]+>'): '<br>',
}
+
+
def _repair_tag():
    """Map glued "tagattr" sequences (e.g. 'divclass') to their fixed form
    ('div class') — used to patch non-standard pages before regex cleaning."""
    return {f'{tag}{attr}': f'{tag} {attr}' for tag in TAGS for attr in ATTRS}
+
+
+def _escape_character(html):
+    """转义字符"""
+    html = html.replace('&lt;', '<')
+    html = html.replace('&gt;', '>')
+    html = html.replace('&quot;', '"')
+    html = html.replace('&amp;', '&')
+    return html
+
+
def _lowercase_tag(html):
    """Lowercase every element tag in-place without touching the page text,
    then apply the glued tag/attribute repairs from ``_repair_tag``."""
    unique_tags = set(re.findall("<[^>]+>", html))

    if len(unique_tags) <= 10000:
        # Replace each distinct raw tag with its lowercase form.
        for raw in unique_tags:
            html = html.replace(raw, raw.lower())
    else:
        # Huge pages: let BeautifulSoup re-serialize (it lowercases tags itself).
        from bs4 import BeautifulSoup
        html = str(BeautifulSoup(html, 'lxml').body.next_element)

    for broken, fixed in _repair_tag().items():
        html = html.replace(broken, fixed)

    return html
+
+
def _clear_special_tag(html):
    """Apply every pre-compiled SPECIAL_TAGS substitution (e.g. drop
    ``display: none`` elements, replacing them with ``<br>``)."""
    for pattern, replacement in SPECIAL_TAGS.items():
        html = pattern.sub(replacement, html)
    return html
+
+
+def _clear_input_tag(html, display=False):
+    """提取value值,替换input标签"""
+    if not display:
+        html = html.replace('<input', '<input style="border-color: transparent;"')  # 不显示输入框边框
+
+    tag = re.compile(r'<input .*?>', re.S)
+    value = re.compile(r'value=["|\'](.*?)["|\']')
+
+    lst = re.findall(tag, html) or []
+    for ipt in lst:
+        val = re.findall(value, ipt)
+        if val and 'hidden' not in ipt and 'hide' not in ipt and 'display: none' not in ipt:
+            html = html.replace(ipt, val[0])
+    return html
+
+
def cleaner(html, special=None, completely=False, del_tag=False, **kwargs):
    """
    Clean raw page source down to readable markup.

    :param html: page source to clean
    :param special: extra {pattern: replacement} rules for this call only
    :param completely: also strip canvas/iframe and font-name pseudo tags
    :param del_tag: drop tags matched by SPECIAL_TAGS (e.g. display:none)
    :param kwargs: forwarded to ``_clear_input_tag`` (``display=...``)
    :return: cleaned page source
    """
    # BUG FIX: the original did ``OTHER.update(special)``, mutating the
    # module-level dict so per-call rules leaked into every later call.
    extra = dict(special) if special else {}
    remove_tags = {
        **INDEPENDENT_TAGS,
        **INLINE_TAGS,
        **BLOCK_TAGS,
        **OTHER,
        **extra,  # caller rules keep the same precedence slot as before
        **CSS_STYLE,
        **BLANKS,
    }

    html = _lowercase_tag(html)
    if del_tag:
        html = _clear_special_tag(html)

    for tag, repl in remove_tags.items():
        html = re.sub(tag, repl, html)

    if completely:
        html = re.sub(r'<canvas[^<>]*>[\s\S]*?</canvas>', '', html)  # canvas
        html = re.sub(r'<iframe[^<>]*>[\s\S]*?</iframe>', '', html)  # inline frames
        html = re.sub('<([^<>\u4e00-\u9fa5]|微软雅黑|宋体|仿宋)+>', '', html)

    html = _escape_character(html)
    html = _clear_input_tag(html, **kwargs)
    return html

+ 14 - 0
zyjc/rgg/log.py

@@ -0,0 +1,14 @@
from pathlib import Path

from loguru import logger

# Log directory lives next to the package root: <project>/logs/.
_absolute = Path(__file__).absolute().parent.parent
_log_path = (_absolute / 'logs/log_{time:YYYYMMDD}.log').resolve()
# File sink configured at import time: daily rotation at midnight,
# entries kept for one week, INFO and above.
logger.add(
    _log_path,
    format='{time:YYYY-MM-DD HH:mm:ss} - {level} - {thread.name} - {name}:{function}:{line} - {message}',
    level='INFO',
    rotation='00:00',
    retention='1 week',
    encoding='utf-8',
)

+ 270 - 0
zyjc/rgg/net.py

@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-10-10 
+---------
+@summary:  千里马详情页专用下载器
+---------
+
+"""
+import copy
+import functools
+from datetime import datetime
+
+import execjs
+import requests
+
+from rgg.log import logger
+
+_cookies = {}
+_headers = {}
+_proxies = None
+
+
def _account_supervision(func):
    """Decorator guarding account-bound requests.

    Retries ``func`` up to three times on network errors and re-raises the
    last one if every attempt fails.  An ``AssertionError`` (raised by the
    wrapped functions on a non-200 status) is treated as a broken account:
    log it, alert the WeCom group, and return None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        last_error = None
        for _ in range(3):
            try:
                return func(*args, **kwargs)
            except requests.exceptions.RequestException as exc:
                last_error = exc
            except AssertionError:
                logger.error('账号异常')
                send_wechat_warning('浙移集成|访问失败|账号异常')
                return None
        if last_error is not None:
            raise last_error

    return wrapper
+
+
def set_cookies(ck):
    """Install the cookie dict used by every request in this module."""
    global _cookies
    _cookies = ck
+
+
def set_headers(h):
    """Install the header dict used by every request in this module."""
    global _headers
    _headers = h
+
+
def set_proxies(p):
    """Install the proxy mapping used by every request in this module."""
    global _proxies
    _proxies = p
+
+
def get_proxies(scheme=None):
    """Return the configured proxy mapping, or one scheme's address.

    :param scheme: None -> the whole mapping; 'http'/'https' -> that entry
                   with any 'socks5://' prefix stripped
    :return: dict, str, or None when proxies are unset / the scheme is missing
    """
    if _proxies is None:
        return None
    if scheme is None:
        return _proxies
    entry = _proxies.get(scheme)
    if entry is None:
        # BUG FIX: previously ``None.replace(...)`` raised AttributeError
        # when the requested scheme was not configured.
        return None
    return entry.replace('socks5://', '')
+
+
+def _extract_cid(href):
+    script = '''
+    function extractCid(url) {
+        if(url.indexOf('/zb/detail') != -1){
+            var cidArr = url.split('_');
+            if (cidArr.length > 1) {
+                var cid = cidArr[1].replace('.html', '');
+                if (cid.indexOf('-') != -1) {
+                    cid = cid.split("-")[1];
+                }
+                return cid
+            }
+        }
+        
+        if (url.indexOf('-') != -1) {
+            t = url.lastIndexOf("-")
+            n = url.substring(t + 1)
+            cid = n.split(".html")[0]
+            return cid
+        }
+        
+    }
+    '''
+    ctx = execjs.compile(script)
+    result = ctx.call('extractCid', href)
+    return result
+
+
def _extract_referer(href, cid):
    """Build the vip detail-page url for ``cid`` (used as a Referer).

    Asks the seo endpoint for the announcement's category id, then maps it
    to the matching detail page name.  Pure-Python port of the embedded JS
    switch: catId 101/202 -> projectDetail.html, everything else (including
    201/301/601 and unknown ids) -> tenderDetail.html.

    :param href: original announcement url (sent as Referer to the seo api)
    :param cid: announcement id extracted from ``href``
    :return: detail page url string
    :raises AssertionError: on a non-200 response (handled by the supervisor)
    """
    href = str(href).replace('http:', 'https:')

    url = 'https://www.qianlima.com/website-seo/v2/cm/getcatid/' + cid
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,sq;q=0.7',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': href,
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    response = requests.get(url, timeout=10, headers=headers, cookies=_cookies, proxies=_proxies)
    assert response.status_code == 200

    cat_id = response.json().get('data')
    page_name = 'projectDetail.html' if cat_id in (202, 101) else 'tenderDetail.html'
    return 'https://detail.vip.qianlima.com/' + page_name + '?id=' + cid
+
+
def _download_detail(href, referer=False, timeout=10):
    """Fetch the detail payload for one announcement url.

    :param href: source page url (the cid is parsed out of it)
    :param referer: when True, resolve and attach a Referer header
    :param timeout: request timeout in seconds
    :return: the response's 'data' payload (falsy when the download failed)
    :raises ValueError: when no cid can be parsed from ``href``
    :raises AssertionError: on a non-200 response (caught by the supervisor)
    """
    cid = _extract_cid(href)
    if not cid:
        raise ValueError('cid is not exist')

    headers = copy.deepcopy(_headers)
    if referer:
        headers['Referer'] = _extract_referer(href, cid)

    response = requests.post(
        'https://detail.vip.qianlima.com/rest/detail/alltypesdetail/detail/' + cid,
        headers=headers,
        cookies=_cookies,
        proxies=_proxies,
        timeout=timeout,
    )
    assert response.status_code == 200

    payload = response.json()
    data = payload['data']
    if not data:
        logger.warning(f'下载异常|{payload}')
        return data

    logger.info(f'下载成功|{href}')
    return data
+
+
@_account_supervision
def download_html(href, **kwargs):
    """Download one announcement and return its html content (None on failure)."""
    data = _download_detail(href, **kwargs)
    return data['content'] if data else None
+
+
@_account_supervision
def download_json(href, **kwargs):
    """Download one announcement and return the raw json payload.

    Returns False when the detail download failed outright, and None when
    the announcement has been withdrawn (site code 700053).
    """
    result = _download_detail(href, timeout=30, **kwargs)
    if result is None:
        return False

    # Withdrawn notice: {"code":700053,"msg":"该条信息已被撤销,请重新检索","data":null}
    if 'code' in result and result['code'] == 700053:
        logger.warning(f'检索失败|{result}')
        return None

    return result
+
+
@_account_supervision
def download_list(keywords, page, page_size, **kwargs):
    """Search the vip solr endpoint for announcements matching ``keywords``.

    :param keywords: title search terms
    :param page: 1-based page number
    :param page_size: rows per page
    :param kwargs: optional ``begin_time`` / ``end_time`` ('YYYY-MM-DD');
                   both default to today
    :return: list of row dicts, or None on failure / empty data
    :raises AssertionError: on a non-200 response (handled by the supervisor)
    """
    today = datetime.now().strftime('%Y-%m-%d')
    begin_time = kwargs.get('begin_time') or today
    end_time = kwargs.get('end_time') or today

    url = 'https://search.vip.qianlima.com/rest/service/website/search/solr'
    data = {
        "keywords": keywords,  # title search terms
        "timeType": 4,  # custom time range
        "beginTime": begin_time,
        "endTime": end_time,
        "filtermode": 2,
        "searchMode": 0,
        "currentPage": page,  # page number
        "numPerPage": page_size,  # max rows per page
        "sortType": 6,
        "allType": -1,
        "noticeSegmentTypeStr": "",
        "beginAmount": "",
        "endAmount": "",
        "purchasingUnitIdList": "",
        "threeClassifyTagStr": "",
        "fourLevelCategoryIdListStr": "",
        "threeLevelCategoryIdListStr": "",
        "levelId": "",
        "tab": 0,
        "searchDataType": 0,
        "types": "-1",
        "showContent": 1,
        "hasTenderTransferProject": 1,
        "newAreas": "",
        "hasChooseSortType": 1,
        "summaryType": 0
    }
    response = requests.post(
        url,
        cookies=_cookies,
        headers=_headers,
        json=data,
        timeout=60,
        proxies=_proxies
    )
    assert response.status_code == 200
    result = response.json()

    try:
        result['data']['rowCount']
    # BUG FIX: a response without a 'data' key raised KeyError, which the
    # supervisor decorator does not catch and therefore escaped to callers.
    except (TypeError, KeyError):
        logger.error(f'下载失败|{keywords}|第{page}页|{result}')
        return None

    lst = result['data']
    if not lst:
        logger.warning(f'数据异常|{keywords}|第{page}页|{result}')
        return None

    logger.debug(f'下载成功|{keywords}|第{page}页')
    return result['data']['data']
+
+
def send_wechat_warning(msg, send=True):
    """Push an account-failure alert to the WeCom group webhook.

    :param msg: short failure description embedded in the markdown body
    :param send: when False, only log the message instead of posting it
    """
    markdown = f'千里马会员账号采集异常,请相关同事注意。'
    markdown += f'\n>异常详情:<font color=\"warning\">**{msg}**</font>'

    if not send:
        logger.info(markdown)
        return

    # NOTE(review): the webhook key is hard-coded here — consider moving it
    # to configuration/environment so it can be rotated without a code change.
    url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=bf53d666-bfa7-4176-b3e2-2d4b9d8a3bea'
    headers_ = {'Content-Type': 'application/json'}
    json_data = {'msgtype': 'markdown', 'markdown': {'content': markdown}}
    request_params = dict(headers=headers_, json=json_data, timeout=10)
    response = requests.post(url, **request_params)
    logger.info(response.json())