1234567891011121314151617181920212223242526272829303132333435 |
- # -*- coding: utf-8 -*-
- """
- Created on 2025-07-31
- ---------
- @summary:
- """
- import re
- import requests
- from untils.get_imgcode import jy_ocr
def ocr_captcha(headers, proxies=None, max_retries=3):
    """Fetch the site's captcha image, OCR it, and return the code with cookies.

    Each attempt downloads the captcha page, extracts the image path from
    it, downloads the image and hands the raw bytes to ``jy_ocr``. A result
    is accepted only when it is non-empty and exactly 6 characters long.

    Args:
        headers: HTTP headers sent with every request.
        proxies: Optional proxies mapping assigned to the session; ``None``
            means an empty mapping.
        max_retries: Maximum number of attempts.

    Returns:
        A ``(code, cookies)`` tuple. ``cookies`` is the session cookie jar
        as a plain dict. When no attempt yields an accepted code, ``code``
        is whatever the last OCR call returned (possibly of the wrong
        length), or ``''`` if OCR was never reached.
    """
    base_url = 'http://wssc.hubeigp.gov.cn'
    captcha_page = base_url + '/simple_captcha'
    # The image path is embedded in the page as: 'src', '<path>'
    src_pattern = re.compile("'src', '(.*?)'", flags=re.S)

    code = ''
    with requests.session() as s:
        s.proxies = proxies if proxies is not None else {}
        for _ in range(max_retries):
            try:
                resp = s.get(captcha_page, headers=headers, timeout=30, verify=False)
                resp.raise_for_status()
                found = src_pattern.search(resp.content.decode())
                if found is None:
                    # No image path on the page: requesting the bare site
                    # root and OCR-ing HTML is pointless, so try again.
                    continue
                # Use the first match only; concatenating several matches
                # would produce an invalid URL.
                img = s.get(base_url + found.group(1), headers=headers,
                            timeout=30, verify=False)
                img.raise_for_status()
                # jy_ocr is a project helper; presumably it returns the
                # recognised text for the image bytes -- confirm.
                code = jy_ocr(image=img.content)
                if code and len(code) == 6:
                    return code, s.cookies.get_dict()
            except requests.exceptions.RequestException:
                # A network/HTTP failure consumes one attempt instead of
                # aborting the whole loop, so max_retries also covers
                # transient errors.
                continue
        return code, s.cookies.get_dict()
|