Browse Source

添加zhipu大模型

dongzhaorui cách đây 5 tháng
mục cha
commit
7d215fbb8c
1 tập tin đã thay đổi với 66 bổ sung và 42 xóa
  1. 66 42
      FworkSpider/untils/get_imgcode.py

+ 66 - 42
FworkSpider/untils/get_imgcode.py

@@ -3,80 +3,101 @@ import requests
 import feapder.setting as setting
 
 __all__ = [
-    "swordfish_platform",
+    "jy_ocr",
     "chaojiying_platform",
     "chaojiying_report",
     "get_code",
     "get_code_det",
     "arithmetic_captcha",
+    "swordfish_platform"
 ]
 
-headers = {"accept": "application/json"}
+_headers = {"accept": "application/json"}
 
 
 def _pack_file(file):
     """包装验证码格式"""
     if isinstance(file, str) and file.startswith("data:image"):
-        img_file = {"file": file}
+        files = {"file": file}
     elif isinstance(file, bytes):
-        img_file = {"file": file}
+        files = {"file": file}
     else:
         with open(file, "rb") as f:
             img_bytes = f.read()
-        img_file = {"file": img_bytes}
-    return img_file
+        files = {"file": img_bytes}
+    return files
 
 
-def _simple_captcha(file):
+def _ocr_image(url, image):
+    files = _pack_file(image)
+    r = requests.post(url, headers=_headers, files=files, stream=True, timeout=10)
+    rp_json = r.json()
+    if "msg" in rp_json and "success" == rp_json["msg"]:
+        return str(rp_json["r"]["code"])
+    return None
+
+
+def _simple_captcha(image):
     """
     普通验证码
 
-    @param file: 验证码 - 可以是图片或者图片base64编码
+    @param image: 验证码 - 图片/base64图片编码
     @return:
     """
-    url = f"{setting.CAPTCHA_URL}/v1/images/verify"
-    files = _pack_file(file)
-    r = requests.post(url, headers=headers, files=files, stream=True, timeout=10)
-    rp_json = r.json()
-    if "msg" in rp_json and "success" == rp_json["msg"]:
-        return str(rp_json["r"]["code"])
-    return None
+    return _ocr_image(f"{setting.CAPTCHA_URL}/v1/images/verify", image)
 
 
-def _arithmetic_captcha(file):
+def _arithmetic_captcha(image):
     """算术验证码"""
-    url = f"{setting.CAPTCHA_URL}/v1/images/arithmetic"
-    files = _pack_file(file)
-    r = requests.post(url, headers=headers, files=files, stream=True, timeout=10)
-    json_resp = r.json()
-    if "msg" in json_resp and "success" == json_resp["msg"]:
-        return str(json_resp["r"]["code"])
-    return None
+    return _ocr_image(f"{setting.CAPTCHA_URL}/v1/images/arithmetic", image)
 
 
-def _get_click_verify_captcha(file):
+def _get_click_verify_captcha(image):
     """点触式验证码"""
     url = f"{setting.CAPTCHA_URL}/v1/images/verify_det"
-    files = _pack_file(file)
-    r = requests.post(url, headers=headers, files=files, stream=True, timeout=10)
+    files = _pack_file(image)
+    r = requests.post(url, headers=_headers, files=files, stream=True, timeout=10)
     return r.json()
 
 
-def swordfish_platform(file, mode="simple"):
+def _zhipu_ocr_captcha(image, text=None):
+    files = _pack_file(image)
+    if not text:
+        url = f"{setting.CAPTCHA_URL}/v1/images/verify_z"
+        params = None
+    else:
+        url = f"{setting.CAPTCHA_URL}/v1/images/verify_a"
+        params = {"text": text}
+
+    r = requests.post(url, headers=_headers, params=params, files=files, stream=True, timeout=10)
+    rp_json = r.json()
+    if "msg" in rp_json and "success" == rp_json["msg"]:
+        return str(rp_json["r"]["code"])
+    return None
+
+
+def jy_ocr(image, mode="simple", text=None):
     """剑鱼验证码识别平台"""
     if mode.lower() == "arithmetic":
-        return _arithmetic_captcha(file)
+        return _arithmetic_captcha(image)
     elif mode.lower() == "det":
-        return _get_click_verify_captcha(file)
+        return _get_click_verify_captcha(image)
+    elif mode.lower() == "zhipu":
+        return _zhipu_ocr_captcha(image, text)
     else:
-        return _simple_captcha(file)
+        return _simple_captcha(image)
+
 
+swordfish_platform = jy_ocr
 
-def chaojiying_platform(file, pic_type: int, spidercode=None):
+
+def chaojiying_platform(file, pic_type, spidercode=None):
     """
     超级鹰识别平台
 
     pic_type,详情查询地址: https://www.chaojiying.com/price.html
+    @param str|bytes file: 验证码图片
+    @param int pic_type: 验证码图片类型
     @param str spidercode: 爬虫代码
     """
     files = _pack_file(file)
@@ -84,7 +105,6 @@ def chaojiying_platform(file, pic_type: int, spidercode=None):
     if spidercode is not None:
         url = f"{setting.CAPTCHA_URL}/v1/images/discern?pic_type={pic_type}&jy_code={spidercode}"
 
-    headers = {'accept': 'application/json'}
     data = {
         'grant_type': '',
         'username': 'jianyu001',
@@ -93,7 +113,7 @@ def chaojiying_platform(file, pic_type: int, spidercode=None):
         'client_id': '',
         'client_secret': ''
     }
-    response = requests.post(url, headers=headers, data=data, files=files, timeout=10)
+    response = requests.post(url, headers=_headers, data=data, files=files, timeout=10)
     json_resp = response.json()
     '''code 返回0时,打码平台正常返回数据'''
     pic_str = json_resp["r"]["pic_str"]
@@ -103,8 +123,13 @@ def chaojiying_platform(file, pic_type: int, spidercode=None):
     return None, pic_id
 
 
-def chaojiying_report(pic_id: str):
-    """超级鹰平台识别验证码错误时,提交识别错误的验证码pic_id"""
+def chaojiying_report(pic_id):
+    """
+    超级鹰平台识别验证码错误时,提交识别错误的验证码pic_id
+
+    @param str pic_id: 超级鹰验证码识别图片id
+    @return:
+    """
     url = f"{setting.CAPTCHA_URL}/v1/images/report_err?pic_id={pic_id}"
     headers = {
         'accept': 'application/json',
@@ -126,14 +151,13 @@ def chaojiying_report(pic_id: str):
     return response.json()
 
 
-def get_code(file_path: str) -> dict:
-    return swordfish_platform(file_path) or {}
+def get_code(image):
+    return jy_ocr(image) or {}
 
 
-def get_code_det(image_bytes) -> dict:
-    return swordfish_platform(image_bytes, mode="det")
+def get_code_det(image):
+    return jy_ocr(image, mode="det")
 
 
-# 算术
-def arithmetic_captcha(image_stream):
-    return swordfish_platform(image_stream, mode="arithmetic")
+def arithmetic_captcha(image):
+    return jy_ocr(image, mode="arithmetic")