Prechádzať zdrojové kódy

添加智普大模型图片识别

dzr 7 mesiacov pred
rodič
commit
07b9a12af6
4 zmenil súbory, kde vykonal 101 pridanie a 38 odobranie
  1. 3 3
      docker-compose.yml
  2. 1 0
      requirements.txt
  3. 50 35
      services/apis.py
  4. 47 0
      services/zhipu.py

+ 3 - 3
docker-compose.yml

@@ -1,6 +1,6 @@
 version: "3" # docker版本对应的docker-compose版本
 services:
-  server:
+  captcha:
     container_name: pycaptcha
     image: pycaptcha:latest
     volumes:
@@ -20,8 +20,8 @@ services:
     logging:
       driver: "json-file"
       options:
-        max-size: "200k"
-        max-file: "10"
+        max-size: "500M"
+        max-file: "1"
     deploy:
       resources:
         limits:

+ 1 - 0
requirements.txt

@@ -8,3 +8,4 @@ aiofiles==23.1.0
 python-multipart==0.0.6
 redis==3.5.3
 loguru
+zhipuai

+ 50 - 35
services/apis.py

@@ -23,33 +23,38 @@ from services.chaojiying import CJ
 from services.defults import FAKE_USERS_DB
 from services.limiter import limiter
 from services.utils import calculate
+from services.zhipu import ZhipuFileExtract
 
 images_router = APIRouter(prefix="/images")
 
 
-@images_router.post("/verify", summary="Ocr")
-async def simple_captcha(file: UploadFile = File(...)):
-    start = time.time()
-    img_bytes = await file.read()
-    ocr = ddddocr.DdddOcr(det=False, ocr=True, show_ad=False)
-    if img_bytes.startswith(b'data:image'):
-        src = img_bytes.decode()
+def prepare_image(file):
+    """Normalise a raw upload into the value handed to the OCR backends.
+
+    :param file: raw bytes read from the uploaded file.
+    :return: for a payload that starts with ``data:image``, the base64 text
+        that follows ``base64,`` (a ``str``); otherwise ``file`` itself
+        (``bytes``). Callers therefore have to handle both return types.
+    :raises TypeError: when a ``data:image`` payload does not match the
+        ``data:image/<ext>;base64,<data>`` layout.
+    """
+    if file.startswith(b'data:image'):
+        src = file.decode()
         result = re.search("data:image/(?P<ext>.*?);base64,(?P<data>.*)", src, re.DOTALL)
         if result:
             # ext = result.groupdict().get("ext")
-            img_base64 = result.groupdict().get("data")
+            return result.groupdict().get("data")  # img_base64
         else:
-            raise Exception("Do not parse!")
-        verify_code = ocr.classification(img_base64)
+            raise TypeError("transform image type failed!")
     else:
-        verify_code = ocr.classification(img_bytes)
+        # NOTE(review): assert statements are stripped under ``python -O``,
+        # so this only documents the expectation; it is not a real type check.
+        assert isinstance(file, bytes)
+        return file  # img_bytes
+
 
+@images_router.post("/verify", summary="Ocr")
+async def simple_captcha(file: UploadFile = File(...)):
+    """Recognise the text of an uploaded captcha image with ddddocr.
+
+    :param file: uploaded image; either raw image bytes or a
+        ``data:image/...;base64,...`` payload (see ``prepare_image``).
+    :return: dict with ``msg``, ``code`` (0) and ``r`` holding the elapsed
+        time in seconds (two decimals) and the classification result.
+    """
+    start = time.time()
+    img_bytes = await file.read()
+    # A new DdddOcr instance (OCR only, no detection) is built on every request.
+    ocr = ddddocr.DdddOcr(det=False, ocr=True, show_ad=False)
+    # prepare_image yields base64 text or raw bytes; both are passed straight
+    # to classification().
+    image = prepare_image(img_bytes)
+    ret = ocr.classification(image)
     return {
         "msg": "success",
         "code": 0,
         "r": {
             "time": float("{:.2f}".format(time.time() - start)),
-            "code": verify_code
+            "code": ret
         }
     }
 
@@ -65,24 +70,14 @@ async def arithmetic_captcha(file: UploadFile = File(...)):
                           show_ad=False,
                           import_onnx_path=onnx_path,
                           charsets_path=charsets_path)
-    if img_bytes.startswith(b'data:image'):
-        src = img_bytes.decode()
-        result = re.search("data:image/(?P<ext>.*?);base64,(?P<data>.*)", src, re.DOTALL)
-        if result:
-            img_base64 = result.groupdict().get("data")
-        else:
-            raise Exception("Do not parse!")
-        verify_code = ocr.classification(img_base64)
-    else:
-        verify_code = ocr.classification(img_bytes)
-
-    verify_code = verify_code.replace("x", "*")
+    image = prepare_image(img_bytes)
+    ret = image.replace("x", "*")
     return {
         "msg": "success",
         "code": 0,
         "r": {
             "time": float("{:.2f}".format(time.time() - start)),
-            "code": calculate(verify_code)
+            "code": calculate(ret)
         }
     }
 
@@ -92,17 +87,13 @@ async def det_captcha(file: UploadFile = File(..., description="验证码图片"
     det = ddddocr.DdddOcr(det=True, show_ad=False)
     ocr = ddddocr.DdddOcr(ocr=True, show_ad=False)
     img_bytes = await file.read()
-    if img_bytes.startswith(b'data:image'):
-        src = img_bytes.decode()
-        result = re.search("data:image/(?P<ext>.*?);base64,(?P<data>.*)", src, re.DOTALL)
-        if result:
-            img_base64 = result.groupdict().get("data")
-        else:
-            raise Exception("Do not parse!")
-        poses = det.detection(img_base64=img_base64)
-        img_bytes = base64.b64decode(img_base64)
+    image = prepare_image(img_bytes)
+    if isinstance(image, bytes):
+        poses = det.detection(img_bytes=image)
     else:
-        poses = det.detection(img_bytes=img_bytes)
+        assert isinstance(image, str)
+        poses = det.detection(img_base64=image)
+        img_bytes = base64.b64decode(image)
 
     img_byte = io.BytesIO(img_bytes)
     file_array = np.frombuffer(img_byte.getbuffer(), np.uint8)
@@ -195,3 +186,27 @@ async def reset_limiter():
         "code": 0,
         "r": {}
     }
+
+
+@images_router.post("/verify_z", summary="Ocr")
+async def zhipu_ocr(file: UploadFile = File(...)):
+    """Solve an arithmetic captcha image with the Zhipu ``glm-4v-flash`` model.
+
+    :param file: uploaded image; either raw image bytes or a
+        ``data:image/...;base64,...`` payload.
+    :return: dict with ``msg``, ``code`` (0) and ``r`` holding the elapsed
+        time in seconds (two decimals) and the text answered by the model.
+    :raises TypeError: propagated from ``prepare_image`` when a
+        ``data:image`` payload cannot be parsed.
+    """
+    import os  # local import: the module-level import list is not visible here
+
+    start = time.time()
+    img_bytes = await file.read()
+    image = prepare_image(img_bytes)
+    if isinstance(image, str):
+        # prepare_image returns base64 *text* for data-URI uploads, while
+        # glm_4v_flash base64-encodes its argument itself. Decode back to raw
+        # bytes so the str does not reach base64.b64encode (TypeError).
+        image = base64.b64decode(image)
+
+    # NOTE(review): the fallback key below is committed to the repository and
+    # should be rotated and removed; set ZHIPUAI_API_KEY in the environment
+    # to override it without touching the code.
+    client = ZhipuFileExtract(
+        api_key=os.environ.get(
+            "ZHIPUAI_API_KEY",
+            "4abf8f53d1daed90f8a03fc982899418.dyK8ivPgzm3M1Nx5"
+        )
+    )
+    ret = client.glm_4v_flash(
+        image,
+        text='这是一张100以内数学计算图片验证码,图片内容计算结果是什么?你只需要返回图中内容计算结果即可,不要解释,不要返回任何其它内容'
+    )
+
+    return {
+        "msg": "success",
+        "code": 0,
+        "r": {
+            "time": float("{:.2f}".format(time.time() - start)),
+            "code": ret
+        }
+    }

+ 47 - 0
services/zhipu.py

@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2024-10-31 
+---------
+@summary:  智普文件抽取服务
+---------
+@author: Dzr
+"""
+import base64
+
+from zhipuai import ZhipuAI
+
+
+class ZhipuFileExtract:
+    """Small wrapper around the ZhipuAI chat-completions client for images."""
+
+    def __init__(self, api_key):
+        """Create the underlying SDK client.
+
+        :param api_key: ZhipuAI API key, forwarded unchanged to ``ZhipuAI``.
+        """
+        self._client = ZhipuAI(
+            api_key=api_key,
+            base_url="https://open.bigmodel.cn/api/paas/v4"
+        )
+
+    def glm_4v_flash(self, image: "bytes | str", text=None):
+        """Ask the ``glm-4v-flash`` model a question about one image.
+
+        :param image: raw image bytes (base64-encoded here before sending),
+            or a ``str`` that is already base64 text, which is sent as-is.
+        :param text: prompt sent along with the image; when ``None`` a default
+            prompt asking for the text shown in the image is used.
+        :return: ``content`` of the first choice in the model response.
+        """
+        if text is None:
+            text = "这是一张字体图片,图片内容是什么?你只需要返回图中内容即可,不要解释,不要返回任何其它内容"
+
+        if isinstance(image, str):
+            # Already-encoded input (e.g. the base64 part of a data URI):
+            # base64.b64encode would raise TypeError on str, so pass it through.
+            img_base = image
+        else:
+            img_base = base64.b64encode(image).decode('utf-8')
+        response = self._client.chat.completions.create(
+            model="glm-4v-flash",  # name of the model to call
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": img_base
+                            }
+                        },
+                        {
+                            "type": "text",
+                            "text": text
+                        }
+                    ]
+                }
+            ]
+        )
+        return response.choices[0].message.content