Selaa lähdekoodia

切换新oss服务

dzr 1 päivä sitten
vanhempi
commit
0a51588c0c
3 muutettua tiedostoa jossa 45 lisäystä ja 38 poistoa
  1. 2 2
      lzz_theme/utils/aliyun.py
  2. 7 10
      lzz_theme/utils/attachment.py
  3. 36 26
      lzz_theme/zgydcgyzbw/zgyd_details.py

+ 2 - 2
lzz_theme/utils/aliyun.py

@@ -146,14 +146,14 @@ class JyOssClient:
             "stream": stream
         }
 
-        ret = {"error_msg": "附件上传错误", "error_code": -1}
+        ret = {"error_msg": "附件上传失败", "error_code": -1}
         for _ in range(retries):
             ret = self._oss_client.upload(args)
             if ret["error_code"] == 0:
                 return ret
 
         if err_show:
-            raise AttachmentError(reason=ret.get("error_msg") or "附件上传错误")
+            raise AttachmentError(reason=ret.get("error_msg") or "附件上传失败")
 
         return ret
 

+ 7 - 10
lzz_theme/utils/attachment.py

@@ -17,7 +17,7 @@ import tqdm
 import urllib3
 
 from utils.tools import *
-from utils.aliyun import AliYunService
+from utils.aliyun import JyOssClient
 
 
 urllib3.disable_warnings()
@@ -38,19 +38,18 @@ def clear_file_type_suffix(filename: str, filetype: str):
 class AttachmentDownloader:
 
     def __init__(self, max_retries=3):
-        self.dir_name = "file"
+        self._directory = "file"
 
         self._max_retries = max_retries
-        # self._oss = JyOssClient()
-        self._oss = AliYunService()
+        self._oss = JyOssClient()
 
     def create_file(self, filename, filetype):
-        os.makedirs(self.dir_name, mode=0o777, exist_ok=True)
+        os.makedirs(self._directory, mode=0o777, exist_ok=True)
         file = "{filename}.{filetype}".format(
             filename=get_sha1("{}_{}".format(filename, uuid.uuid4())),
             filetype=filetype
         )
-        return "{}/{}".format(self.dir_name, file)
+        return "{}/{}".format(self._directory, file)
 
     @staticmethod
     def clean_attachment(filepath):
@@ -212,8 +211,7 @@ class AttachmentDownloader:
                 attachment["fid"] = "{}.{}".format(fid, filetype)
                 attachment["size"] = self.getsize(stream)
                 attachment["url"] = "oss"
-                # self._oss.upload("file", attachment["fid"], stream)
-                self._oss.push_oss_from_stream(attachment["fid"], stream)
+                self._oss.upload("file", attachment["fid"], stream)
             except Exception as e:
                 logger.error(
                     "[{}]上传失败,原因:{}".format(filename, type(e).__name__)
@@ -264,8 +262,7 @@ class AttachmentDownloader:
                 attachment["size"] = self.getsize(file)
                 attachment["ftype"] = filetype
                 attachment["url"] = "oss"
-                # self._oss.upload("file", attachment["fid"], stream)
-                self._oss.push_oss_from_local(attachment["fid"], file)
+                self._oss.upload("file", attachment["fid"], stream)
             except Exception as e:
                 logger.error(
                     "[{}]上传失败,原因:{}".format(filename, type(e).__name__)

+ 36 - 26
lzz_theme/zgydcgyzbw/zgyd_details.py

@@ -11,7 +11,7 @@ import os
 sys.path.append(os.path.dirname(os.getcwd()))
 from utils.attachment import AttachmentDownloader
 from utils.tools import *
-from utils.aliyun import AliYunService
+from utils.aliyun import JyOssClient
 import requests
 import json
 import base64
@@ -101,19 +101,21 @@ class Details:
             res = requests.post(url, headers=headers, data=data, timeout=60, proxies=self.proxy, verify=False)
 
             file_list = res.json().get('data')
-
-            if file_list:
-
-                for info in file_list:
-                    file_name = info.get('filename')
-                    file_url = f"https://b2b.10086.cn/api-b2b/api-file/file/downloadFileOnAuth?authFlag={info.get('authFlag')}&fileId={info.get('fileId')}&fileUuid={info.get('uuid')}"
-                    file_type = extract_file_type(file_name, file_url)
-
-                    attachment = AttachmentDownloader().fetch_attachment(
-                        file_name=file_name, file_type=file_type, download_url=file_url,
-                        proxies=self.proxy)
-                    if attachment.__contains__("fid"):
-                        attachments[str(len(attachments) + 1)] = attachment
+            if not file_list:
+                return attachments
+
+            downloader = AttachmentDownloader()
+            for info in file_list:
+                file_name = info.get('filename')
+                file_url = f"https://b2b.10086.cn/api-b2b/api-file/file/downloadFileOnAuth?authFlag={info.get('authFlag')}&fileId={info.get('fileId')}&fileUuid={info.get('uuid')}"
+                file_type = extract_file_type(file_name, file_url)
+                attachment = downloader.fetch_attachment(
+                    file_name=file_name,
+                    file_type=file_type,
+                    download_url=file_url,
+                    proxies=self.proxy
+                )
+                attachments[str(len(attachments) + 1)] = attachment
         except:
             pass
 
@@ -122,22 +124,27 @@ class Details:
     def detail_get(self, response, item):
 
         detail_info = response.json().get('data')
-        html = detail_info.get('noticeContent')
+
+        attr2 = detail_info.get('uuid')
+        attachments = self.get_attachments(attr2)
+
         html_file = {}
+        html = detail_info.get('noticeContent')
         if len(html) > 100 and text_search(html).total == 0:
             stream = base64.b64decode(html)
-            fnm = item['title']
-            if len(fnm) > 20:
-                fnm = "附件"
+
+            fnm = "附件" if len(item['title']) > 20 else item['title']
             file = f"file/{fnm}.pdf"
             directory = os.path.dirname(file)
             if not os.path.exists(directory):
                 os.makedirs(directory)
+
             with open(file, 'wb') as f:
                 f.write(stream)
+
             content_hash = get_sha1(stream)
             fid = "{}.{}".format(content_hash, 'pdf')
-            AliYunService().push_oss_from_local(fid, file)
+            JyOssClient().upload("file", fid, stream)
             html_file = {
                 "filename": item['title'],
                 "org_url": item['href'],
@@ -146,24 +153,23 @@ class Details:
                 "ftype": "pdf",
                 "url": "oss"
             }
+
             try:
                 os.remove(file)
                 os.rmdir(file.replace('.pdf',''))
             except FileNotFoundError:
                 pass
-            html = "详情请访问原网页!"
 
-        item["contenthtml"] = html
+            html = "详情请访问原网页!"
 
-        attr2 = detail_info.get('uuid')
-        attachments = self.get_attachments(attr2)
         if html_file:
             attachments[str(len(attachments) + 1)] = html_file
+
+        item["contenthtml"] = html
         if attachments:
             item["projectinfo"] = {"attachments": attachments}
 
         item = format_fileds(item)
-
         try:
             self.zt_details.insert_one(item)
             logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
@@ -172,8 +178,12 @@ class Details:
 
     def fetch_request(self, item):
         request_params = item.get("request_params")
-        response = requests.post(url=item.get("parse_url"), headers=self.headers, json=request_params,
-                                proxies=self.proxy, timeout=(30, 60), verify=False)
+        response = requests.post(url=item.get("parse_url"),
+                                 headers=self.headers,
+                                 json=request_params,
+                                 proxies=self.proxy,
+                                 timeout=(30, 60),
+                                 verify=False)
 
         return response