Просмотр исходного кода

修复指纹验证失败导致采集进程异常中断的错误

dzr 3 месяца назад
Родитель
Коммит
cd3222bbc9
Изменено файлов: 2 (добавлено строк: 50, удалено строк: 16)
  1. 25 8
      a_gtcgpt_cggg/gtcgpt_details.py
  2. 25 8
      a_gtcgpt_cgjg/gtcgpt_details.py

+ 25 - 8
a_gtcgpt_cggg/gtcgpt_details.py

@@ -9,15 +9,17 @@ Created on 2025-04-09
 import re
 
 import feapder
+import feapder.utils.tools as tools
 from items.spider_item import DataBakItem
 from untils.tools import get_proxy
 
-from fingerprint import get_fingerprint
+from fingerprint import get_fingerprint, fetch_alteon_pcgmh, check_fingerprint
 
 
 class Spider(feapder.BiddingDetailSpider):
 
     def start_callback(self):
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
 
@@ -27,7 +29,6 @@ class Spider(feapder.BiddingDetailSpider):
             request_params = item.get("request_params")
             yield feapder.Request(url=item.get("parse_url"),
                                   proxies=False,
-                                  callback=eval(item.get("parse")),
                                   item=item,
                                   deal_detail=item.get("deal_detail"),
                                   **request_params)
@@ -44,9 +45,12 @@ class Spider(feapder.BiddingDetailSpider):
             'X-Requested-With': 'XMLHttpRequest',
         }
 
+        if self.alteon_pcgmh is None:
+            self.alteon_pcgmh = fetch_alteon_pcgmh(self.proxy)
+
         if self.cookies is None:
             self.cookies = {
-                'AlteonPcgmh': '0a03b7f3bb36ad3f1f41',
+                'AlteonPcgmh': self.alteon_pcgmh,
                 'mhId': request.params['mhId'],
             }
 
@@ -55,20 +59,33 @@ class Spider(feapder.BiddingDetailSpider):
         request.cookies = self.cookies
 
     def validate(self, request, response):
-        data = response.json.get('data')
-        if not data:
+        if response.json['code'] == '0-0203':
+            referer = request.item.get('href')
+            check_fingerprint(request.params['mhId'], self.cookies, referer, self.proxy)
+            request.callback = self.request_retry
+            return True
+        elif response.json.get('data'):
+            request.callback = tools.resolve_method(self, request.item['parse'])
+            return True
+        else:
             raise ValueError('数据不能为空!')
-        return True
+
+    def request_retry(self, request, response):
+        if 'parse' not in request.item:
+            raise AttributeError('request.item not attribute "parse"')
+
+        yield request
 
     def detail_get(self, request, response):
-        items = request.item
+        item = request.item
+        data_item = DataBakItem(**item)
         html = response.json.get('data').get('noticeContent').get('notCont')
         html = re.sub('data:image(.*?) ', '', html, flags=re.S | re.M)
-        data_item = DataBakItem(**items)
         data_item.contenthtml = html
         yield data_item
 
     def exception_request(self, request, response):
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
         request.params['mhId'] = get_fingerprint()

+ 25 - 8
a_gtcgpt_cgjg/gtcgpt_details.py

@@ -9,15 +9,17 @@ Created on 2025-04-09
 import re
 
 import feapder
+import feapder.utils.tools as tools
 from items.spider_item import DataBakItem
 from untils.tools import get_proxy
 
-from fingerprint import get_fingerprint
+from fingerprint import get_fingerprint, fetch_alteon_pcgmh, check_fingerprint
 
 
 class Spider(feapder.BiddingDetailSpider):
 
     def start_callback(self):
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
 
@@ -27,7 +29,6 @@ class Spider(feapder.BiddingDetailSpider):
             request_params = item.get("request_params")
             yield feapder.Request(url=item.get("parse_url"),
                                   proxies=False,
-                                  callback=eval(item.get("parse")),
                                   item=item,
                                   deal_detail=item.get("deal_detail"),
                                   **request_params)
@@ -44,9 +45,12 @@ class Spider(feapder.BiddingDetailSpider):
             'X-Requested-With': 'XMLHttpRequest',
         }
 
+        if self.alteon_pcgmh is None:
+            self.alteon_pcgmh = fetch_alteon_pcgmh(self.proxy)
+
         if self.cookies is None:
             self.cookies = {
-                'AlteonPcgmh': '0a03b7f3bb36ad3f1f41',
+                'AlteonPcgmh': self.alteon_pcgmh,
                 'mhId': request.params['mhId'],
             }
 
@@ -55,20 +59,33 @@ class Spider(feapder.BiddingDetailSpider):
         request.cookies = self.cookies
 
     def validate(self, request, response):
-        data = response.json.get('data')
-        if not data:
+        if response.json['code'] == '0-0203':
+            referer = request.item.get('href')
+            check_fingerprint(request.params['mhId'], self.cookies, referer, self.proxy)
+            request.callback = self.request_retry
+            return True
+        elif response.json.get('data'):
+            request.callback = tools.resolve_method(self, request.item['parse'])
+            return True
+        else:
             raise ValueError('数据不能为空!')
-        return True
+
+    def request_retry(self, request, response):
+        if 'parse' not in request.item:
+            raise AttributeError('request.item not attribute "parse"')
+
+        yield request
 
     def detail_get(self, request, response):
-        items = request.item
+        item = request.item
+        data_item = DataBakItem(**item)
         html = response.json.get('data').get('noticeContent').get('notCont')
         html = re.sub('data:image(.*?) ', '', html, flags=re.S | re.M)
-        data_item = DataBakItem(**items)
         data_item.contenthtml = html
         yield data_item
 
     def exception_request(self, request, response):
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
         request.params['mhId'] = get_fingerprint()