@@ -9,15 +9,17 @@ Created on 2025-04-09
 import re
 
 import feapder
+import feapder.utils.tools as tools
 from items.spider_item import DataBakItem
 from untils.tools import get_proxy
 
-from fingerprint import get_fingerprint
+from fingerprint import get_fingerprint, fetch_alteon_pcgmh, check_fingerprint
 
 
 class Spider(feapder.BiddingDetailSpider):
 
     def start_callback(self):
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
 
@@ -27,7 +29,6 @@ class Spider(feapder.BiddingDetailSpider):
             request_params = item.get("request_params")
             yield feapder.Request(url=item.get("parse_url"),
                                   proxies=False,
-                                  callback=eval(item.get("parse")),
                                   item=item,
                                   deal_detail=item.get("deal_detail"),
                                   **request_params)
@@ -44,9 +45,12 @@ class Spider(feapder.BiddingDetailSpider):
             'X-Requested-With': 'XMLHttpRequest',
         }
 
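+        # Fetch the AlteonPcgmh cookie value once and cache it; exception_request clears it so a fresh value is fetched after a failure.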
+        if self.alteon_pcgmh is None:
+            self.alteon_pcgmh = fetch_alteon_pcgmh(self.proxy)
+
         if self.cookies is None:
             self.cookies = {
-                'AlteonPcgmh': '0a03b7f3bb36ad3f1f41',
+                'AlteonPcgmh': self.alteon_pcgmh,
                 'mhId': request.params['mhId'],
             }
 
@@ -55,20 +59,33 @@ class Spider(feapder.BiddingDetailSpider):
         request.cookies = self.cookies
 
     def validate(self, request, response):
-        data = response.json.get('data')
-        if not data:
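+        # A '0-0203' code triggers a fingerprint re-check and a retry of the request via request_retry.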
+        if response.json['code'] == '0-0203':
+            referer = request.item.get('href')
+            check_fingerprint(request.params['mhId'], self.cookies, referer, self.proxy)
+            request.callback = self.request_retry
+            return True
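+        # Valid data: resolve the parse callback by its name in the item, replacing the removed callback=eval(...) argument.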
+        elif response.json.get('data'):
+            request.callback = tools.resolve_method(self, request.item['parse'])
+            return True
+        else:
             raise ValueError('数据不能为空!')
-        return True
+
+    def request_retry(self, request, response):
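+        # Re-yield the request so it is downloaded again; validate assigns the real parse callback once valid data comes back.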
+        if 'parse' not in request.item:
+            raise AttributeError('request.item has no attribute "parse"')
+
+        yield request
 
     def detail_get(self, request, response):
-        items = request.item
+        item = request.item
+        data_item = DataBakItem(**item)
         html = response.json.get('data').get('noticeContent').get('notCont')
         html = re.sub('data:image(.*?) ', '', html, flags=re.S | re.M)
-        data_item = DataBakItem(**items)
         data_item.contenthtml = html
         yield data_item
 
     def exception_request(self, request, response):
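+        # Also clear the cached AlteonPcgmh value so it is re-fetched along with the cookies, proxy and fingerprint.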
+        self.alteon_pcgmh = None
         self.cookies = None
         self.proxy = get_proxy()
         request.params['mhId'] = get_fingerprint()