|
@@ -7,6 +7,7 @@ import time
|
|
|
|
|
|
import requests
|
|
import requests
|
|
|
|
|
|
|
|
+from db.RedisDB import RedisFilter
|
|
from utils.config_parms import (
|
|
from utils.config_parms import (
|
|
account_pool,
|
|
account_pool,
|
|
area_dict,
|
|
area_dict,
|
|
@@ -15,7 +16,7 @@ from utils.config_parms import (
|
|
channel_dict,
|
|
channel_dict,
|
|
REQUEST_DATA_MAP
|
|
REQUEST_DATA_MAP
|
|
)
|
|
)
|
|
-from utils.databases import mongo_table, redis_client
|
|
|
|
|
|
+from utils.databases import mongo_table
|
|
from utils.log import logger
|
|
from utils.log import logger
|
|
from utils.sessions_521 import http_session_521
|
|
from utils.sessions_521 import http_session_521
|
|
from utils.tools import sha1, get_today_of_day
|
|
from utils.tools import sha1, get_today_of_day
|
|
@@ -30,8 +31,7 @@ https://search.vip.qianlima.com/index.html#?sortType=6&isSearchWord=1&tab_index=
|
|
'''
|
|
'''
|
|
|
|
|
|
qlm = mongo_table('qlm', 'data_merge')
|
|
qlm = mongo_table('qlm', 'data_merge')
|
|
-r = redis_client()
|
|
|
|
-redis_key = 'qianlima_2024'
|
|
|
|
|
|
+dedup = RedisFilter('redis://:k5ZJR5KV4q7DRZ92DQ@172.17.189.142:7361/2')
|
|
|
|
|
|
session = requests.session()
|
|
session = requests.session()
|
|
|
|
|
|
@@ -176,8 +176,9 @@ def downloader(begin_date, end_date, category, address, page, page_size, account
|
|
items = resp_json['data']['data']
|
|
items = resp_json['data']['data']
|
|
for item in items:
|
|
for item in items:
|
|
cid = sha1(str(item['contentid']))
|
|
cid = sha1(str(item['contentid']))
|
|
- if not r.hexists(redis_key, cid):
|
|
|
|
- r.hset(redis_key, cid, '')
|
|
|
|
|
|
+
|
|
|
|
+ if not dedup.get(item['contentid']):
|
|
|
|
+ dedup.add(item['contentid'])
|
|
if 'popTitle' in item:
|
|
if 'popTitle' in item:
|
|
item['title'] = item['popTitle']
|
|
item['title'] = item['popTitle']
|
|
else:
|
|
else:
|