|
@@ -4,7 +4,6 @@ from typing import List, Mapping
|
|
|
|
|
|
from common.databases import insert_one, update_one_by_domain
|
|
|
from common.log import logger
|
|
|
-from common.tools import delay_by
|
|
|
from constants import (
|
|
|
ORGANIZATION,
|
|
|
KEYWORD,
|
|
@@ -63,12 +62,13 @@ class BasicService:
|
|
|
self.url_groups = SEED_URL
|
|
|
self.competing_groups = COMPETING_GOODS
|
|
|
|
|
|
- @staticmethod
|
|
|
- def loops_interval(interval, enable_debug_log=False):
|
|
|
- t_name = threading.currentThread().getName()
|
|
|
- next_run_time = delay_by((interval or 300))
|
|
|
@property
def thread_name(self):
    """Return the name of the thread that is currently executing.

    The name is looked up on every access, so the value reflects the
    thread that reads the property, not the thread that created the
    instance.

    ``threading.current_thread().name`` is used instead of the camelCase
    ``currentThread().getName()`` spelling, which is deprecated since
    Python 3.10 and emits ``DeprecationWarning``.
    """
    return threading.current_thread().name
|
|
|
+
|
|
|
def loops_interval(self, interval, enable_debug_log=False):
    """Block the calling thread for ``interval`` seconds.

    Args:
        interval: Number of seconds to sleep. ``None`` falls back to
            300 seconds instead of letting ``time.sleep`` raise
            ``TypeError``. Every other value (including ``0``) is passed
            to ``time.sleep`` unchanged.
        enable_debug_log: When true, a debug line naming the current
            thread is logged before sleeping.
    """
    if enable_debug_log:
        logger.debug(f'Thread-<{self.thread_name}> is closed.')
    # time.sleep(None) raises TypeError; use the 300 s default in that case.
    time.sleep(300 if interval is None else interval)
|
|
|
|
|
|
@staticmethod
|
|
@@ -113,22 +113,21 @@ class BasicService:
|
|
|
return item
|
|
|
|
|
|
def _push_data(self, purpose: str, task: Task, collection):
    """Store ``task`` (or a record built from it) and log what was stored.

    The ``purpose`` string selects how the task is persisted:

    * ``'query'``  - build a record with ``make_retrieve_item`` and
      insert it; the log line shows the record's ``_id``.
    * ``'domain'`` - build a record with ``make_domain_item`` and
      insert it; the log line shows ``task["domain"]``.
    * ``'remove'`` - build a record with ``make_duplicate_removal`` and
      pass it to ``update_one_by_domain``; the log line shows
      ``task["url"]``.
    * anything else - insert ``task`` itself; the log line shows
      ``task["_id"]``.

    Args:
        purpose: Selector for the storage path described above.
        task: The task being persisted; read by key in the log line.
        collection: Target passed through to the storage helpers.
    """
    # Each branch stores first and only then reads the key used in the
    # log line, so a missing key surfaces after the write, as before.
    if purpose == 'query':
        record = self.make_retrieve_item(task)
        insert_one(collection, record)
        summary = f'查询结果 - {record["_id"]}'
    elif purpose == 'domain':
        record = self.make_domain_item(task)
        insert_one(collection, record)
        summary = f'寻源结果 - {task["domain"]}'
    elif purpose == 'remove':
        record = self.make_duplicate_removal(task)
        update_one_by_domain(collection, record)
        summary = f'添加过滤 - {task["url"]}'
    else:
        insert_one(collection, task)
        summary = f'记录数据 - {task["_id"]}'

    logger.info(f'<{self.thread_name}> - {summary}')
|
|
|
|
|
|
def push_remove(self, task: Task):
|
|
|
"""数据去重表"""
|
|
@@ -140,10 +139,6 @@ class BasicService:
|
|
|
|
|
|
def push_domain(self, task: Task):
|
|
|
"""数据挖掘结果,推送保存"""
|
|
|
- if task['groups'] == self.url_groups:
|
|
|
- duplicate = str(task['origin']).count(task['domain']) > 0
|
|
|
- if duplicate:
|
|
|
- return False
|
|
|
if not self.collector.data(task['domain']):
|
|
|
self._push_data('domain', task, MGO_DOMAIN)
|
|
|
self.collector.add_data(task['domain'])
|