|
@@ -2,7 +2,7 @@ import threading
|
|
|
import time
|
|
|
from typing import List, Mapping
|
|
|
|
|
|
-from common.databases import insert_one
|
|
|
+from common.databases import insert_one, update_one_by_domain
|
|
|
from common.log import logger
|
|
|
from common.tools import delay_by
|
|
|
from constants import (
|
|
@@ -34,9 +34,6 @@ class BasicSearch:
|
|
|
|
|
|
def __init__(
|
|
|
self,
|
|
|
- keyword_weight=9,
|
|
|
- url_weight=8,
|
|
|
- org_weight=7,
|
|
|
scheduler=None,
|
|
|
validator=None,
|
|
|
downloader=None,
|
|
@@ -54,10 +51,9 @@ class BasicSearch:
|
|
|
self.projection = {'name': 1}
|
|
|
self.sort = [('_id', -1)]
|
|
|
# 权重
|
|
|
- self.org_weight = org_weight
|
|
|
- self.url_weight = url_weight
|
|
|
- self.keyword_weight = keyword_weight
|
|
|
- self.retrieve_weight = 0
|
|
|
+ self.org_weight = (kwargs.pop('org_weight', None) or 7)
|
|
|
+ self.url_weight = (kwargs.pop('url_weight', None) or 8)
|
|
|
+ self.keyword_weight = (kwargs.pop('keyword_weight', None) or 9)
|
|
|
# 分类
|
|
|
self.visit_classify = VISIT_CLASSIFY
|
|
|
self.query_classify = QUERY_CLASSIFY
|
|
@@ -68,10 +64,11 @@ class BasicSearch:
|
|
|
self.competing_groups = COMPETING_GOODS
|
|
|
|
|
|
@staticmethod
def loops_interval(interval, enable_debug_log=False):
    """Block the calling thread until its next polling round.

    Args:
        interval: Number of seconds to wait. A falsy value (``None`` or
            ``0``) falls back to 300 seconds.
        enable_debug_log: When true, emit a debug record with the current
            thread's name and the next run time before sleeping.
    """
    # Resolve the fallback once so the logged next-run time and the actual
    # sleep always agree; previously only the log used the fallback and
    # ``time.sleep(None)`` raised TypeError.
    wait_seconds = interval or 300

    if enable_debug_log:
        # Both values are needed only for the log message, so they are
        # computed only when logging is requested.
        t_name = threading.current_thread().name
        # NOTE(review): delay_by presumably returns the time
        # ``wait_seconds`` from now - confirm against common.tools.
        next_run_time = delay_by(wait_seconds)
        logger.debug(f'运行结束:<{t_name}>,下次运行时间:{next_run_time}')

    time.sleep(wait_seconds)
|
|
|
|
|
|
@staticmethod
|
|
@@ -127,8 +124,8 @@ class BasicSearch:
|
|
|
logger.info(f'<{t_name}> - 上传挖掘结果 - {item["_id"]}')
|
|
|
elif purpose == 'remove':
|
|
|
item = self.make_duplicate_removal(task)
|
|
|
- insert_one(collection, item)
|
|
|
- logger.info(f'<{t_name}> - 上传去重特征 - {item["_id"]}')
|
|
|
+ update_one_by_domain(collection, item)
|
|
|
+ logger.info(f'<{t_name}> - 上传去重特征 - {item["domain"]}')
|
|
|
else:
|
|
|
insert_one(collection, task)
|
|
|
logger.info(f'<{t_name}> - 上传记录数据 - {task["_id"]}')
|