|
@@ -24,12 +24,12 @@ import (
|
|
|
// Package-level variables declared together: mutexes, the text cutter, log and task maps, size limits, and field-list strings.
var (
|
|
|
lock, lockrule, lockclear sync.RWMutex // lockclear is held around the e.ClearFn[key] lookup in ExtractDetail
|
|
|
|
|
|
- cut = ju.NewCut() // extracts the main text and cleans it
|
|
|
- ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs, keyed by task info
|
|
|
- TaskList map[string]*ExtractTask // extraction task list
|
|
|
- ClearTaskList map[string]*ClearTask // clear-task list
|
|
|
- saveLimit = 200 // batch size used when saving extraction logs
|
|
|
- PageSize = 5000 // page size for paged queries
|
|
|
+ cut = ju.NewCut() // extracts the main text and cleans it
|
|
|
+ ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs, keyed by task info
|
|
|
+ TaskList map[string]*ExtractTask // extraction task list
|
|
|
+ ClearTaskList map[string]*ClearTask // clear-task list
|
|
|
+ saveLimit = 200 // batch size used when saving extraction logs
|
|
|
+ PageSize = 5000 // page size for paged queries
|
|
|
// JSON string mapping each listed field name to 1 (presumably a query projection; confirm against callers).
Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
|
|
|
// Shorter field list in the same format (presumably used for a narrower query; confirm against callers).
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
|
|
|
)
|
|
@@ -438,6 +438,9 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
}
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
+ tmpExtFields := make([]*ju.ExtField, 0)
|
|
|
+ tmpWeight := -999 //记录最大权重
|
|
|
+ tmpIndex := -999 //记录最大权重下标
|
|
|
for _, v := range val {
|
|
|
lockclear.Lock()
|
|
|
cfn := e.ClearFn[key]
|
|
@@ -456,6 +459,22 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
}
|
|
|
lockclear.Unlock()
|
|
|
}
|
|
|
+ //项目编号,采购单位权重清理
|
|
|
+ if (key == "projectcode" || key == "buyer") && len(val) > 1 {
|
|
|
+ for i, v := range val {
|
|
|
+ if v.Weight == 0 {
|
|
|
+ tmpExtFields = append(tmpExtFields, v)
|
|
|
+ continue
|
|
|
+ } else if v.Weight > tmpWeight {
|
|
|
+ tmpWeight = v.Weight
|
|
|
+ tmpIndex = i
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if tmpIndex != -999 {
|
|
|
+ tmpExtFields = append(tmpExtFields, val[tmpIndex])
|
|
|
+ j.Result[key] = tmpExtFields
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
PackageDetail(j, e) //处理分包信息
|
|
|
// bs, _ := json.Marshal(j.Result)
|
|
@@ -627,7 +646,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
}
|
|
|
if tmps, ok := v.([]map[string]interface{}); ok {
|
|
|
for _, tmp := range tmps {
|
|
|
- field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), Type: qu.ObjToString(tmp["type"]), RuleText: qu.ObjToString(tmp["ruletext"]), SourceValue: tmp["sourcevalue"], Value: tmp["value"]}
|
|
|
+ field := &ju.ExtField{Weight: qu.IntAll(tmp["weight"]), Field: k, Code: qu.ObjToString(tmp["code"]), Type: qu.ObjToString(tmp["type"]), RuleText: qu.ObjToString(tmp["ruletext"]), SourceValue: tmp["sourcevalue"], Value: tmp["value"]}
|
|
|
if tmp["blocktag"] != nil {
|
|
|
btag := make(map[string]string)
|
|
|
for k := range tmp["blocktag"].(map[string]bool) {
|
|
@@ -1348,7 +1367,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
|
|
|
func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
i := redis.GetInt(field, field+"_"+fv) //查找redis
|
|
|
- if i == 0 { //reids未找到,执行规则匹配
|
|
|
+ if i == 0 { //reids未找到,执行规则匹配
|
|
|
val[field+"_isredis"] = false
|
|
|
e.RuleMatch(field, fv, val) //规则匹配
|
|
|
} else { //redis找到,打标识存库
|