|
@@ -27,12 +27,12 @@ import (
|
|
// Package-level variables: RWMutex locks, the value returned by ju.NewCut(), task maps, numeric limits and field-list strings.
// NOTE(review): this span is a rendered diff, not plain Go — unchanged lines appear twice and each "-"/"+" pair
// below carries the same text, so the change looks whitespace-only; confirm against the real file.
// NOTE(review): Fields / Fields2 look like field-selection documents ({"name":1}) — presumably query projections; verify at the call sites.
var (
|
|
var (
|
|
lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
|
|
lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
|
|
|
|
|
|
- cut = ju.NewCut() // get the body text and clean it
|
|
|
|
- ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
|
|
|
|
- TaskList map[string]*ExtractTask // task list
|
|
|
|
- ClearTaskList map[string]*ClearTask // cleanup task list
|
|
|
|
- saveLimit = 100 // batch saving of extraction logs
|
|
|
|
- PageSize = 5000 // query pagination
|
|
|
|
|
|
+ cut = ju.NewCut() // get the body text and clean it
|
|
|
|
+ ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
|
|
|
|
+ TaskList map[string]*ExtractTask // task list
|
|
|
|
+ ClearTaskList map[string]*ClearTask // cleanup task list
|
|
|
|
+ saveLimit = 100 // batch saving of extraction logs
|
|
|
|
+ PageSize = 5000 // query pagination
|
|
Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1}`
|
|
Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1}`
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
|
|
)
|
|
)
|
|
@@ -2087,7 +2087,45 @@ func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[strin
|
|
for _, val := range result {
|
|
for _, val := range result {
|
|
ju.Sort(val)
|
|
ju.Sort(val)
|
|
}
|
|
}
|
|
- j.Result = JsonDataMergeProcessing(j, e)
|
|
|
|
|
|
+ if !(len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0) {
|
|
|
|
+ marshalbt, _ := json.Marshal(j.Jsondata)
|
|
|
|
+ tmpjddata := make(map[string]interface{})
|
|
|
|
+ json.Unmarshal(marshalbt,&tmpjddata)
|
|
|
|
+ for _, jdkey := range ju.JsonData {
|
|
|
|
+ if (*j.Jsondata)[jdkey] != nil && (*j.Jsondata)[jdkey] != "" && len(j.Result[jdkey]) >= 5 {
|
|
|
|
+ for tmpk, tmpv := range j.Result[jdkey][:5] {
|
|
|
|
+ if jdkey == "budget" || jdkey == "bidamount" {
|
|
|
|
+ lockclear.Lock()
|
|
|
|
+ cfn := e.ClearFn[jdkey]
|
|
|
|
+ lockclear.Unlock()
|
|
|
|
+ if len(cfn) == 0 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[jdkey], ""})
|
|
|
|
+ if tmpv.Value == newNum[0] {
|
|
|
|
+ extField := &ju.ExtField{Code: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), Field: jdkey, ExtFrom: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), SourceValue: (*j.Jsondata)[jdkey], Value: newNum[0], Score: 100, IsTrue: newNum[len(newNum)-1].(bool)}
|
|
|
|
+ j.Result[jdkey] = append(j.Result[jdkey], extField)
|
|
|
|
+ ju.Sort(j.Result[jdkey])
|
|
|
|
+ delete((*j.Jsondata), jdkey)
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }else {
|
|
|
|
+ if (*j.Jsondata)[jdkey] == tmpv.Value{
|
|
|
|
+ extField := &ju.ExtField{Code: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), Field: jdkey, ExtFrom: "JsonData_" + jdkey + "_j.Result" + fmt.Sprint(tmpk), SourceValue: (*j.Jsondata)[jdkey], Value: tmpv.Value, Score: 100}
|
|
|
|
+ j.Result[jdkey] = append(j.Result[jdkey], extField)
|
|
|
|
+ ju.Sort(j.Result[jdkey])
|
|
|
|
+ delete((*j.Jsondata), jdkey)
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(*j.Jsondata)>0{
|
|
|
|
+ j.Result = JsonDataMergeProcessing(j, e)
|
|
|
|
+ }
|
|
|
|
+ j.Jsondata = &tmpjddata
|
|
|
|
+ }
|
|
return doc, result, _id
|
|
return doc, result, _id
|
|
}
|
|
}
|
|
|
|
|