|
@@ -22,13 +22,13 @@ import (
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
- lock sync.RWMutex
|
|
|
- cut = ju.NewCut() //获取正文并清理
|
|
|
- ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
|
|
|
- TaskList map[string]*ExtractTask //任务列表
|
|
|
- ClearTaskList map[string]*ClearTask //清理任务列表
|
|
|
- saveLimit = 200 //抽取日志批量保存
|
|
|
- PageSize = 5000 //查询分页
|
|
|
+ lock sync.RWMutex
|
|
|
+ cut = ju.NewCut() //获取正文并清理
|
|
|
+ ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
|
|
|
+ TaskList map[string]*ExtractTask //任务列表
|
|
|
+ ClearTaskList map[string]*ClearTask //清理任务列表
|
|
|
+ saveLimit = 200 //抽取日志批量保存
|
|
|
+ PageSize = 5000 //查询分页
|
|
|
Fields = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1}`
|
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
|
|
|
)
|
|
@@ -81,9 +81,9 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
|
|
|
continue
|
|
|
}
|
|
|
var j, jf *ju.Job
|
|
|
- if ext.IsFileField &&v["projectinfo"] != nil {
|
|
|
- v["isextFile"] = true
|
|
|
- j, jf = PreInfo(v)
|
|
|
+ if ext.IsFileField && v["projectinfo"] != nil {
|
|
|
+ v["isextFile"] = true
|
|
|
+ j, jf = PreInfo(v)
|
|
|
} else {
|
|
|
j, _ = PreInfo(v)
|
|
|
}
|
|
@@ -306,7 +306,7 @@ func file2text(doc *map[string]interface{}) {
|
|
|
//抽取
|
|
|
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
|
|
|
e.ExtractDetail(j)
|
|
|
- if jf !=nil &&jf.IsFile {
|
|
|
+ if jf != nil && jf.IsFile {
|
|
|
e.ExtractFile(jf)
|
|
|
}
|
|
|
//分析抽取结果并保存 todo
|
|
@@ -1026,7 +1026,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
|
|
|
//质量审核
|
|
|
- if ju.Config["qualityaudit"].(bool) {
|
|
|
+ if ok, _ := ju.Config["qualityaudit"].(bool); ok {
|
|
|
e.QualityAudit(tmp)
|
|
|
}
|
|
|
if e.IsExtractCity { //城市抽取
|
|
@@ -1192,7 +1192,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
|
|
|
func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
i := redis.GetInt(field, field+"_"+fv) //查找redis
|
|
|
- if i == 0 { //reids未找到,执行规则匹配
|
|
|
+ if i == 0 { //reids未找到,执行规则匹配
|
|
|
val[field+"_isredis"] = false
|
|
|
e.RuleMatch(field, fv, val) //规则匹配
|
|
|
} else { //redis找到,打标识存库
|