|
@@ -33,7 +33,7 @@ var (
|
|
|
ClearTaskList map[string]*ClearTask // list of cleanup tasks
|
|
|
saveLimit = 100 // batch saving of extraction logs
|
|
|
PageSize = 5000 // query pagination
|
|
|
- Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1}`
|
|
|
+ Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1}` // field selector (name:1 pairs); "dataging" matches the doc["dataging"] key read in PreInfo
|
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}` // NOTE(review): second field selector (name:1 pairs); where it is used is not visible here
|
|
|
)
|
|
|
|
|
@@ -331,6 +331,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
Result: map[string][]*ju.ExtField{},
|
|
|
BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
|
|
|
RuleBlock: e.RuleBlock,
|
|
|
+ Dataging: qu.IntAll(doc["dataging"]),
|
|
|
}
|
|
|
// Guard the pointer with && so a nil Jsondata is never dereferenced; indexing a nil map is safe.
if j.Jsondata != nil && (*j.Jsondata)["jsoncontent"] != nil {
|
|
|
delete((*j.Jsondata), "jsoncontent")
|
|
@@ -352,6 +353,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
|
|
|
RuleBlock: e.RuleBlock,
|
|
|
IsFile: isextFile,
|
|
|
+ Dataging: qu.IntAll(doc["dataging"]),
|
|
|
}
|
|
|
// Guard the pointer with && so a nil Jsondata is never dereferenced; indexing a nil map is safe.
if jf.Jsondata != nil && (*jf.Jsondata)["jsoncontent"] != nil {
|
|
|
delete((*jf.Jsondata), "jsoncontent")
|
|
@@ -1927,6 +1929,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
// qu.Debug(k, "---", v)
|
|
|
// }
|
|
|
//tmp["extract_content"] = j.Content
|
|
|
+ tmp["dataging"] = j.Dataging
|
|
|
if e.TaskInfo.TestColl == "" {
|
|
|
if len(tmp) > 0 { //保存抽取结果
|
|
|
/* if len(e.SiteFields) <= 0 {
|