|
@@ -4,13 +4,14 @@ package extract
|
|
|
import (
|
|
|
db "jy/mongodbutil"
|
|
|
ju "jy/util"
|
|
|
- "log"
|
|
|
qu "qfw/util"
|
|
|
"regexp"
|
|
|
"sort"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
|
+
|
|
|
+ log "github.com/donnie4w/go-logger/logger"
|
|
|
)
|
|
|
|
|
|
type RegLuaInfo struct { //正则或脚本信息
|
|
@@ -66,8 +67,8 @@ type ExtractTask struct {
|
|
|
IsExtractCity bool //是否开启城市抽取
|
|
|
Fields map[string]int //抽取属性组
|
|
|
|
|
|
- IsFileField bool //是否开启附件抽取
|
|
|
- FileFields map[string]int //抽取附件属性组
|
|
|
+ IsFileField bool //是否开启附件抽取
|
|
|
+ FileFields map[string]int //抽取附件属性组
|
|
|
|
|
|
ResultChanel chan bool //抽取结果详情
|
|
|
ResultArr [][]map[string]interface{} //抽取结果详情
|
|
@@ -155,11 +156,11 @@ func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
|
|
|
//加载任务信息
|
|
|
func (e *ExtractTask) InitTaskInfo() {
|
|
|
task, _ := db.Mgo.FindById("task", e.Id, nil)
|
|
|
- log.Println("task", task)
|
|
|
+ log.Debug("task", task)
|
|
|
if len(*task) > 1 {
|
|
|
v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`","delete":false}`)
|
|
|
strs := strings.Split((*task)["s_mgosavecoll"].(string), "/")
|
|
|
- log.Println("s_mgosavecoll", strs)
|
|
|
+ log.Debug("s_mgosavecoll", strs)
|
|
|
if len(strs) < 3 {
|
|
|
return
|
|
|
} else {
|
|
@@ -182,7 +183,7 @@ func (e *ExtractTask) InitTaskInfo() {
|
|
|
e.IsExtractCity = (*v)["isextractcity"].(bool)
|
|
|
}
|
|
|
}
|
|
|
- log.Println(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1))
|
|
|
+ log.Debug(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1))
|
|
|
} else {
|
|
|
return
|
|
|
}
|
|
@@ -219,7 +220,7 @@ func (e *ExtractTask) InitRulePres() {
|
|
|
}
|
|
|
e.RulePres = append(e.RulePres, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -256,7 +257,7 @@ func (e *ExtractTask) InitRuleBacks() {
|
|
|
}
|
|
|
e.RuleBacks = append(e.RuleBacks, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -313,7 +314,7 @@ func (e *ExtractTask) InitRuleCore() {
|
|
|
}
|
|
|
rulePres = append(rulePres, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -349,7 +350,7 @@ func (e *ExtractTask) InitRuleCore() {
|
|
|
}
|
|
|
ruleBacks = append(ruleBacks, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -402,7 +403,7 @@ func (e *ExtractTask) InitRuleCore() {
|
|
|
}
|
|
|
ruleCores = append(ruleCores, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -463,7 +464,7 @@ func (e *ExtractTask) InitPkgCore() {
|
|
|
}
|
|
|
ruleBacks = append(ruleBacks, rinfo)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(rinfo.Code, rinfo.Field, err)
|
|
|
+ log.Debug(rinfo.Code, rinfo.Field, err)
|
|
|
})
|
|
|
}
|
|
|
}
|
|
@@ -745,82 +746,83 @@ func (e *ExtractTask) InitDFA() {
|
|
|
}
|
|
|
|
|
|
//保存抽取详情数据
|
|
|
-func (e *ExtractTask) ResultSave() {
|
|
|
+func (e *ExtractTask) ResultSave(init bool) {
|
|
|
defer qu.Catch()
|
|
|
- e.ResultChanel = make(chan bool, 5)
|
|
|
- e.ResultArr = [][]map[string]interface{}{}
|
|
|
- for {
|
|
|
- if len(e.ResultArr) > 500 {
|
|
|
- e.ResultChanel <- true
|
|
|
- arr := e.ResultArr[:500]
|
|
|
- go func(tmp *[][]map[string]interface{}) {
|
|
|
- qu.Try(func() {
|
|
|
- db.Mgo.UpSertBulk("extract_result", *tmp...)
|
|
|
- <-e.ResultChanel
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Println(err)
|
|
|
- <-e.ResultChanel
|
|
|
- })
|
|
|
- }(&arr)
|
|
|
- e.ResultArr = e.ResultArr[500:]
|
|
|
- } else {
|
|
|
- e.ResultChanel <- true
|
|
|
- arr := e.ResultArr
|
|
|
- go func(tmp *[][]map[string]interface{}) {
|
|
|
- qu.Try(func() {
|
|
|
- db.Mgo.UpSertBulk("extract_result", *tmp...)
|
|
|
- <-e.ResultChanel
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Println(err)
|
|
|
- <-e.ResultChanel
|
|
|
- })
|
|
|
- }(&arr)
|
|
|
- e.ResultArr = [][]map[string]interface{}{}
|
|
|
- time.Sleep(10 * time.Second)
|
|
|
- }
|
|
|
- if !e.IsRun {
|
|
|
- break
|
|
|
- }
|
|
|
+ if e.ResultArr == nil {
|
|
|
+ e.ResultArr = [][]map[string]interface{}{}
|
|
|
+ }
|
|
|
+ if init {
|
|
|
+ go func() {
|
|
|
+ for {
|
|
|
+ if len(e.ResultArr) > 500 {
|
|
|
+ arr := e.ResultArr[:500]
|
|
|
+ qu.Try(func() {
|
|
|
+ db.Mgo.UpSertBulk("extract_result", arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.ResultArr = e.ResultArr[500:]
|
|
|
+ } else {
|
|
|
+ arr := e.ResultArr
|
|
|
+ qu.Try(func() {
|
|
|
+ db.Mgo.UpSertBulk("extract_result", arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.ResultArr = [][]map[string]interface{}{}
|
|
|
+ }
|
|
|
+ time.Sleep(10 * time.Second)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ } else {
|
|
|
+ arr := e.ResultArr
|
|
|
+ qu.Try(func() {
|
|
|
+ e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.ResultArr = [][]map[string]interface{}{}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
//保存抽取数据
|
|
|
-func (e *ExtractTask) BidSave() {
|
|
|
+func (e *ExtractTask) BidSave(init bool) {
|
|
|
defer qu.Catch()
|
|
|
- e.BidChanel = make(chan bool, 5)
|
|
|
- e.BidArr = [][]map[string]interface{}{}
|
|
|
- for {
|
|
|
- if len(e.BidArr) > 500 {
|
|
|
- e.BidChanel <- true
|
|
|
- arr := e.BidArr[:500]
|
|
|
- go func(tmp *[][]map[string]interface{}) {
|
|
|
- qu.Try(func() {
|
|
|
- db.Mgo.UpSertBulk(e.TaskInfo.ToColl, *tmp...)
|
|
|
- <-e.BidChanel
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Println(err)
|
|
|
- <-e.BidChanel
|
|
|
- })
|
|
|
- }(&arr)
|
|
|
- e.BidArr = e.BidArr[500:]
|
|
|
- } else {
|
|
|
- e.BidChanel <- true
|
|
|
- arr := e.BidArr
|
|
|
- go func(tmp *[][]map[string]interface{}) {
|
|
|
- qu.Try(func() {
|
|
|
- db.Mgo.UpSertBulk(e.TaskInfo.ToColl, *tmp...)
|
|
|
- <-e.BidChanel
|
|
|
- }, func(err interface{}) {
|
|
|
- log.Println(err)
|
|
|
- <-e.BidChanel
|
|
|
- })
|
|
|
- }(&arr)
|
|
|
- e.BidArr = [][]map[string]interface{}{}
|
|
|
- }
|
|
|
- if !e.IsRun {
|
|
|
- break
|
|
|
- }
|
|
|
- time.Sleep(10 * time.Second)
|
|
|
+ if e.BidArr == nil {
|
|
|
+ e.BidArr = [][]map[string]interface{}{}
|
|
|
+ }
|
|
|
+ if init {
|
|
|
+ go func() {
|
|
|
+ for {
|
|
|
+ if len(e.BidArr) > 500 {
|
|
|
+ arr := e.BidArr[:500]
|
|
|
+ qu.Try(func() {
|
|
|
+ e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.BidArr = e.BidArr[500:]
|
|
|
+ } else {
|
|
|
+ arr := e.BidArr
|
|
|
+ qu.Try(func() {
|
|
|
+ e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.BidArr = [][]map[string]interface{}{}
|
|
|
+ }
|
|
|
+ time.Sleep(10 * time.Second)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ } else {
|
|
|
+ arr := e.BidArr
|
|
|
+ qu.Try(func() {
|
|
|
+ e.TaskInfo.TDB.UpSertBulk(e.TaskInfo.ToColl, arr...)
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Debug(err)
|
|
|
+ })
|
|
|
+ e.BidArr = [][]map[string]interface{}{}
|
|
|
+ time.Sleep(1 * time.Second)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -867,7 +869,7 @@ func (e *ExtractTask) InitAuditRule() {
|
|
|
ru = string(rs[1 : len(rs)-1])
|
|
|
rureg, err = regexp.Compile(ru)
|
|
|
if err != nil {
|
|
|
- log.Println("error---rule:", r)
|
|
|
+ log.Debug("error---rule:", r)
|
|
|
continue
|
|
|
}
|
|
|
i_rule = append(i_rule, []interface{}{rureg}...)
|
|
@@ -914,16 +916,16 @@ func (e *ExtractTask) InitFile() {
|
|
|
//query:=bson.M{"version":e.TaskInfo.Version,"delete":false}
|
|
|
ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`)
|
|
|
//ve, _ := db.Mgo.FindOne("version", query)
|
|
|
- if ve == nil{
|
|
|
+ if ve == nil {
|
|
|
return
|
|
|
}
|
|
|
- if (*ve)["isfiles"]!=nil && (*ve)["isfiles"].(bool){
|
|
|
- e.IsFileField =true
|
|
|
+ if (*ve)["isfiles"] != nil && (*ve)["isfiles"].(bool) {
|
|
|
+ e.IsFileField = true
|
|
|
}
|
|
|
- efiled := make(map[string]int,0)
|
|
|
- if (*ve)["s_filefileds"] != nil{
|
|
|
- for _,vff :=range (*ve)["s_filefileds"].([]interface{}) {
|
|
|
- efiled[vff.(string)]=1
|
|
|
+ efiled := make(map[string]int, 0)
|
|
|
+ if (*ve)["s_filefileds"] != nil {
|
|
|
+ for _, vff := range (*ve)["s_filefileds"].([]interface{}) {
|
|
|
+ efiled[vff.(string)] = 1
|
|
|
}
|
|
|
}
|
|
|
e.FileFields = efiled
|
|
@@ -944,7 +946,7 @@ func (c *ClearTask) InitClearTaskInfo() {
|
|
|
IsCltLog: ju.Config["iscltlog"].(bool),
|
|
|
ProcessPool: make(chan bool, qu.IntAllDef((*cleartask)["i_process"], 1)),
|
|
|
}
|
|
|
- log.Println(c.ClearTaskInfo.Name, "thread:", qu.IntAllDef((*cleartask)["i_process"], 1))
|
|
|
+ log.Debug(c.ClearTaskInfo.Name, "thread:", qu.IntAllDef((*cleartask)["i_process"], 1))
|
|
|
} else {
|
|
|
return
|
|
|
}
|