|
@@ -101,8 +101,8 @@ func StartExtractTaskId(taskId string) bool {
|
|
|
ext.Id = taskId
|
|
|
ext.InitTaskInfo()
|
|
|
}
|
|
|
- ext.TaskInfo.FDB = db.MgoFactory(2, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
|
|
|
- ext.TaskInfo.TDB = db.MgoFactory(1, 3, 120, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
|
|
|
+ ext.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
|
|
|
+ ext.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
|
|
|
ext.InitRulePres()
|
|
|
ext.InitRuleBacks()
|
|
|
ext.InitRuleCore()
|
|
@@ -165,14 +165,15 @@ func RunExtractTask(taskId string) {
|
|
|
if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
|
|
|
continue
|
|
|
}
|
|
|
- //log.Println(v["_id"])
|
|
|
+ _id := qu.BsonIdToSId(v["_id"])
|
|
|
+ log.Println(_id)
|
|
|
if !ext.IsRun {
|
|
|
break
|
|
|
}
|
|
|
j := PreInfo(v)
|
|
|
ext.TaskInfo.ProcessPool <- true
|
|
|
go ext.ExtractProcess(j)
|
|
|
- ext.TaskInfo.LastExtId = qu.BsonIdToSId(v["_id"])
|
|
|
+ ext.TaskInfo.LastExtId = _id
|
|
|
}
|
|
|
db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
|
|
|
if !ext.IsRun {
|
|
@@ -303,12 +304,14 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
v.Value = data[0]
|
|
|
//清理特殊符号
|
|
|
+ lock.Lock()
|
|
|
if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
clear.MesField[key] != nil {
|
|
|
text := qu.ObjToString(v.Value)
|
|
|
text = clear.OtherClean(key, text)
|
|
|
v.Value = text
|
|
|
}
|
|
|
+ lock.Unlock()
|
|
|
}
|
|
|
}
|
|
|
PackageDetail(j, e) //处理分包信息
|
|
@@ -316,11 +319,10 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
// log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
//分析抽取结果并保存 todo
|
|
|
AnalysisSaveResult(j, e)
|
|
|
- <-e.TaskInfo.ProcessPool
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println("ExtractProcess err", err, (*j.Data)["_id"])
|
|
|
- <-e.TaskInfo.ProcessPool
|
|
|
+ log.Println("ExtractProcess err", err)
|
|
|
})
|
|
|
+ <-e.TaskInfo.ProcessPool
|
|
|
}
|
|
|
|
|
|
//前置过滤
|
|
@@ -648,9 +650,10 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
|
|
|
tmp := j.Result[in.Field]
|
|
|
exts := []interface{}{}
|
|
|
for k, v := range tmp {
|
|
|
- if v.Type == "table" && v.Field != "projectname" { //table抽取到的数据不清理
|
|
|
- continue
|
|
|
- }
|
|
|
+ //table抽取到的数据不清理
|
|
|
+ // if v.Type == "table" && v.Field != "projectname" {
|
|
|
+ // continue
|
|
|
+ // }
|
|
|
text := qu.ObjToString(v.Value)
|
|
|
if text != "" {
|
|
|
text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
|