|
@@ -68,6 +68,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
|
|
|
query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(startId)}}
|
|
|
list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, n)
|
|
|
for _, v := range *list {
|
|
|
+ //log.Println(v["_id"])
|
|
|
j := PreInfo(v)
|
|
|
ext.TaskInfo.ProcessPool <- true
|
|
|
go ext.ExtractProcess(j)
|
|
@@ -258,9 +259,12 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
v.Value = data[0]
|
|
|
//清理特殊符号
|
|
|
- text := qu.ObjToString(v.Value)
|
|
|
- text = clear.OtherClean(key, text)
|
|
|
- v.Value = text
|
|
|
+ if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
+ clear.MesField[key] != nil {
|
|
|
+ text := qu.ObjToString(v.Value)
|
|
|
+ text = clear.OtherClean(key, text)
|
|
|
+ v.Value = text
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
PackageDetail(j, e) //处理分包信息
|
|
@@ -269,7 +273,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
//分析抽取结果并保存 todo
|
|
|
AnalysisSaveResult(j, e)
|
|
|
}, func(err interface{}) {
|
|
|
- log.Println(err)
|
|
|
+ log.Println((*j.Data)["_id"], err)
|
|
|
<-e.TaskInfo.ProcessPool
|
|
|
})
|
|
|
<-e.TaskInfo.ProcessPool
|
|
@@ -774,7 +778,7 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
tmp := map[string]interface{}{} //抽取值
|
|
|
for key, val := range values {
|
|
|
for _, v := range val { //取第一个非负数
|
|
|
- if qu.ObjToString(v.Object) != "" && v.Value > -1 {
|
|
|
+ if v.Key != "" && v.Value > -1 {
|
|
|
tmp[key] = v.Object
|
|
|
break
|
|
|
}
|