|
@@ -53,8 +53,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
|
|
|
n, _ := strconv.Atoi(num)
|
|
|
id := IdTrans(startId)
|
|
|
if id.Valid() {
|
|
|
- query := bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(startId)}}
|
|
|
- //list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1)
|
|
|
+ query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(startId)}}
|
|
|
list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, n)
|
|
|
for _, v := range *list {
|
|
|
j := PreInfo(v)
|
|
@@ -106,7 +105,7 @@ func RunExtractTask(ext *ExtractTask) {
|
|
|
if !ext.IsRun {
|
|
|
return
|
|
|
}
|
|
|
- query := bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
|
|
|
+ query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
|
|
|
list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1)
|
|
|
for _, v := range *list {
|
|
|
if !ext.IsRun {
|
|
@@ -260,7 +259,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
if in.IsLua {
|
|
|
lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
|
|
|
if in.IsHasFields { //lua脚本配置有属性字段
|
|
|
- lua.KvMap = getKvByLuaFields(j, in, et.Tag)
|
|
|
+ lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag)
|
|
|
} else {
|
|
|
lua.KvMap = map[string][]map[string]interface{}{}
|
|
|
}
|
|
@@ -273,7 +272,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
if tmps, ok := v.([]map[string]interface{}); ok {
|
|
|
for _, tmp := range tmps {
|
|
|
j.Result[k] = append(j.Result[k],
|
|
|
- &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]})
|
|
|
+ &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]})
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -293,7 +292,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
}
|
|
|
|
|
|
//lua脚本根据属性设置提取kv值
|
|
|
-func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
|
|
|
+func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
|
|
|
kvmap := map[string][]map[string]interface{}{}
|
|
|
for _, vv := range in.LFields {
|
|
|
field := qu.ObjToString(vv)
|
|
@@ -310,10 +309,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "colon1",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_string",
|
|
|
})
|
|
|
}
|
|
@@ -324,10 +325,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "colon1",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_regexp",
|
|
|
})
|
|
|
}
|
|
@@ -341,10 +344,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "colon2",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_string",
|
|
|
})
|
|
|
}
|
|
@@ -355,10 +360,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "colon2",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_regexp",
|
|
|
})
|
|
|
}
|
|
@@ -378,10 +385,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "space",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_string",
|
|
|
})
|
|
|
}
|
|
@@ -392,10 +401,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "space",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_regexp",
|
|
|
})
|
|
|
}
|
|
@@ -415,10 +426,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(val, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "table",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_string",
|
|
|
})
|
|
|
}
|
|
@@ -429,10 +442,12 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
|
|
|
text := ju.TrimLRSpace(val, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
"value": text,
|
|
|
"type": "table",
|
|
|
- "field": field,
|
|
|
- "key": tag.Key,
|
|
|
"matchtype": "tag_regexp",
|
|
|
})
|
|
|
}
|
|
@@ -462,17 +477,18 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
|
|
|
val := text[pos[p]:pos[p+1]]
|
|
|
extinfo[k] = map[string]interface{}{
|
|
|
"field": v.Field,
|
|
|
- "key": v.Code,
|
|
|
- "type": "regexp",
|
|
|
- "matchtype": "regcontent",
|
|
|
+ "code": v.Code,
|
|
|
+ "ruletext": v.RuleText,
|
|
|
"extfrom": extfrom,
|
|
|
"value": val,
|
|
|
+ "type": "regexp",
|
|
|
+ "matchtype": "regcontent",
|
|
|
}
|
|
|
if val != "" {
|
|
|
if j.Result[v.Field] == nil {
|
|
|
j.Result[k] = [](*ju.ExtField){}
|
|
|
}
|
|
|
- j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, "regexp", "regcontent", extfrom, val})
|
|
|
+ j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val})
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -482,16 +498,17 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
|
|
|
if val != "" {
|
|
|
extinfo[v.Field] = map[string]interface{}{
|
|
|
"field": v.Field,
|
|
|
- "key": v.Code,
|
|
|
- "type": "regexp",
|
|
|
- "matchtype": "regcontent",
|
|
|
+ "code": v.Code,
|
|
|
+ "ruletext": v.RuleText,
|
|
|
"extfrom": extfrom,
|
|
|
"value": val,
|
|
|
+ "type": "regexp",
|
|
|
+ "matchtype": "regcontent",
|
|
|
}
|
|
|
if j.Result[v.Field] == nil {
|
|
|
j.Result[v.Field] = [](*ju.ExtField){}
|
|
|
}
|
|
|
- j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, "regexp", "regcontent", extfrom, val})
|
|
|
+ j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val})
|
|
|
}
|
|
|
}
|
|
|
return extinfo
|
|
@@ -510,7 +527,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
|
|
|
if tmps, ok := v.([]map[string]interface{}); ok {
|
|
|
j.Result[k] = [](*ju.ExtField){}
|
|
|
for _, tmp := range tmps {
|
|
|
- j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]})
|
|
|
+ j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]})
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -530,7 +547,8 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
|
|
|
j.Result[in.Field][k].Value = text
|
|
|
exts = append(exts, map[string]interface{}{
|
|
|
"field": v.Field,
|
|
|
- "key": v.Key,
|
|
|
+ "code": v.Code,
|
|
|
+ "ruletext": v.RuleText,
|
|
|
"type": v.Type,
|
|
|
"matchtype": v.MatchType,
|
|
|
"extfrom": v.ExtFrom,
|
|
@@ -552,7 +570,8 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
|
|
|
j.Result[key][k].Value = text
|
|
|
exts = append(exts, map[string]interface{}{
|
|
|
"field": v.Field,
|
|
|
- "key": v.Key,
|
|
|
+ "code": v.Code,
|
|
|
+ "ruletext": v.RuleText,
|
|
|
"type": v.Type,
|
|
|
"matchtype": v.MatchType,
|
|
|
"extfrom": v.ExtFrom,
|
|
@@ -578,10 +597,11 @@ func getResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
|
|
|
for _, v := range val {
|
|
|
tmp := map[string]interface{}{
|
|
|
"field": v.Field,
|
|
|
+ "code": v.Code,
|
|
|
+ "ruletext": v.RuleText,
|
|
|
"value": v.Value,
|
|
|
"type": v.Type,
|
|
|
"matchtype": v.MatchType,
|
|
|
- "key": v.Key,
|
|
|
"extfrom": v.ExtFrom,
|
|
|
}
|
|
|
result[key] = append(result[key], tmp)
|
|
@@ -684,5 +704,10 @@ func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.Ext
|
|
|
task.DB.Update(task.SaveColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
|
|
|
//保存抽取详情
|
|
|
tmp["result"] = result
|
|
|
+ for k, v := range *doc {
|
|
|
+ if tmp[k] == nil { //&& (k != "detail" || k != "contenthtml") {
|
|
|
+ tmp[k] = v
|
|
|
+ }
|
|
|
+ }
|
|
|
db.Mgo.Update("extract_result", `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
|
|
|
}
|