|
@@ -22,7 +22,8 @@ import (
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
- lock sync.RWMutex
|
|
|
+ lock, lockrule, lockclear, locksave sync.RWMutex
|
|
|
+
|
|
|
cut = ju.NewCut() //获取正文并清理
|
|
|
ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
|
|
|
TaskList map[string]*ExtractTask //任务列表
|
|
@@ -243,7 +244,10 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
toptype = "结果"
|
|
|
}
|
|
|
if toptype == "" {
|
|
|
- toptype = "*"
|
|
|
+ toptype = "all"
|
|
|
+ }
|
|
|
+ if subtype == "" {
|
|
|
+ subtype = "all"
|
|
|
}
|
|
|
j = &ju.Job{
|
|
|
SourceMid: qu.BsonIdToSId(doc["_id"]),
|
|
@@ -340,106 +344,64 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
//for _, v := range e.RulePres {
|
|
|
// doc = ExtRegPre(doc, j, v, e.TaskInfo)
|
|
|
//}
|
|
|
- if j.CategorySecond == "" {
|
|
|
- //抽取规则
|
|
|
- tmprules := map[string][]*RuleCore{}
|
|
|
- lock.Lock()
|
|
|
- if e.RuleCores[j.Category] == nil {
|
|
|
- j.Category = "*_其他"
|
|
|
+ tmprules := map[string][]*RuleCore{}
|
|
|
+ lockrule.Lock()
|
|
|
+ if j.Category == "all" || j.CategorySecond == "all" {
|
|
|
+ for k, vc1 := range e.RuleCores["all_all"] {
|
|
|
+ tmprules[k] = vc1
|
|
|
}
|
|
|
- for k, vc1 := range e.RuleCores[j.Category] {
|
|
|
+ } else {
|
|
|
+ for k, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
|
|
|
tmprules[k] = vc1
|
|
|
}
|
|
|
- lock.Unlock()
|
|
|
- for _, vc1 := range tmprules {
|
|
|
- for _, vc := range vc1 {
|
|
|
- tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
- //是否进入逻辑
|
|
|
- if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
- continue
|
|
|
- }
|
|
|
- ////抽取-前置规则
|
|
|
- //for _, v := range vc.RulePres {
|
|
|
- // tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
- //}
|
|
|
- // log.Debug("抽取-前置规则", tmp)
|
|
|
-
|
|
|
- //抽取-规则
|
|
|
- for _, v := range vc.RuleCores {
|
|
|
- ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
- }
|
|
|
- // log.Debug("抽取-规则", tmp)
|
|
|
-
|
|
|
- //项目名称未能抽取到,标题来凑
|
|
|
- if vc.Field == "projectname" {
|
|
|
- if len(j.Result[vc.Field]) < 1 {
|
|
|
- items := make([]*ju.ScoreItem, 1)
|
|
|
- items[0] = &ju.ScoreItem{Des: "项目名称未能抽取到,标题来凑初始化", ExtFrom: "title", Value: j.Title, Score: 4}
|
|
|
- field := &ju.ExtField{Field: vc.Field, Code: "title", RuleText: "title", Type: "regexp", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title, Score: 4, ScoreItem: items}
|
|
|
- if tmp["blocktag"] != nil {
|
|
|
- field.BlockTag = tmp["blocktag"].(map[string]bool)
|
|
|
- }
|
|
|
- j.Result[vc.Field] = append(j.Result[vc.Field], field)
|
|
|
- //j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{tmp["blocktag"].(map[string]bool), vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, j.Title, 0})
|
|
|
- }
|
|
|
- }
|
|
|
+ }
|
|
|
+ if len(tmprules) < 1 { //分类未覆盖部分
|
|
|
+ for k, vc1 := range e.RuleCores["all_all"] {
|
|
|
+ tmprules[k] = vc1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ lockrule.Unlock()
|
|
|
|
|
|
- //抽取-后置规则
|
|
|
- for _, v := range vc.RuleBacks {
|
|
|
- ExtRegBack(j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- // log.Debug("抽取-后置规则", tmp)
|
|
|
+ //抽取规则
|
|
|
+ for _, vc1 := range tmprules {
|
|
|
+ for _, vc := range vc1 {
|
|
|
+ tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
+ //是否进入逻辑
|
|
|
+ if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
+ continue
|
|
|
}
|
|
|
- }
|
|
|
- } else {
|
|
|
- var cores map[string][]*RuleCore
|
|
|
- if e.RuleCores[j.Category+"_"+j.CategorySecond] == nil {
|
|
|
- cores = e.RuleCores["*_其他"]
|
|
|
- } else {
|
|
|
- cores = e.RuleCores[j.Category+"_"+j.CategorySecond]
|
|
|
- }
|
|
|
- for _, vc1 := range cores {
|
|
|
- for _, vc := range vc1 {
|
|
|
- tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
- //是否进入逻辑
|
|
|
- if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
- continue
|
|
|
- }
|
|
|
- //抽取-前置规则
|
|
|
- for _, v := range vc.RulePres {
|
|
|
- tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- // log.Debug("抽取-前置规则", tmp)
|
|
|
+ ////抽取-前置规则
|
|
|
+ //for _, v := range vc.RulePres {
|
|
|
+ // tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
+ //}
|
|
|
+ // log.Debug("抽取-前置规则", tmp)
|
|
|
|
|
|
- //抽取-规则
|
|
|
- for _, v := range vc.RuleCores {
|
|
|
- ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
- }
|
|
|
- // log.Debug("抽取-规则", tmp)
|
|
|
+ //抽取-规则
|
|
|
+ for _, v := range vc.RuleCores {
|
|
|
+ ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
+ }
|
|
|
+ // log.Debug("抽取-规则", tmp)
|
|
|
|
|
|
- //项目名称未能抽取到,标题来凑
|
|
|
- if vc.Field == "projectname" {
|
|
|
- items := make([]*ju.ScoreItem, 1)
|
|
|
- items[0] = &ju.ScoreItem{Des: "项目名称未能抽取到,标题来凑初始化", ExtFrom: "title", Value: j.Title, Score: 4}
|
|
|
- field := &ju.ExtField{Field: vc.Field, Code: "title", RuleText: "title", Type: "regexp", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title, Score: 4, ScoreItem: items}
|
|
|
- if len(j.Result[vc.Field]) < 1 {
|
|
|
- if tmp["blocktag"] != nil {
|
|
|
- field.BlockTag = tmp["blocktag"].(map[string]bool)
|
|
|
- }
|
|
|
- j.Result[vc.Field] = append(j.Result[vc.Field], field)
|
|
|
- //j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{tmp["blocktag"].(map[string]bool), vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, j.Title, 0})
|
|
|
- }
|
|
|
+ //项目名称未能抽取到,标题来凑
|
|
|
+ if vc.Field == "projectname" {
|
|
|
+ //if len(j.Result[vc.Field]) < 1 {//如果抽取有结果,不走标题。待验证,暂时标题加入选举逻辑
|
|
|
+ items := make([]*ju.ScoreItem, 1)
|
|
|
+ items[0] = &ju.ScoreItem{Des: "项目名称未能抽取到,标题来凑初始化", ExtFrom: "title", Value: j.Title, Score: 4}
|
|
|
+ field := &ju.ExtField{Field: vc.Field, Code: "title", RuleText: "title", Type: "title", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title, Score: 4, ScoreItem: items}
|
|
|
+ if tmp["blocktag"] != nil {
|
|
|
+ field.BlockTag = tmp["blocktag"].(map[string]bool)
|
|
|
}
|
|
|
+ j.Result[vc.Field] = append(j.Result[vc.Field], field)
|
|
|
+ //}
|
|
|
+ }
|
|
|
|
|
|
- //抽取-后置规则
|
|
|
- for _, v := range vc.RuleBacks {
|
|
|
- ExtRegBack(j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- // log.Debug("抽取-后置规则", tmp)
|
|
|
+ //抽取-后置规则
|
|
|
+ for _, v := range vc.RuleBacks {
|
|
|
+ ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
+ // log.Debug("抽取-后置规则", tmp)
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
//全局后置规则
|
|
|
for _, v := range e.RuleBacks {
|
|
|
ExtRegBack(j, v, e.TaskInfo)
|
|
@@ -471,13 +433,13 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
|
- lock.Lock()
|
|
|
+ lockclear.Lock()
|
|
|
cfn := e.ClearFn[key]
|
|
|
- lock.Unlock()
|
|
|
+ lockclear.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
v.Value = data[0]
|
|
|
//清理特殊符号
|
|
|
- lock.Lock()
|
|
|
+ lockclear.Lock()
|
|
|
if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
clear.MesField[key] != nil {
|
|
|
text := qu.ObjToString(v.Value)
|
|
@@ -486,7 +448,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
v.Value = text
|
|
|
}
|
|
|
}
|
|
|
- lock.Unlock()
|
|
|
+ lockclear.Unlock()
|
|
|
}
|
|
|
}
|
|
|
PackageDetail(j, e) //处理分包信息
|
|
@@ -500,77 +462,54 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
|
|
|
qu.Try(func() {
|
|
|
doc := *j.Data
|
|
|
//全局前置规则,结果覆盖doc属性
|
|
|
- for _, v := range e.RulePres {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- doc = ExtRegPre(doc, j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- }
|
|
|
+ // for _, v := range e.RulePres {
|
|
|
+ // if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
+ // doc = ExtRegPre(doc, j, v, e.TaskInfo)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
//抽取规则
|
|
|
- if j.CategorySecond == "" {
|
|
|
- for _, vc1 := range e.RuleCores[j.Category] {
|
|
|
- for _, vc := range vc1 {
|
|
|
- tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
- //是否进入逻辑
|
|
|
- if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
- continue
|
|
|
- }
|
|
|
- //抽取-前置规则
|
|
|
- for _, v := range vc.RulePres {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- }
|
|
|
- // log.Debug("抽取-前置规则", tmp)
|
|
|
-
|
|
|
- //抽取-规则
|
|
|
- for _, v := range vc.RuleCores {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
- }
|
|
|
- }
|
|
|
- // log.Debug("抽取-规则", tmp)
|
|
|
-
|
|
|
- //抽取-后置规则
|
|
|
- for _, v := range vc.RuleBacks {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- ExtRegBack(j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- }
|
|
|
- // log.Debug("抽取-后置规则", tmp)
|
|
|
- }
|
|
|
+ tmprules := map[string][]*RuleCore{}
|
|
|
+ lockrule.Lock()
|
|
|
+ if j.Category == "all" || j.CategorySecond == "all" {
|
|
|
+ for k, vc1 := range e.RuleCores["all_all"] {
|
|
|
+ tmprules[k] = vc1
|
|
|
}
|
|
|
} else {
|
|
|
- for _, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
|
|
|
- for _, vc := range vc1 {
|
|
|
- tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
- //是否进入逻辑
|
|
|
- if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
- continue
|
|
|
- }
|
|
|
- //抽取-前置规则
|
|
|
- for _, v := range vc.RulePres {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
- }
|
|
|
- }
|
|
|
- // log.Debug("抽取-前置规则", tmp)
|
|
|
+ for k, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
|
|
|
+ tmprules[k] = vc1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ lockrule.Unlock()
|
|
|
+ for _, vc1 := range tmprules {
|
|
|
+ for _, vc := range vc1 {
|
|
|
+ tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
+ //是否进入逻辑
|
|
|
+ if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ //抽取-前置规则
|
|
|
+ // for _, v := range vc.RulePres {
|
|
|
+ // if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
+ // tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // log.Debug("抽取-前置规则", tmp)
|
|
|
|
|
|
- //抽取-规则
|
|
|
- for _, v := range vc.RuleCores {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
- }
|
|
|
+ //抽取-规则
|
|
|
+ for _, v := range vc.RuleCores {
|
|
|
+ if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
+ ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
}
|
|
|
- // log.Debug("抽取-规则", tmp)
|
|
|
+ }
|
|
|
+ // log.Debug("抽取-规则", tmp)
|
|
|
|
|
|
- //抽取-后置规则
|
|
|
- for _, v := range vc.RuleBacks {
|
|
|
- if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
- ExtRegBack(j, v, e.TaskInfo)
|
|
|
- }
|
|
|
+ //抽取-后置规则
|
|
|
+ for _, v := range vc.RuleBacks {
|
|
|
+ if value, ok := e.FileFields.Load(v.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
+ ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
- // log.Debug("抽取-后置规则", tmp)
|
|
|
}
|
|
|
+ // log.Debug("抽取-后置规则", tmp)
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -607,20 +546,20 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
|
- lock.Lock()
|
|
|
+ lockclear.Lock()
|
|
|
cfn := e.ClearFn[key]
|
|
|
- lock.Unlock()
|
|
|
+ lockclear.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
v.Value = data[0]
|
|
|
//清理特殊符号
|
|
|
- lock.Lock()
|
|
|
+ lockclear.Lock()
|
|
|
if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
clear.MesField[key] != nil {
|
|
|
text := qu.ObjToString(v.Value)
|
|
|
text = clear.OtherClean(key, text)
|
|
|
v.Value = text
|
|
|
}
|
|
|
- lock.Unlock()
|
|
|
+ lockclear.Unlock()
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -682,7 +621,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
}
|
|
|
if tmps, ok := v.([]map[string]interface{}); ok {
|
|
|
for _, tmp := range tmps {
|
|
|
- field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), ExtFrom: extfrom, Value: tmp["value"]}
|
|
|
+ field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), ExtFrom: extfrom, SourceValue: tmp["sourcevalue"], Value: tmp["value"]}
|
|
|
if extfrom == "title" {
|
|
|
field.Score = 4
|
|
|
}
|
|
@@ -761,14 +700,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "colon1",
|
|
|
- "matchtype": "tag_string",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "colon1",
|
|
|
+ "matchtype": "tag_string",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -778,14 +718,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "colon1",
|
|
|
- "matchtype": "tag_regexp",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "colon1",
|
|
|
+ "matchtype": "tag_regexp",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -798,14 +739,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "colon2",
|
|
|
- "matchtype": "tag_string",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "colon2",
|
|
|
+ "matchtype": "tag_string",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -815,14 +757,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "colon2",
|
|
|
- "matchtype": "tag_regexp",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "colon2",
|
|
|
+ "matchtype": "tag_regexp",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -832,7 +775,7 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
}
|
|
|
}
|
|
|
//空格kv
|
|
|
- if bl.SpaceKV != nil {
|
|
|
+ if bl.SpaceKV != nil {
|
|
|
kvs := bl.SpaceKV.Kvs
|
|
|
// log.Debug("SpaceKV", kvs)
|
|
|
for _, tag := range tags {
|
|
@@ -842,14 +785,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "space",
|
|
|
- "matchtype": "tag_string",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "space",
|
|
|
+ "matchtype": "tag_string",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -859,14 +803,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
text := ju.TrimLRSpace(kv.Value, "")
|
|
|
if text != "" {
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "space",
|
|
|
- "matchtype": "tag_regexp",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tag.Key,
|
|
|
+ "extfrom": extfrom,
|
|
|
+ "sourcevalue": text,
|
|
|
+ "value": text,
|
|
|
+ "type": "space",
|
|
|
+ "matchtype": "tag_regexp",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
}
|
|
|
break
|
|
@@ -876,7 +821,7 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
}
|
|
|
}
|
|
|
//表格kv
|
|
|
- if bl.TableKV != nil {
|
|
|
+ if bl.TableKV != nil {
|
|
|
tkv := bl.TableKV
|
|
|
// log.Debug("tkv", tkv)
|
|
|
for k, v := range tkv.Kv {
|
|
@@ -889,14 +834,15 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
ruletext = tags[-tkv.KvIndex[fieldname]].Key
|
|
|
}
|
|
|
kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": ruletext,
|
|
|
- "extfrom": "table",
|
|
|
- "value": v,
|
|
|
- "type": "table",
|
|
|
- "matchtype": "tag_string",
|
|
|
- "blocktag": bl.Tag,
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": ruletext,
|
|
|
+ "extfrom": "table",
|
|
|
+ "sourcevalue": v,
|
|
|
+ "value": v,
|
|
|
+ "type": "table",
|
|
|
+ "matchtype": "tag_string",
|
|
|
+ "blocktag": bl.Tag,
|
|
|
})
|
|
|
} else { //涉及其他待处理
|
|
|
// log.Debug(tags)
|
|
@@ -946,7 +892,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
|
|
|
if v.RegCore.NumSign == -1 { //正负值修正
|
|
|
val = "-" + val
|
|
|
}
|
|
|
- exfield := ju.ExtField{BlockTag: *tag, Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
|
|
|
+ exfield := ju.ExtField{BlockTag: *tag, Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: val, Value: val}
|
|
|
if extfrom == "title" {
|
|
|
exfield.Score = 4
|
|
|
}
|
|
@@ -957,9 +903,9 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
|
|
|
if extfrom == "title" {
|
|
|
item.Score = 4
|
|
|
}
|
|
|
- if strings.Contains(val,"\n") {
|
|
|
- item.Score -=1
|
|
|
- exfield.Score-=1
|
|
|
+ if strings.Contains(val, "\n") {
|
|
|
+ item.Score -= 1
|
|
|
+ exfield.Score -= 1
|
|
|
}
|
|
|
if tmp["scoreitem"] == nil {
|
|
|
sitems := make([]*ju.ScoreItem, 0)
|
|
@@ -1002,7 +948,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
|
|
|
if j.Result[v.Field] == nil {
|
|
|
j.Result[v.Field] = [](*ju.ExtField){}
|
|
|
}
|
|
|
- field := &ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
|
|
|
+ field := &ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: val, Value: val}
|
|
|
if extfrom == "title" {
|
|
|
field.Score = 4
|
|
|
}
|