|
@@ -1,6 +1,7 @@
|
|
|
package extract
|
|
|
|
|
|
import (
|
|
|
+ "encoding/json"
|
|
|
//"encoding/json"
|
|
|
"fmt"
|
|
|
"jy/clear"
|
|
@@ -195,6 +196,12 @@ func PreInfo(doc map[string]interface{}) *ju.Job {
|
|
|
|
|
|
//抽取
|
|
|
func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
+ if 1 == 2 {
|
|
|
+ for k, v := range j.Block {
|
|
|
+ bs, _ := json.Marshal(v.TableKV)
|
|
|
+ log.Println("Block TableKV", k, string(bs))
|
|
|
+ }
|
|
|
+ }
|
|
|
qu.Catch()
|
|
|
qu.Try(func() {
|
|
|
doc := *j.Data
|
|
@@ -241,8 +248,8 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
v.Value = data[0]
|
|
|
}
|
|
|
}
|
|
|
- //bs, _ := json.Marshal(j.Result)
|
|
|
- //log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
+ bs, _ := json.Marshal(j.Result)
|
|
|
+ log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
|
|
|
//抽取省份城市县
|
|
|
|
|
@@ -287,11 +294,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
|
|
|
func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
|
|
|
if in.IsLua {
|
|
|
lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
|
|
|
- if in.IsHasFields { //lua脚本配置有属性字段
|
|
|
- lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag)
|
|
|
- } else {
|
|
|
- lua.KvMap = map[string][]map[string]interface{}{}
|
|
|
- }
|
|
|
+ lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag)
|
|
|
lua.Block = j.Block
|
|
|
extinfo := lua.RunScript("core")
|
|
|
for k, v := range extinfo {
|
|
@@ -323,14 +326,15 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
|
|
|
//lua脚本根据属性设置提取kv值
|
|
|
func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
|
|
|
kvmap := map[string][]map[string]interface{}{}
|
|
|
- for _, vv := range in.LFields {
|
|
|
- field := qu.ObjToString(vv)
|
|
|
- tags := t[qu.ObjToString(vv)] //获取对应标签库
|
|
|
+ for fieldname, field := range in.LFields {
|
|
|
+ tags := t[field] //获取对应标签库
|
|
|
for _, bl := range j.Block {
|
|
|
//冒号kv
|
|
|
if bl.ColonKV != nil {
|
|
|
kvs := bl.ColonKV.Kvs
|
|
|
kvs2 := bl.ColonKV.Kvs_2
|
|
|
+ //log.Println("ColonKV1", kvs)
|
|
|
+ //log.Println("ColonKV2", kvs2)
|
|
|
for _, tag := range tags {
|
|
|
for _, kv := range kvs {
|
|
|
if tag.Type == "string" {
|
|
@@ -407,6 +411,7 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
//空格kv
|
|
|
if bl.SpaceKV != nil {
|
|
|
kvs := bl.SpaceKV.Kvs
|
|
|
+ //log.Println("SpaceKV", kvs)
|
|
|
for _, tag := range tags {
|
|
|
for _, kv := range kvs {
|
|
|
if tag.Type == "string" {
|
|
@@ -447,44 +452,61 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
|
|
|
}
|
|
|
//表格kv
|
|
|
if bl.TableKV != nil {
|
|
|
- kv := bl.TableKV.Kv
|
|
|
- for _, tag := range tags {
|
|
|
- for k, val := range kv {
|
|
|
- if tag.Type == "string" {
|
|
|
- if k == tag.Key {
|
|
|
- text := ju.TrimLRSpace(val, "")
|
|
|
- if text != "" {
|
|
|
- kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "table",
|
|
|
- "matchtype": "tag_string",
|
|
|
- })
|
|
|
- }
|
|
|
- break
|
|
|
- }
|
|
|
- } else if tag.Type == "regexp" {
|
|
|
- if tag.Reg.MatchString(k) {
|
|
|
- text := ju.TrimLRSpace(val, "")
|
|
|
- if text != "" {
|
|
|
- kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
- "field": field,
|
|
|
- "code": in.Code,
|
|
|
- "ruletext": tag.Key,
|
|
|
- "extfrom": extfrom,
|
|
|
- "value": text,
|
|
|
- "type": "table",
|
|
|
- "matchtype": "tag_regexp",
|
|
|
- })
|
|
|
- }
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
+ tkv := bl.TableKV
|
|
|
+ //log.Println("tkv", tkv)
|
|
|
+ for k, v := range tkv.Kv {
|
|
|
+ if k == fieldname {
|
|
|
+ //log.Println(k, v, tags[-tkv.KvIndex[fieldname]].Key)
|
|
|
+ kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ "field": field,
|
|
|
+ "code": in.Code,
|
|
|
+ "ruletext": tags[-tkv.KvIndex[fieldname]].Key,
|
|
|
+ "extfrom": "table",
|
|
|
+ "value": v,
|
|
|
+ "type": "table",
|
|
|
+ "matchtype": "tag_string",
|
|
|
+ })
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ // kv := bl.TableKV.Kv
|
|
|
+ // for _, tag := range tags {
|
|
|
+ // for k, val := range kv {
|
|
|
+ // if tag.Type == "string" {
|
|
|
+ // if k == tag.Key {
|
|
|
+ // text := ju.TrimLRSpace(val, "")
|
|
|
+ // if text != "" {
|
|
|
+ // kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ // "field": field,
|
|
|
+ // "code": in.Code,
|
|
|
+ // "ruletext": tag.Key,
|
|
|
+ // "extfrom": extfrom,
|
|
|
+ // "value": text,
|
|
|
+ // "type": "table",
|
|
|
+ // "matchtype": "tag_string",
|
|
|
+ // })
|
|
|
+ // }
|
|
|
+ // break
|
|
|
+ // }
|
|
|
+ // } else if tag.Type == "regexp" {
|
|
|
+ // if tag.Reg.MatchString(k) {
|
|
|
+ // text := ju.TrimLRSpace(val, "")
|
|
|
+ // if text != "" {
|
|
|
+ // kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
+ // "field": field,
|
|
|
+ // "code": in.Code,
|
|
|
+ // "ruletext": tag.Key,
|
|
|
+ // "extfrom": extfrom,
|
|
|
+ // "value": text,
|
|
|
+ // "type": "table",
|
|
|
+ // "matchtype": "tag_regexp",
|
|
|
+ // })
|
|
|
+ // }
|
|
|
+ // break
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
}
|
|
|
}
|
|
|
}
|