|
@@ -0,0 +1,904 @@
|
|
|
|
+package extract
|
|
|
|
+
|
|
|
|
+import (
|
|
|
|
+ "fmt"
|
|
|
|
+ "jy/clear"
|
|
|
|
+ "jy/pretreated"
|
|
|
|
+ ju "jy/util"
|
|
|
|
+ qu "qfw/util"
|
|
|
|
+ "regexp"
|
|
|
|
+ "strconv"
|
|
|
|
+ "strings"
|
|
|
|
+)
|
|
|
|
+
|
|
|
|
+//抽取-规则
|
|
|
|
+func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju.Job, isSite bool) {
|
|
|
|
+ //候选人加入
|
|
|
|
+ var kvMap map[string][]map[string]interface{}
|
|
|
|
+ extByReg := true
|
|
|
|
+ if vc.ExtFrom != "title" {
|
|
|
|
+ kvMap, extByReg = getKvByLuaFields(vc, j, e)
|
|
|
|
+ }
|
|
|
|
+ for _, v := range vc.RuleCores {
|
|
|
|
+ if v.IsLua {
|
|
|
|
+ ExtRuleCoreByKv(vc.ExtFrom, doc, j, v, &kvMap, e)
|
|
|
|
+ } else if extByReg {
|
|
|
|
+ ExtRuleCoreByReg(vc.ExtFrom, doc, j, v, e, isSite)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //如果只有一个分包,预算没有抽取到,把分包中的预算保存到外面
|
|
|
|
+ if vc.Field == "budget" && len(kvMap) == 0 {
|
|
|
|
+ if len(j.BlockPackage) == 1 {
|
|
|
|
+ for _, bp := range j.BlockPackage {
|
|
|
|
+ for fieldname, field := range vc.LFields {
|
|
|
|
+ if field != vc.Field {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ tp := ""
|
|
|
|
+ for k, v := range []*ju.JobKv{bp.ColonKV, bp.SpaceKV, bp.TableKV} {
|
|
|
|
+ if k == 0 {
|
|
|
|
+ tp = "colon"
|
|
|
|
+ } else if k == 1 {
|
|
|
|
+ tp = "space"
|
|
|
|
+ } else if k == 2 {
|
|
|
|
+ tp = "table"
|
|
|
|
+ }
|
|
|
|
+ if v == nil || v.KvTags == nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ for _, vv := range v.KvTags[fieldname] {
|
|
|
|
+ text := ju.TrimLRSpace(vv.Value, "")
|
|
|
|
+ if text != "" {
|
|
|
|
+ tmp := &ju.ExtField{
|
|
|
|
+ ExtFrom: "package",
|
|
|
|
+ Field: vc.Field,
|
|
|
|
+ Code: "CL_分包",
|
|
|
|
+ Type: tp,
|
|
|
|
+ MatchType: "package",
|
|
|
|
+ RuleText: bp.Text,
|
|
|
|
+ SourceValue: vv.Key,
|
|
|
|
+ Value: text,
|
|
|
|
+ }
|
|
|
|
+ if isSite {
|
|
|
|
+ tmp.Score = 1
|
|
|
|
+ }
|
|
|
|
+ j.Result[vc.Field] = append(j.Result[vc.Field], tmp)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ for k, v := range kvMap {
|
|
|
|
+ if j.Result[k] == nil {
|
|
|
|
+ j.Result[k] = [](*ju.ExtField){}
|
|
|
|
+ }
|
|
|
|
+ for _, tmp := range v {
|
|
|
|
+ field := &ju.ExtField{Weight: qu.IntAll(tmp["weight"]),
|
|
|
|
+ ExtFrom: qu.ObjToString(tmp["extfrom"]), Field: k,
|
|
|
|
+ Code: qu.ObjToString(tmp["code"]), Type: qu.ObjToString(tmp["type"]),
|
|
|
|
+ MatchType: qu.ObjToString(tmp["matchtype"]),
|
|
|
|
+ RuleText: qu.ObjToString(tmp["ruletext"]),
|
|
|
|
+ SourceValue: tmp["sourcevalue"],
|
|
|
|
+ Value: tmp["value"]}
|
|
|
|
+ if k == "bidamount" && field.ExtFrom == "第一候选人" {
|
|
|
|
+ field.Score = 1
|
|
|
|
+ }
|
|
|
|
+ if isSite {
|
|
|
|
+ field.Score = 1
|
|
|
|
+ }
|
|
|
|
+ if (field.Field == "bidamount" || field.Field == "budget") && field.Type == "table" {
|
|
|
|
+ moneys := clear.ObjToMoney([]interface{}{field.Value, ""}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if len(moneys) > 0 {
|
|
|
|
+ if vf, ok := moneys[0].(float64); ok {
|
|
|
|
+ field.Value = vf
|
|
|
|
+ field.IsTrue = moneys[len(moneys)-1].(bool)
|
|
|
|
+ } else if vi, ok := moneys[0].(int); ok {
|
|
|
|
+ field.Value = float64(vi)
|
|
|
|
+ field.IsTrue = moneys[len(moneys)-1].(bool)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if tmp["blocktag"] != nil {
|
|
|
|
+ btag := make(map[string]string)
|
|
|
|
+ for k := range tmp["blocktag"].(map[string]bool) {
|
|
|
|
+ blocktag.Lock()
|
|
|
|
+ if TagConfigDesc[k] != "" {
|
|
|
|
+ btag[k] = TagConfigDesc[k]
|
|
|
|
+ }
|
|
|
|
+ blocktag.Unlock()
|
|
|
|
+ }
|
|
|
|
+ field.BlockTag = btag
|
|
|
|
+ }
|
|
|
|
+ j.Result[k] = append(j.Result[k], field)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//抽取-规则-kv
|
|
|
|
+func ExtRuleCoreByKv(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, kvMap *map[string][]map[string]interface{}, et *ExtractTask) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ if extfrom == "title" || !in.IsLua {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
|
|
|
|
+ lua.KvMap = *kvMap
|
|
|
|
+ lua.Block = j.Block
|
|
|
|
+ extinfo := lua.RunScript("core")
|
|
|
|
+ if tmps, ok := extinfo[in.Field].([]map[string]interface{}); ok {
|
|
|
|
+ for _, v := range tmps {
|
|
|
|
+ v["core"] = in.Code
|
|
|
|
+ }
|
|
|
|
+ (*kvMap)[in.Field] = append((*kvMap)[in.Field], tmps...)
|
|
|
|
+ }
|
|
|
|
+ if len(extinfo) > 0 {
|
|
|
|
+ AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//抽取-规则-正则
|
|
|
|
+func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask, isSite bool) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ //根据field配置项目,是否抽取。例如:废标、流标等跳过,
|
|
|
|
+ b := IsExtract(in.Field, j.Title, j.Content)
|
|
|
|
+ if !b {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ //全文正则
|
|
|
|
+ //text := qu.ObjToString(doc[extfrom])
|
|
|
|
+ //if in.Field != "" {
|
|
|
|
+ // extinfo := extRegCoreToResult(extfrom, text, j, in)
|
|
|
|
+ // if len(extinfo) > 0 {
|
|
|
|
+ // AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
|
|
+ // }
|
|
|
|
+ //}
|
|
|
|
+ //块抽取
|
|
|
|
+ if in.Field != "" {
|
|
|
|
+ if extfrom == "title" {
|
|
|
|
+ extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]string{}, j, in, isSite)
|
|
|
|
+ if len(extinfo) > 0 {
|
|
|
|
+ AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "qualifies" {
|
|
|
|
+ extinfo := extRegCoreToResult(extfrom, pretreated.HtmlToText(qu.ObjToString(doc[extfrom])), &map[string]string{}, j, in, isSite)
|
|
|
|
+ if len(extinfo) > 0 {
|
|
|
|
+ AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ for _, v := range j.Block {
|
|
|
|
+ btag := make(map[string]string)
|
|
|
|
+ for k := range v.Classify {
|
|
|
|
+ blocktag.Lock()
|
|
|
|
+ btag[k] = TagConfigDesc[k]
|
|
|
|
+ blocktag.Unlock()
|
|
|
|
+ }
|
|
|
|
+ extinfo := extRegCoreToResult(extfrom, v.Text, &btag, j, in, isSite)
|
|
|
|
+ if len(extinfo) > 0 {
|
|
|
|
+ AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//pkg抽取-规则-正则
|
|
|
|
+func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ //根据field配置项目,是否抽取。例如:废标、流标等跳过,
|
|
|
|
+ b := IsExtract(in.Field, j.Title, j.Content)
|
|
|
|
+ if !b {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ //块抽取
|
|
|
|
+ if in.Field != "" {
|
|
|
|
+ //临时调试分包抽取字段-
|
|
|
|
+ if in.Field == "bidamount" {
|
|
|
|
+ //log.Debug("分包-调试字段...")
|
|
|
|
+ }
|
|
|
|
+ for k, vbpkg := range j.BlockPackage {
|
|
|
|
+ rep := map[string]string{}
|
|
|
|
+ if in.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
|
|
|
|
+ if in.Field == "budget" && vbpkg.Budget > 0 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "agencyfee" && vbpkg.Agencyfee > 0 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "bidamount" && vbpkg.Bidamount > 0 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "winner" && vbpkg.Winner != "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "bidstatus" && vbpkg.BidStatus != "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "projectname" && vbpkg.Name != "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "winner" && vbpkg.Winner != "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "winnerperson" {
|
|
|
|
+ if vbpkg.Winner == "" || len(vbpkg.Winner) < 4 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if !strings.Contains(vbpkg.Text, vbpkg.Winner) {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "winnertel" {
|
|
|
|
+ if vbpkg.WinnerPerson == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //处理正负数修正
|
|
|
|
+ ptmp := strings.Split(in.RuleText, "#")
|
|
|
|
+ sign := 0
|
|
|
|
+ if len(ptmp) == 2 {
|
|
|
|
+ if ptmp[1] == "正" {
|
|
|
|
+ sign = 1
|
|
|
|
+ } else if ptmp[1] == "负" {
|
|
|
|
+ sign = -1
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ tmp := strings.Split(ptmp[0], "__")
|
|
|
|
+ if len(tmp) == 2 {
|
|
|
|
+ epos := strings.Split(tmp[1], ",")
|
|
|
|
+ posm := map[string]int{}
|
|
|
|
+ for _, v := range epos {
|
|
|
|
+ ks := strings.Split(v, ":")
|
|
|
|
+ if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
|
|
|
|
+ posm[ks[1]] = qu.IntAll(ks[0])
|
|
|
|
+ } else {
|
|
|
|
+ posm[in.Field] = qu.IntAll(ks[0])
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ var pattern string
|
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
|
+ } else {
|
|
|
|
+ pattern = tmp[0]
|
|
|
|
+ }
|
|
|
|
+ //log.Debug("pattern", pattern)
|
|
|
|
+ //fmt.Println(text)
|
|
|
|
+ reg := regexp.MustCompile(pattern)
|
|
|
|
+ apos := reg.FindAllStringSubmatchIndex(vbpkg.Text, -1)
|
|
|
|
+ for i, _ := range apos {
|
|
|
|
+ pos := apos[i]
|
|
|
|
+ for k, p := range posm {
|
|
|
|
+ if len(pos) > p {
|
|
|
|
+ if pos[p] == -1 || pos[p+1] == -1 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ val := vbpkg.Text[pos[p]:pos[p+1]]
|
|
|
|
+ if string(val) == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if sign == -1 {
|
|
|
|
+ rep[k+"_"+fmt.Sprint(i)] = "-" + val
|
|
|
|
+ } else {
|
|
|
|
+ rep[k+"_"+fmt.Sprint(i)] = val
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //fmt.Println(text)
|
|
|
|
+ for i := 0; i < len(apos); i++ {
|
|
|
|
+ if strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]) != "" {
|
|
|
|
+ if in.Field == "budget" && vbpkg.Budget <= 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ cfn := e.ClearFn[in.Field]
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if data[len(data)-1].(bool) {
|
|
|
|
+ j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
|
+ j.BlockPackage[k].IsTrueBudget = true
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ } else if in.Field == "agencyfee" && vbpkg.Agencyfee <= 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ cfn := e.ClearFn[in.Field]
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if data[len(data)-1].(bool) {
|
|
|
|
+ j.BlockPackage[k].Agencyfee = qu.Float64All(data[0])
|
|
|
|
+ j.BlockPackage[k].IsTrueAgencyfee = true
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ } else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ cfn := e.ClearFn[in.Field]
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if data[len(data)-1].(bool) {
|
|
|
|
+ j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
|
+ j.BlockPackage[k].IsTrueBidamount = true
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ } else if in.Field == "winner" {
|
|
|
|
+ if j.BlockPackage[k].Winner == "" {
|
|
|
|
+ j.BlockPackage[k].Winner = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "winnertel" {
|
|
|
|
+ if j.BlockPackage[k].WinnerTel == "" {
|
|
|
|
+ j.BlockPackage[k].WinnerTel = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "winnerperson" {
|
|
|
|
+ if j.BlockPackage[k].WinnerPerson == "" {
|
|
|
|
+ j.BlockPackage[k].WinnerPerson = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "bidstatus" {
|
|
|
|
+ if j.BlockPackage[k].BidStatus == "" {
|
|
|
|
+ j.BlockPackage[k].BidStatus = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "projectname" {
|
|
|
|
+ if j.BlockPackage[k].Name == "" {
|
|
|
|
+ j.BlockPackage[k].Name = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "winnerperson" {
|
|
|
|
+ if j.BlockPackage[k].WinnerPerson == "" {
|
|
|
|
+ j.BlockPackage[k].WinnerPerson = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "winnertel" {
|
|
|
|
+ if j.BlockPackage[k].WinnerTel == "" && j.BlockPackage[k].Winner != "" && j.BlockPackage[k].WinnerPerson != "" {
|
|
|
|
+ j.BlockPackage[k].WinnerTel = rep[in.Field+"_"+fmt.Sprint(i)]
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ pos := in.RegCore.Reg.FindStringIndex(vbpkg.Text)
|
|
|
|
+ val := ""
|
|
|
|
+ if len(pos) == 2 {
|
|
|
|
+ //"text" = "text"[pos[1]:]
|
|
|
|
+ val = "text"[pos[1]:]
|
|
|
|
+ rs := regexp.MustCompile("[^\r\n\t]+")
|
|
|
|
+ tmp := rs.FindAllString("text", -1)
|
|
|
|
+ if len(tmp) > 0 {
|
|
|
|
+ val = tmp[0]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if val != "" {
|
|
|
|
+ if in.Field == "budget" && vbpkg.Budget <= 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ cfn := e.ClearFn[in.Field]
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if data[len(data)-1].(bool) {
|
|
|
|
+ j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
|
+ j.BlockPackage[k].IsTrueBudget = true
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
|
|
|
|
+ lock.Lock()
|
|
|
|
+ cfn := e.ClearFn[in.Field]
|
|
|
|
+ lock.Unlock()
|
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{val, j.Content}, j.SpiderCode, j.IsClearnMoney)
|
|
|
|
+ if data[len(data)-1].(bool) {
|
|
|
|
+ j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
|
+ j.BlockPackage[k].IsTrueBidamount = true
|
|
|
|
+ }
|
|
|
|
+ break
|
|
|
|
+ } else if in.Field == "bidstatus" {
|
|
|
|
+ if j.BlockPackage[k].BidStatus == "" {
|
|
|
|
+ j.BlockPackage[k].BidStatus = val
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ } else if in.Field == "projectname" {
|
|
|
|
+ if j.BlockPackage[k].Name == "" {
|
|
|
|
+ j.BlockPackage[k].Name = val
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//lua脚本根据属性设置提取kv值
|
|
|
|
+func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]map[string]interface{}, bool) {
|
|
|
|
+ kvmap := map[string][]map[string]interface{}{}
|
|
|
|
+ if len(j.Winnerorder) > 1 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
|
|
|
|
+ if vc.Field == "bidamount" {
|
|
|
|
+ for k, v := range j.Winnerorder {
|
|
|
|
+ if v["price"] == nil || k != 0 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
|
+ "code": "winnerorder",
|
|
|
|
+ "field": vc.Field,
|
|
|
|
+ "ruletext": "中标候选人_" + fmt.Sprint(v["sortstr"]),
|
|
|
|
+ "extfrom": v["sortstr"],
|
|
|
|
+ "sourcevalue": v["price"],
|
|
|
|
+ "value": v["price"],
|
|
|
|
+ "type": "winnerorder",
|
|
|
|
+ "matchtype": "winnerorder",
|
|
|
|
+ })
|
|
|
|
+ if len(j.Winnerorder) < 4 {
|
|
|
|
+ return kvmap, false
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ //候选人中标金额
|
|
|
|
+ if price := j.Winnerorder[0]["price"]; price != nil {
|
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
|
+ "code": "CL_中标候选人",
|
|
|
|
+ "field": vc.Field,
|
|
|
|
+ "ruletext": "中标候选人",
|
|
|
|
+ "extfrom": j.Winnerorder[0]["sortstr"],
|
|
|
|
+ "sourcevalue": price,
|
|
|
|
+ "value": price,
|
|
|
|
+ "type": "winnerorder",
|
|
|
|
+ "matchtype": "winnerorder",
|
|
|
|
+ })
|
|
|
|
+ if len(j.Winnerorder) < 4 {
|
|
|
|
+ return kvmap, false
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ for fieldname, field := range vc.LFields {
|
|
|
|
+ if field != vc.Field {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ extractFromKv(field, fieldname, j.Block, vc, kvmap, j.Category)
|
|
|
|
+ }
|
|
|
|
+ AddExtLog("extract", j.SourceMid, nil, kvmap, &RegLuaInfo{Field: vc.Field}, et.TaskInfo) //抽取日志
|
|
|
|
+ return kvmap, true
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kvmap map[string][]map[string]interface{}, Category string) {
|
|
|
|
+ //qu.Debug("fieldname+++", fieldname)
|
|
|
|
+ for _, bl := range blocks {
|
|
|
|
+ tp := ""
|
|
|
|
+ if strings.Contains(bl.Title, "保证金") && (field == "bid_bond" || field == "contract_bond") {
|
|
|
|
+ if text := ju.TrimLRSpace(bl.Text, ""); text != "" {
|
|
|
|
+ if Category == "招标" || Category == "拟建" || Category == "预告" {
|
|
|
|
+ kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
|
+ "code": "CL_块内容",
|
|
|
|
+ "field": field,
|
|
|
|
+ "ruletext": "投标保证金",
|
|
|
|
+ "extfrom": "投标保证金_块内容",
|
|
|
|
+ "sourcevalue": bl.Text,
|
|
|
|
+ "value": text,
|
|
|
|
+ "type": "投标保证金_块内容",
|
|
|
|
+ "matchtype": "tag_string",
|
|
|
|
+ "blocktag": bl.Classify,
|
|
|
|
+ "weight": 0,
|
|
|
|
+ })
|
|
|
|
+ } else if Category == "结果" {
|
|
|
|
+ kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
|
+ "code": "CL_",
|
|
|
|
+ "field": field,
|
|
|
|
+ "ruletext": "履约保证金",
|
|
|
|
+ "extfrom": "履约保证金_块内容",
|
|
|
|
+ "sourcevalue": bl.Text,
|
|
|
|
+ "value": text,
|
|
|
|
+ "type": "履约保证金_块内容",
|
|
|
|
+ "matchtype": "tag_string",
|
|
|
|
+ "blocktag": bl.Classify,
|
|
|
|
+ "weight": 0,
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ for k, v := range []*ju.JobKv{bl.ColonKV, bl.SpaceKV, bl.TableKV} {
|
|
|
|
+ if k == 0 {
|
|
|
|
+ tp = "colon"
|
|
|
|
+ } else if k == 1 {
|
|
|
|
+ tp = "space"
|
|
|
|
+ } else if k == 2 {
|
|
|
|
+ tp = "table"
|
|
|
|
+ }
|
|
|
|
+ if v == nil || v.KvTags == nil {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ for _, vv := range v.KvTags[fieldname] {
|
|
|
|
+ text := ju.TrimLRSpace(vv.Value, "")
|
|
|
|
+ if text != "" {
|
|
|
|
+ kvmap[field] = append(kvmap[field], map[string]interface{}{
|
|
|
|
+ "code": "CL_" + vv.Key,
|
|
|
|
+ "field": field,
|
|
|
|
+ "ruletext": vv.Key,
|
|
|
|
+ "extfrom": vc.ExtFrom,
|
|
|
|
+ "sourcevalue": text,
|
|
|
|
+ "value": text,
|
|
|
|
+ "type": tp,
|
|
|
|
+ "matchtype": "tag_string",
|
|
|
|
+ "blocktag": bl.Classify,
|
|
|
|
+ "weight": vv.Weight,
|
|
|
|
+ })
|
|
|
|
+ //if field != "winnertel" && field != "winnerperson" {
|
|
|
|
+ // //break //暂定取第一个
|
|
|
|
+ //}
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(kvmap[field]) == 0 {
|
|
|
|
+ extractFromKv(field, fieldname, bl.Block, vc, kvmap, Category)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//正则提取结果
|
|
|
|
+func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job, vre *RegLuaInfo, isSite bool) map[string][]map[string]interface{} {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ var score float64
|
|
|
|
+ score = vre.Score
|
|
|
|
+ if isSite {
|
|
|
|
+ score = score + 1.0
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ extinfo := map[string][]map[string]interface{}{}
|
|
|
|
+ rep := map[string]string{}
|
|
|
|
+ if vre.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
|
|
|
|
+ //处理正负数修正
|
|
|
|
+ ptmp := strings.Split(vre.RuleText, "#")
|
|
|
|
+ sign := 0
|
|
|
|
+ if len(ptmp) == 2 {
|
|
|
|
+ if ptmp[1] == "正" {
|
|
|
|
+ sign = 1
|
|
|
|
+ } else if ptmp[1] == "负" {
|
|
|
|
+ sign = -1
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ tmp := strings.Split(ptmp[0], "__")
|
|
|
|
+ if len(tmp) == 2 {
|
|
|
|
+ posm := map[string]int{}
|
|
|
|
+ sufVal := ""
|
|
|
|
+ sufArr := strings.Split(tmp[1], "~~")
|
|
|
|
+ if len(sufArr) == 2 { //后缀补
|
|
|
|
+ posm[vre.Field] = qu.IntAll(sufArr[0])
|
|
|
|
+ sufVal = sufArr[1]
|
|
|
|
+ } else {
|
|
|
|
+ epos := strings.Split(tmp[1], ",")
|
|
|
|
+ for _, v := range epos {
|
|
|
|
+ ks := strings.Split(v, ":")
|
|
|
|
+ if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
|
|
|
|
+ posm[ks[1]] = qu.IntAll(ks[0])
|
|
|
|
+ } else {
|
|
|
|
+ posm[vre.Field] = qu.IntAll(ks[0])
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ var pattern string
|
|
|
|
+ if strings.Contains(tmp[0], "\\u") {
|
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
|
|
|
|
+ tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
|
|
|
|
+ pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
|
|
|
|
+ } else {
|
|
|
|
+ pattern = tmp[0]
|
|
|
|
+ }
|
|
|
|
+ //log.Debug("pattern", pattern)
|
|
|
|
+ //fmt.Println(text)
|
|
|
|
+ reg := regexp.MustCompile(pattern)
|
|
|
|
+ apos := reg.FindAllStringSubmatchIndex(text, -1)
|
|
|
|
+ for i, _ := range apos {
|
|
|
|
+ pos := apos[i]
|
|
|
|
+ for k, p := range posm {
|
|
|
|
+ if len(pos) > p {
|
|
|
|
+ if pos[p] == -1 || pos[p+1] == -1 {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ val := text[pos[p]:pos[p+1]]
|
|
|
|
+ if val != "" && sufVal != "" {
|
|
|
|
+ val += sufVal
|
|
|
|
+ }
|
|
|
|
+ if string(val) == "" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if sign == -1 {
|
|
|
|
+ rep[k+"_"+fmt.Sprint(i)] = "-" + val
|
|
|
|
+ } else {
|
|
|
|
+ rep[k+"_"+fmt.Sprint(i)] = val
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ tmps := []map[string]interface{}{}
|
|
|
|
+ for i := 0; i < len(apos); i++ {
|
|
|
|
+ if strings.TrimSpace(rep[vre.Field+"_"+fmt.Sprint(i)]) != "" {
|
|
|
|
+ tmp := map[string]interface{}{
|
|
|
|
+ "field": vre.Field,
|
|
|
|
+ "code": vre.Code,
|
|
|
|
+ "ruletext": vre.RuleText,
|
|
|
|
+ "extfrom": text,
|
|
|
|
+ "value": rep[vre.Field+"_"+fmt.Sprint(i)],
|
|
|
|
+ "type": "regexp",
|
|
|
|
+ "matchtype": "regcontent",
|
|
|
|
+ "blocktag": *tag,
|
|
|
|
+ "score": score,
|
|
|
|
+ }
|
|
|
|
+ exfield := ju.ExtField{
|
|
|
|
+ BlockTag: *tag,
|
|
|
|
+ Field: vre.Field,
|
|
|
|
+ Code: vre.Code,
|
|
|
|
+ RuleText: vre.RuleText,
|
|
|
|
+ Type: "regexp",
|
|
|
|
+ MatchType: "regcontent",
|
|
|
|
+ ExtFrom: extfrom,
|
|
|
|
+ SourceValue: rep[vre.Field+"_"+fmt.Sprint(i)],
|
|
|
|
+ Value: rep[vre.Field+"_"+fmt.Sprint(i)],
|
|
|
|
+ Score: score,
|
|
|
|
+ }
|
|
|
|
+ if vre.Field == "qualifies" {
|
|
|
|
+ if len(rep) >= 2 {
|
|
|
|
+ tmp["ruletext"] = rep[vre.Field+"_key_"+fmt.Sprint(i)]
|
|
|
|
+ exfield.RuleText = rep[vre.Field+"_key_"+fmt.Sprint(i)]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ tmps = append(tmps, tmp)
|
|
|
|
+ if tmp["blocktag"] != nil {
|
|
|
|
+ exfield.BlockTag = tmp["blocktag"].(map[string]string)
|
|
|
|
+ }
|
|
|
|
+ j.Result[vre.Field] = append(j.Result[vre.Field], &exfield)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(tmps) > 0 {
|
|
|
|
+ //fmt.Println(tmps)
|
|
|
|
+ extinfo[vre.Field] = tmps
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ pos := vre.RegCore.Reg.FindStringIndex(text)
|
|
|
|
+ val := ""
|
|
|
|
+ if len(pos) == 2 {
|
|
|
|
+ text = text[pos[1]:]
|
|
|
|
+ rs := regexp.MustCompile("[^\r\n\t]+")
|
|
|
|
+ tmp := rs.FindAllString(text, -1)
|
|
|
|
+ if len(tmp) > 0 {
|
|
|
|
+ val = tmp[0]
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if val != "" {
|
|
|
|
+ tmps := []map[string]interface{}{}
|
|
|
|
+ tmp := map[string]interface{}{
|
|
|
|
+ "field": vre.Field,
|
|
|
|
+ "code": vre.Code,
|
|
|
|
+ "ruletext": vre.RuleText,
|
|
|
|
+ "extfrom": text,
|
|
|
|
+ "value": val,
|
|
|
|
+ "type": "regexp",
|
|
|
|
+ "matchtype": "regcontent",
|
|
|
|
+ "blocktag": *tag,
|
|
|
|
+ "score": score,
|
|
|
|
+ }
|
|
|
|
+ tmps = append(tmps, tmp)
|
|
|
|
+ extinfo[vre.Field] = tmps
|
|
|
|
+ if j.Result[vre.Field] == nil {
|
|
|
|
+ j.Result[vre.Field] = [](*ju.ExtField){}
|
|
|
|
+ }
|
|
|
|
+ field := &ju.ExtField{BlockTag: *tag, Field: vre.Field, Code: vre.Code, RuleText: vre.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text,
|
|
|
|
+ Value: val,
|
|
|
|
+ Score: score}
|
|
|
|
+ if tmp["blocktag"] != nil {
|
|
|
|
+ field.BlockTag = tmp["blocktag"].(map[string]string)
|
|
|
|
+ }
|
|
|
|
+ j.Result[vre.Field] = append(j.Result[vre.Field], field)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return extinfo
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//后置过滤
|
|
|
|
+func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ if in.IsLua {
|
|
|
|
+ result := GetResultMapForLua(j)
|
|
|
|
+ lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
|
|
|
|
+ if j != nil {
|
|
|
|
+ lua.Block = j.Block
|
|
|
|
+ }
|
|
|
|
+ extinfo := lua.RunScript("back")
|
|
|
|
+ for k, v := range extinfo {
|
|
|
|
+ if tmps, ok := v.([]map[string]interface{}); ok {
|
|
|
|
+ j.Result[k] = [](*ju.ExtField){}
|
|
|
|
+ for _, tmp := range tmps {
|
|
|
|
+ field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]),
|
|
|
|
+ ExtFrom: qu.ObjToString(tmp["extfrom"]),
|
|
|
|
+ Value: tmp["value"]}
|
|
|
|
+ if tmp["blocktag"] != nil {
|
|
|
|
+ field.BlockTag = tmp["blocktag"].(map[string]string)
|
|
|
|
+ }
|
|
|
|
+ j.Result[k] = append(j.Result[k], field)
|
|
|
|
+ //j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if len(extinfo) > 0 {
|
|
|
|
+ AddExtLog("clear", j.SourceMid, result, extinfo, in, t) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ extinfo := map[string]interface{}{}
|
|
|
|
+ if in.Field != "" {
|
|
|
|
+ clearByTitle := false
|
|
|
|
+ if vc != nil && vc.ExtFrom == "title" && in.Field == "buyer" { //buyer从title抽取到的单独走titile的清理
|
|
|
|
+ clearByTitle = true
|
|
|
|
+ }
|
|
|
|
+ if j.Result[in.Field] != nil {
|
|
|
|
+ tmp := j.Result[in.Field]
|
|
|
|
+ exts := []interface{}{}
|
|
|
|
+ for k, v := range tmp {
|
|
|
|
+ if clearByTitle && v.ExtFrom != "title" {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ //table抽取到的数据不清理
|
|
|
|
+ if v.Type == "table" && v.Field == "projectname" {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ text := qu.ObjToString(v.Value)
|
|
|
|
+ if v.Field == "bidamount" || v.Field == "budget" {
|
|
|
|
+ if (strings.Contains(qu.ObjToString(v.SourceValue), "费率") ||
|
|
|
|
+ strings.Contains(qu.ObjToString(v.SourceValue), "税率") ||
|
|
|
|
+ strings.Contains(qu.ObjToString(v.SourceValue), "(%)")) &&
|
|
|
|
+ !strings.Contains(qu.ObjToString(v.SourceValue), "工程设计费") &&
|
|
|
|
+ !strings.Contains(qu.ObjToString(v.SourceValue), "含税总价") {
|
|
|
|
+ j.Result[in.Field][k].IsTrue = false
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if text != "" {
|
|
|
|
+ text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
|
|
|
|
+ }
|
|
|
|
+ if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ j.Result[in.Field][k].Value = text
|
|
|
|
+ exts = append(exts, map[string]interface{}{
|
|
|
|
+ "field": v.Field,
|
|
|
|
+ "code": v.Code,
|
|
|
|
+ "ruletext": v.RuleText,
|
|
|
|
+ "type": v.Type,
|
|
|
|
+ "matchtype": v.MatchType,
|
|
|
|
+ "extfrom": v.ExtFrom,
|
|
|
|
+ "value": text,
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+ if len(exts) > 0 {
|
|
|
|
+ extinfo[in.Field] = exts
|
|
|
|
+ AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ for key, tmp := range j.Result {
|
|
|
|
+ exts := []interface{}{}
|
|
|
|
+ for k, v := range tmp {
|
|
|
|
+ //table抽取到的数据不清理
|
|
|
|
+ if v.Type == "table" && v.Field == "projectname" {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ text := qu.ObjToString(v.Value)
|
|
|
|
+ if text != "" {
|
|
|
|
+ text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
|
|
|
|
+ }
|
|
|
|
+ if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ j.Result[key][k].Value = text
|
|
|
|
+ exts = append(exts, map[string]interface{}{
|
|
|
|
+ "field": v.Field,
|
|
|
|
+ "code": v.Code,
|
|
|
|
+ "ruletext": v.RuleText,
|
|
|
|
+ "type": v.Type,
|
|
|
|
+ "matchtype": v.MatchType,
|
|
|
|
+ "extfrom": v.ExtFrom,
|
|
|
|
+ "value": text,
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+ if len(exts) > 0 {
|
|
|
|
+ extinfo[key] = exts
|
|
|
|
+ AddExtLog("clear", j.SourceMid, j.Result, extinfo, in, t) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//后置过滤
|
|
|
|
+func ExtRegBackPkg(j *ju.Job, in *RegLuaInfo) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ for k, v := range j.BlockPackage {
|
|
|
|
+ if in.Field == "winner" {
|
|
|
|
+ j.BlockPackage[k].Winner = in.RegPreBac.Reg.ReplaceAllString(v.Winner, in.RegPreBac.Replace)
|
|
|
|
+ } else if in.Field == "bidstatus" {
|
|
|
|
+ j.BlockPackage[k].BidStatus = in.RegPreBac.Reg.ReplaceAllString(v.BidStatus, in.RegPreBac.Replace)
|
|
|
|
+ } else if in.Field == "" {
|
|
|
|
+ j.BlockPackage[k].Text = in.RegPreBac.Reg.ReplaceAllString(v.Text, in.RegPreBac.Replace)
|
|
|
|
+ } else if in.Field == "projectname" {
|
|
|
|
+ j.BlockPackage[k].Name = in.RegPreBac.Reg.ReplaceAllString(v.Name, in.RegPreBac.Replace)
|
|
|
|
+ } else if in.Field == "winnerperson" {
|
|
|
|
+ j.BlockPackage[k].WinnerPerson = in.RegPreBac.Reg.ReplaceAllString(v.WinnerPerson, in.RegPreBac.Replace)
|
|
|
|
+ } else if in.Field == "winnertel" {
|
|
|
|
+ j.BlockPackage[k].WinnerTel = in.RegPreBac.Reg.ReplaceAllString(v.WinnerTel, in.RegPreBac.Replace)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//KV过滤
|
|
|
|
+func ExtRuleKV(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ extinfo := map[string]interface{}{}
|
|
|
|
+ if in.Field != "" {
|
|
|
|
+ if j.Result[in.Field] != nil {
|
|
|
|
+ tmp := j.Result[in.Field]
|
|
|
|
+ exts := []interface{}{}
|
|
|
|
+ for k, v := range tmp {
|
|
|
|
+ if v.Type != "table" && !strings.Contains(v.Type, "colon") && !strings.Contains(v.Type, "space") {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if v.Field == "docendtime" {
|
|
|
|
+ //log.Debug("调试字段...")
|
|
|
|
+ }
|
|
|
|
+ text := qu.ObjToString(v.Value)
|
|
|
|
+ if text != "" {
|
|
|
|
+ text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
|
|
|
|
+ }
|
|
|
|
+ if text == qu.ObjToString(v.Value) { //值未发生改变,不存日志
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ j.Result[in.Field][k].Value = text
|
|
|
|
+ exts = append(exts, map[string]interface{}{
|
|
|
|
+ "field": v.Field,
|
|
|
|
+ "code": v.Code,
|
|
|
|
+ "ruletext": v.RuleText,
|
|
|
|
+ "type": v.Type,
|
|
|
|
+ "matchtype": v.MatchType,
|
|
|
|
+ "extfrom": v.ExtFrom,
|
|
|
|
+ "value": text,
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+ if len(exts) > 0 {
|
|
|
|
+ extinfo[in.Field] = exts
|
|
|
|
+ AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+//前置过滤
|
|
|
|
+func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} {
|
|
|
|
+ defer qu.Catch()
|
|
|
|
+ before := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
|
+ extinfo := map[string]interface{}{}
|
|
|
|
+ if in.IsLua {
|
|
|
|
+ lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
|
|
|
|
+ if j != nil {
|
|
|
|
+ lua.Block = j.Block
|
|
|
|
+ }
|
|
|
|
+ extinfo = lua.RunScript("pre")
|
|
|
|
+ for k, v := range extinfo { //结果覆盖原doc
|
|
|
|
+ doc[k] = v
|
|
|
|
+ }
|
|
|
|
+ AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
|
|
|
|
+ } else {
|
|
|
|
+ var key string
|
|
|
|
+ if !j.IsFile {
|
|
|
|
+ key = qu.If(in.Field == "", "detail", in.Field).(string)
|
|
|
|
+ } else {
|
|
|
|
+ key = qu.If(in.Field == "", "detailfile", in.Field).(string)
|
|
|
|
+ }
|
|
|
|
+ text := qu.ObjToString(doc[key])
|
|
|
|
+ extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "")
|
|
|
|
+ doc[key] = extinfo[key] //结果覆盖原doc
|
|
|
|
+ AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
|
|
|
|
+ }
|
|
|
|
+ return doc
|
|
|
|
+}
|