Quellcode durchsuchen

块tag中文解释

fengweiqiang vor 6 Jahren
Ursprung
Commit
70d841b8f6
5 geänderte Dateien mit 53 neuen und 12 gelöschten Zeilen
  1. 21 9
      src/jy/extract/extract.go
  2. 2 0
      src/jy/extract/score.go
  3. 1 1
      src/jy/util/article.go
  4. 2 2
      src/res/fieldscore.json
  5. 27 0
      src/res/tagscoredesc.json

+ 21 - 9
src/jy/extract/extract.go

@@ -387,7 +387,11 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 					//if len(j.Result[vc.Field]) < 1 {//如果抽取有结果,不走标题。待验证,暂时标题加入选举逻辑
 					field := &ju.ExtField{Field: vc.Field, Code: "title", RuleText: "title", Type: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title}
 					if tmp["blocktag"] != nil {
-						field.BlockTag = tmp["blocktag"].(map[string]bool)
+						btag:= make(map[string]string)
+						for k := range tmp["blocktag"].(map[string]bool){
+							btag[k] = TagConfigDesc[k]
+						}
+						field.BlockTag = btag
 					}
 					j.Result[vc.Field] = append(j.Result[vc.Field], field)
 					//}
@@ -621,7 +625,11 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 					for _, tmp := range tmps {
 						field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), SourceValue: tmp["sourcevalue"], Value: tmp["value"]}
 						if tmp["blocktag"] != nil {
-							field.BlockTag = tmp["blocktag"].(map[string]bool)
+							btag := make(map[string]string)
+							for k := range tmp["blocktag"].(map[string]bool){
+								btag[k] = TagConfigDesc[k]
+							}
+							field.BlockTag = btag
 						}
 						j.Result[k] = append(j.Result[k], field)
 					}
@@ -643,13 +651,17 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 		//块抽取
 		if in.Field != "" {
 			if extfrom == "title" {
-				extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]bool{"title": true}, j, in)
+				extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]string{}, j, in)
 				if len(extinfo) > 0 {
 					AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 				}
 			} else {
 				for _, v := range j.Block {
-					extinfo := extRegCoreToResult(extfrom, v.Text, &v.Classify, j, in)
+					btag := make(map[string]string)
+					for k:=range v.Classify{
+						btag[k] = TagConfigDesc[k]
+					}
+					extinfo := extRegCoreToResult(extfrom, v.Text, &btag, j, in)
 					if len(extinfo) > 0 {
 						AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 					}
@@ -840,7 +852,7 @@ func getKvByLuaFields(extfrom string, j *ju.Job, in *RegLuaInfo, t map[string][]
 }
 
 //正则提取结果
-func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} {
+func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} {
 	defer qu.Catch()
 	extinfo := map[string][]map[string]interface{}{}
 	if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
@@ -879,7 +891,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
 						}
 						exfield := ju.ExtField{BlockTag: *tag, Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: sourcevalue, Value: val}
 						if tmp["blocktag"] != nil {
-							exfield.BlockTag = tmp["blocktag"].(map[string]bool)
+							exfield.BlockTag = tmp["blocktag"].(map[string]string)
 						}
 						j.Result[k] = append(j.Result[k], &exfield)
 						//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
@@ -912,7 +924,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
 							extinfo[v.Field] = tmps
 							exfield := ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code + "去除__*后", RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text, Value: value}
 							if tmp["blocktag"] != nil {
-								exfield.BlockTag = tmp["blocktag"].(map[string]bool)
+								exfield.BlockTag = tmp["blocktag"].(map[string]string)
 							}
 							j.Result[v.Field] = append(j.Result[v.Field], &exfield)
 							//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
@@ -951,7 +963,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
 			}
 			field := &ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text, Value: val}
 			if tmp["blocktag"] != nil {
-				field.BlockTag = tmp["blocktag"].(map[string]bool)
+				field.BlockTag = tmp["blocktag"].(map[string]string)
 			}
 			j.Result[v.Field] = append(j.Result[v.Field], field)
 		}
@@ -975,7 +987,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 				for _, tmp := range tmps {
 					field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), ExtFrom: qu.ObjToString(tmp["extfrom"]), Value: tmp["value"], Score: 0}
 					if tmp["blocktag"] != nil {
-						field.BlockTag = tmp["blocktag"].(map[string]bool)
+						field.BlockTag = tmp["blocktag"].(map[string]string)
 					}
 					j.Result[k] = append(j.Result[k], field)
 					//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})

+ 2 - 0
src/jy/extract/score.go

@@ -14,9 +14,11 @@ import (
 
 var SoreConfig map[string]map[string]interface{}
 var TagConfig map[string]map[string]float64
+var TagConfigDesc map[string]string
 var TitleScore float64
 
 func init() {
+	qu.ReadConfig("./res/tagscoredesc.json", &TagConfigDesc)
 	qu.ReadConfig("./res/tagscore.json", &TagConfig)
 	qu.ReadConfig("./res/fieldscore.json", &SoreConfig)
 	TitleScore = qu.Float64All(SoreConfig["extractype"]["title"])

+ 1 - 1
src/jy/util/article.go

@@ -38,7 +38,7 @@ type Job struct {
 }
 
 type ExtField struct {
-	BlockTag    map[string]bool //块标签
+	BlockTag    map[string]string //块标签
 	Field       string          //属性
 	Code        string          //匹配标签(字符串、正则)、正则或lua代码
 	RuleText    string          //内容

+ 2 - 2
src/res/fieldscore.json

@@ -197,8 +197,8 @@
             },
             {
                 "describe": "包含负分",
-                "regstr": "(勘察|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|[,,。、::“”‘’\"])",
-                "score": -2
+                "regstr": "(勘察|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天|[,,。、::“”‘’_\"])",
+                "score": -1
             },
             {
                 "describe": "标段编号匹配-2",

+ 27 - 0
src/res/tagscoredesc.json

@@ -0,0 +1,27 @@
+{
+  "bidcondition": "招标条件",
+  "projectoverview": "项目概况/采购需求",
+  "bidder_requirement": "投标人资格要求",
+  "examineway": "资格审查方式",
+  "biddingsignup": "投标报名",
+  "biddingfile_obtain": "招标文件的获取",
+  "bidfile_submit": "投标文件的递交",
+  "purchasepolicy": "采购项目需要落实的政府采购政策",
+  "noticemedia": "公告媒体",
+  "superviseway": "监督方式",
+  "contactway": "联系方式",
+  "bidbond": "投标保证金",
+  "bidder_inforeg": "投标人信息注册",
+  "bid_note": "投标注意事项",
+  "projectinfo": "项目信息",
+  "buyerinfo": "采购单位信息",
+  "bidagencyinfo": "招标代理机构信息",
+  "winner": "中标供应商",
+  "dealinfo": "成交信息",
+  "servicecharge": "采购代理服务费收取",
+  "bidevaluat_result": "评标结果公示",
+  "bidevaluat_committee": "评标委员会",
+  "offerdetail": "报价明细",
+  "contractamount": "合同金额",
+  "payway": "付款方式"
+}