|
@@ -236,6 +236,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
} else {
|
|
|
detail = d2
|
|
|
}
|
|
|
+ detail = regexp.MustCompile(`<!--[\w\W]*?-->`).ReplaceAllString(detail, "")
|
|
|
d3, _ := doc["summary"].(string)
|
|
|
detail = ju.CutLableStr(d3 + "\n" + detail)
|
|
|
detail = cut.ClearHtml(d3 + "\n" + detail)
|
|
@@ -366,7 +367,6 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
}
|
|
|
}
|
|
|
lockrule.Unlock()
|
|
|
-
|
|
|
//抽取规则
|
|
|
for _, vc1 := range tmprules {
|
|
|
for _, vc := range vc1 {
|
|
@@ -392,17 +392,19 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
// log.Debug("抽取-后置规则", tmp)
|
|
|
|
|
|
//项目名称未能抽取到,标题来凑
|
|
|
- if vc.Field == "projectname" && vc.ExtFrom == "title" {
|
|
|
- isextitle := true
|
|
|
- for _, v := range j.Result[vc.Field] {
|
|
|
- if len([]rune(qu.ObjToString(v.Value))) > 5 {
|
|
|
- isextitle = false
|
|
|
- break
|
|
|
+ if vc.Field == "projectname" {
|
|
|
+ if vc.ExtFrom == "title" {
|
|
|
+ isextitle := true
|
|
|
+ for _, v := range j.Result[vc.Field] {
|
|
|
+ if len([]rune(qu.ObjToString(v.Value))) > 5 {
|
|
|
+ isextitle = false
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if isextitle { //标题加入选举
|
|
|
+ field := &ju.ExtField{Field: vc.Field, Code: vc.Id + "_title", RuleText: "title", Type: "title", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title}
|
|
|
+ j.Result[vc.Field] = append(j.Result[vc.Field], field)
|
|
|
}
|
|
|
- }
|
|
|
- if isextitle { //标题加入选举
|
|
|
- field := &ju.ExtField{Field: vc.Field, Code: vc.Id + "_title", RuleText: "title", Type: "title", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title}
|
|
|
- j.Result[vc.Field] = append(j.Result[vc.Field], field)
|
|
|
}
|
|
|
for i := 0; i < 3; i++ {
|
|
|
for _, v := range vc.RuleBacks {
|
|
@@ -412,42 +414,10 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
//全局后置规则
|
|
|
for _, v := range e.RuleBacks {
|
|
|
ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
- //候选人加入
|
|
|
- if len(j.Winnerorder) > 0 {
|
|
|
- //候选人中标金额
|
|
|
- if price := j.Winnerorder[0]["price"]; price != nil {
|
|
|
- bidamount := &ju.ExtField{
|
|
|
- Field: "bidamount",
|
|
|
- Code: "",
|
|
|
- RuleText: "",
|
|
|
- Type: "winnerorder",
|
|
|
- MatchType: "winnerorder",
|
|
|
- ExtFrom: "",
|
|
|
- Value: price,
|
|
|
- Score: 0,
|
|
|
- }
|
|
|
- j.Result["bidamount"] = []*ju.ExtField{bidamount}
|
|
|
- }
|
|
|
- //候选人中标单位
|
|
|
- if entname := j.Winnerorder[0]["entname"]; entname != nil {
|
|
|
- winner := &ju.ExtField{
|
|
|
- Field: "winner",
|
|
|
- Code: "",
|
|
|
- RuleText: "",
|
|
|
- Type: "winnerorder",
|
|
|
- MatchType: "winnerorder",
|
|
|
- ExtFrom: "",
|
|
|
- Value: entname,
|
|
|
- Score: 0,
|
|
|
- }
|
|
|
- j.Result["winner"] = []*ju.ExtField{winner}
|
|
|
- }
|
|
|
- }
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
@@ -537,37 +507,6 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
|
|
|
ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
}
|
|
|
- //候选人加入
|
|
|
- if len(j.Winnerorder) > 0 {
|
|
|
- //候选人中标金额
|
|
|
- if price := j.Winnerorder[0]["price"]; price != nil {
|
|
|
- bidamount := &ju.ExtField{
|
|
|
- Field: "bidamount",
|
|
|
- Code: "",
|
|
|
- RuleText: "",
|
|
|
- Type: "winnerorder",
|
|
|
- MatchType: "winnerorder",
|
|
|
- ExtFrom: "",
|
|
|
- Value: price,
|
|
|
- Score: 0,
|
|
|
- }
|
|
|
- j.Result["bidamount"] = []*ju.ExtField{bidamount}
|
|
|
- }
|
|
|
- //候选人中标单位
|
|
|
- if entname := j.Winnerorder[0]["entname"]; entname != nil {
|
|
|
- winner := &ju.ExtField{
|
|
|
- Field: "winner",
|
|
|
- Code: "",
|
|
|
- RuleText: "",
|
|
|
- Type: "winnerorder",
|
|
|
- MatchType: "winnerorder",
|
|
|
- ExtFrom: "",
|
|
|
- Value: entname,
|
|
|
- Score: 0,
|
|
|
- }
|
|
|
- j.Result["winner"] = []*ju.ExtField{winner}
|
|
|
- }
|
|
|
- }
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
@@ -628,14 +567,16 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
|
|
|
|
|
|
//抽取-规则
|
|
|
func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju.Job) {
|
|
|
+ //候选人加入
|
|
|
var kvMap map[string][]map[string]interface{}
|
|
|
+ extByReg := true
|
|
|
if vc.ExtFrom != "title" {
|
|
|
- kvMap = getKvByLuaFields(vc, j, e)
|
|
|
+ kvMap, extByReg = getKvByLuaFields(vc, j, e)
|
|
|
}
|
|
|
for _, v := range vc.RuleCores {
|
|
|
if v.IsLua {
|
|
|
ExtRuleCoreByKv(vc.ExtFrom, doc, j, v, kvMap)
|
|
|
- } else {
|
|
|
+ } else if extByReg {
|
|
|
ExtRuleCoreByReg(vc.ExtFrom, doc, j, v, e)
|
|
|
}
|
|
|
}
|
|
@@ -720,8 +661,65 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
|
|
|
}
|
|
|
|
|
|
//lua脚本根据属性设置提取kv值
|
|
|
-func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) map[string][]map[string]interface{} {
|
|
|
+func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]map[string]interface{}, bool) {
|
|
|
kvmap := map[string][]map[string]interface{}{}
|
|
|
+ if len(j.Winnerorder) > 0 {
|
|
|
+ if vc.Field == "bidamount" {
|
|
|
+ for _, v := range j.Winnerorder {
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
+ "code": "winnerorder",
|
|
|
+ "field": vc.Field,
|
|
|
+ "ruletext": "中标候选人",
|
|
|
+ "extfrom": vc.ExtFrom,
|
|
|
+ "sourcevalue": "中标候选人",
|
|
|
+ "value": v["price"],
|
|
|
+ "type": "winnerorder",
|
|
|
+ "matchtype": "winnerorder",
|
|
|
+ })
|
|
|
+ }
|
|
|
+ //候选人中标金额
|
|
|
+ if price := j.Winnerorder[0]["price"]; price != nil {
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
+ "code": "CL_中标候选人",
|
|
|
+ "field": vc.Field,
|
|
|
+ "ruletext": "中标候选人",
|
|
|
+ "extfrom": vc.ExtFrom,
|
|
|
+ "sourcevalue": "中标候选人",
|
|
|
+ "value": price,
|
|
|
+ "type": "winnerorder",
|
|
|
+ "matchtype": "winnerorder",
|
|
|
+ })
|
|
|
+ return kvmap, false
|
|
|
+ }
|
|
|
+ } else if vc.Field == "winner" {
|
|
|
+ for _, v := range j.Winnerorder {
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
+ "code": "winnerorder",
|
|
|
+ "field": vc.Field,
|
|
|
+ "ruletext": "中标候选人",
|
|
|
+ "extfrom": vc.ExtFrom,
|
|
|
+ "sourcevalue": "中标候选人",
|
|
|
+ "value": v["entname"],
|
|
|
+ "type": "winnerorder",
|
|
|
+ "matchtype": "winnerorder",
|
|
|
+ })
|
|
|
+ }
|
|
|
+ //候选人中标单位
|
|
|
+ if entname := j.Winnerorder[0]["entname"]; entname != nil {
|
|
|
+ kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
+ "code": "CL_中标候选人",
|
|
|
+ "field": vc.Field,
|
|
|
+ "ruletext": "中标候选人",
|
|
|
+ "extfrom": vc.ExtFrom,
|
|
|
+ "sourcevalue": "中标候选人",
|
|
|
+ "value": entname,
|
|
|
+ "type": "winnerorder",
|
|
|
+ "matchtype": "winnerorder",
|
|
|
+ })
|
|
|
+ return kvmap, false
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
for fieldname, field := range vc.LFields {
|
|
|
if field != vc.Field {
|
|
|
continue
|
|
@@ -729,7 +727,7 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) map[string][]map
|
|
|
extractFromKv(field, fieldname, j.Block, vc, kvmap)
|
|
|
}
|
|
|
AddExtLog("extract", j.SourceMid, nil, kvmap, &RegLuaInfo{Field: vc.Field}, et.TaskInfo) //抽取日志
|
|
|
- return kvmap
|
|
|
+ return kvmap, true
|
|
|
}
|
|
|
|
|
|
func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kvmap map[string][]map[string]interface{}) {
|
|
@@ -1106,6 +1104,8 @@ type FieldValue struct {
|
|
|
//分析抽取结果并保存
|
|
|
func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
qu.Try(func() {
|
|
|
+ //重新取出清理过后的中标候选人
|
|
|
+ resetWinnerorder(j)
|
|
|
doc, result, _id := funcAnalysis(j, e.Tag)
|
|
|
if isSaveTag, _ := ju.Config["isSaveTag"].(bool); isSaveTag {
|
|
|
go otherNeedSave(j, result, e)
|
|
@@ -1517,3 +1517,34 @@ func (e *ExtractTask) StartMatch(field, text string) *pretreated.SortMap {
|
|
|
}
|
|
|
return SMap
|
|
|
}
|
|
|
+
|
|
|
+//中标候选人经过清理之后,重新取出赋值
|
|
|
+func resetWinnerorder(j *ju.Job) {
|
|
|
+ if len(j.Winnerorder) == 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ //中标单位
|
|
|
+ i := 0
|
|
|
+ winners := []*ju.ExtField{}
|
|
|
+ for _, v := range j.Result["winner"] {
|
|
|
+ if v.Code == "winnerorder" {
|
|
|
+ j.Winnerorder[i]["entname"] = v.Value
|
|
|
+ i++
|
|
|
+ } else {
|
|
|
+ winners = append(winners, v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ j.Result["winner"] = winners
|
|
|
+ //中标金额
|
|
|
+ i = 0
|
|
|
+ bidamounts := []*ju.ExtField{}
|
|
|
+ for _, v := range j.Result["bidamount"] {
|
|
|
+ if v.Code == "winnerorder" {
|
|
|
+ j.Winnerorder[i]["price"] = v.Value
|
|
|
+ i++
|
|
|
+ } else {
|
|
|
+ bidamounts = append(bidamounts, v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ j.Result["bidamount"] = bidamounts
|
|
|
+}
|