|
@@ -175,7 +175,7 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
|
|
} else if in.Field == "enterprise_qualification" || in.Field == "personnel_qualification" || in.Field == "performance_qualification" || in.Field == "enterprise_credit" {
|
|
} else if in.Field == "enterprise_qualification" || in.Field == "personnel_qualification" || in.Field == "performance_qualification" || in.Field == "enterprise_credit" {
|
|
hasResult := false
|
|
hasResult := false
|
|
//1.通过文中的资质要求抽取四个资质
|
|
//1.通过文中的资质要求抽取四个资质
|
|
- qualifications := ju.GetQualifications(pretreated.HtmlToText(qu.ObjToString(doc[extfrom])))
|
|
|
|
|
|
+ qualifications := ju.GetQualifications(j.ContentClean)
|
|
if qualifications != "" {
|
|
if qualifications != "" {
|
|
extinfo := extRegCoreToResult(extfrom, qualifications, &map[string]string{}, j, in, isSite, "")
|
|
extinfo := extRegCoreToResult(extfrom, qualifications, &map[string]string{}, j, in, isSite, "")
|
|
if len(extinfo) > 0 {
|
|
if len(extinfo) > 0 {
|
|
@@ -185,13 +185,7 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
|
|
}
|
|
}
|
|
//2.通过抽取资质要求段落匹配不到时,匹配全文或者附件内容,
|
|
//2.通过抽取资质要求段落匹配不到时,匹配全文或者附件内容,
|
|
if !hasResult {
|
|
if !hasResult {
|
|
- content := ""
|
|
|
|
- if j.IsFile {
|
|
|
|
- content = j.Content
|
|
|
|
- } else {
|
|
|
|
- content = doc["detail"].(string)
|
|
|
|
- }
|
|
|
|
- extinfo := extRegCoreToResult(extfrom, content, &map[string]string{}, j, in, isSite, "")
|
|
|
|
|
|
+ extinfo := extRegCoreToResult(extfrom, j.ContentClean, &map[string]string{}, j, in, isSite, "")
|
|
if len(extinfo) > 0 {
|
|
if len(extinfo) > 0 {
|
|
AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
|
|
}
|
|
}
|