1 年間前 · db186e72bc
--- a/src/jy/admin/rulecheck.go
+++ b/src/jy/admin/rulecheck.go
@@ -313,6 +313,35 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 
				 					}
			
 
				 				}
			
 
				 			}
			
 
				+		} else {
			
 
				+			//添加 "<<"符号，返回所有匹配结果
			
 
				+			regs := strings.Split(ptmp[0], "<<")
			
 
				+			if len(regs) == 2 {
			
 
				+				parses := regs[1] //定义的排除关键词，含有这些关键词，则视为作废
			
 
				+				pattern := regs[0]
			
 
				+				reg := regexp.MustCompile(pattern)
			
 
				+				results := reg.FindAllString(content, -1)
			
 
				+				if len(results) > 0 {
			
 
				+					results = ju.RemoveDuplicates(results)
			
 
				+					result := ""
			
 
				+					//有排除词时需要过滤
			
 
				+					if len(parses) > 0 {
			
 
				+						var res []string
			
 
				+						for _, v := range results {
			
 
				+							reg2 := regexp.MustCompile(parses)
			
 
				+							res2 := reg2.FindAllString(v, -1)
			
 
				+							if len(res2) == 0 {
			
 
				+								res = append(res, v)
			
 
				+							}
			
 
				+						}
			
 
				+						result = strings.Join(res, "\n")
			
 
				+					} else {
			
 
				+						result = strings.Join(results, "\n")
			
 
				+					}
			
 
				+
			
 
				+					rep[field] = result
			
 
				+				}
			
 
				+			}
			
 
				 		}
			
 
				 	}, func(err interface{}) {
			
 
				 		rep["err"] = fmt.Sprint(err)
			
--- a/src/jy/extract/extractcheck.go
+++ b/src/jy/extract/extractcheck.go
@@ -256,5 +256,25 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	//企业资质检验,不含有资质时删除
			
 
				+	if enterprise_qualification, ok := tmp["enterprise_qualification"]; ok {
			
 
				+		special := `(甲级|乙级|丙级|丁级|一级|二级|三级|叁级|壹级|贰级|四级|五级|工程设计|市政公用工程|铁路工程|建筑工程|公路工程|人防工程|工程勘察|岩土工程|水文地质勘察|工程测量|工程钻探|电力工程|大地测量|消防设施工程|特种工程|房屋建筑工程|信息技术服务|信息系统安全|机电工程|建筑机电安装工程|消防设施工程|水利水电工程|水利工程|地基基础工程)`
			
 
				+		reg := regexp.MustCompile(special)
			
 
				+		var res = make([]string, 0)
			
 
				+		datas := strings.Split(qu.ObjToString(enterprise_qualification), "\n")
			
 
				+		for _, data := range datas {
			
 
				+			results := reg.FindAllString(data, -1)
			
 
				+			if len(results) > 0 {
			
 
				+				res = append(res, data)
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if len(res) == 0 {
			
 
				+			delete(tmp, "enterprise_qualification")
			
 
				+		} else {
			
 
				+			tmp["enterprise_qualification"] = strings.Join(res, "\n")
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	return tmp
			
 
				 }
			
--- a/src/jy/extract/extractrule.go
+++ b/src/jy/extract/extractrule.go
@@ -172,6 +172,30 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
 
				 			if len(extinfo) > 0 {
			
 
				 				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				 			}
			
 
				+		} else if in.Field == "enterprise_qualification" || in.Field == "personnel_qualification" || in.Field == "performance_qualification" || in.Field == "enterprise_credit" {
			
 
				+			hasResult := false
			
 
				+			//1.通过文中的资质要求抽取四个资质
			
 
				+			qualifications := ju.GetQualifications(pretreated.HtmlToText(qu.ObjToString(doc[extfrom])))
			
 
				+			if qualifications != "" {
			
 
				+				extinfo := extRegCoreToResult(extfrom, qualifications, &map[string]string{}, j, in, isSite, "")
			
 
				+				if len(extinfo) > 0 {
			
 
				+					hasResult = true
			
 
				+					AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+				}
			
 
				+			}
			
 
				+			//2.通过抽取资质要求段落匹配不到时，匹配全文或者附件内容，
			
 
				+			if !hasResult {
			
 
				+				content := ""
			
 
				+				if j.IsFile {
			
 
				+					content = j.Content
			
 
				+				} else {
			
 
				+					content = doc["detail"].(string)
			
 
				+				}
			
 
				+				extinfo := extRegCoreToResult(extfrom, content, &map[string]string{}, j, in, isSite, "")
			
 
				+				if len(extinfo) > 0 {
			
 
				+					AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+				}
			
 
				+			}
			
 
				 		} else {
			
 
				 			for _, v := range j.Block {
			
 
				 				btag := make(map[string]string)
			
@@ -651,14 +675,43 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job,
 
				 			}
			
 
				 		}
			
 
				 	} else {
			
 
				-		pos := vre.RegCore.Reg.FindStringIndex(text)
			
 
				 		val := ""
			
 
				-		if len(pos) == 2 {
			
 
				-			text = text[pos[1]:]
			
 
				-			rs := regexp.MustCompile("[^\r\n\t]+")
			
 
				-			tmp := rs.FindAllString(text, -1)
			
 
				-			if len(tmp) > 0 {
			
 
				-				val = tmp[0]
			
 
				+		//正则表达式含有"<<",表示需要所有匹配数据
			
 
				+		regs := strings.Split(vre.RuleText, "<<")
			
 
				+		if len(regs) == 2 {
			
 
				+			pattern := regs[0]
			
 
				+			parses := regs[1] //定义的排除关键词，含有这些关键词，则视为作废
			
 
				+			reg := regexp.MustCompile(pattern)
			
 
				+			results := reg.FindAllString(text, -1)
			
 
				+			if len(results) > 0 {
			
 
				+				results = ju.RemoveDuplicates(results)
			
 
				+				result := ""
			
 
				+				//有排除词时需要过滤
			
 
				+				if len(parses) > 0 {
			
 
				+					var res []string
			
 
				+					for _, v := range results {
			
 
				+						reg2 := regexp.MustCompile(parses)
			
 
				+						res2 := reg2.FindAllString(v, -1)
			
 
				+						if len(res2) == 0 {
			
 
				+							res = append(res, v)
			
 
				+						}
			
 
				+					}
			
 
				+					result = strings.Join(res, "\n")
			
 
				+				} else {
			
 
				+					result = strings.Join(results, "\n")
			
 
				+				}
			
 
				+
			
 
				+				val = result
			
 
				+			}
			
 
				+		} else {
			
 
				+			pos := vre.RegCore.Reg.FindStringIndex(text)
			
 
				+			if len(pos) == 2 {
			
 
				+				text = text[pos[1]:]
			
 
				+				rs := regexp.MustCompile("[^\r\n\t]+")
			
 
				+				tmp := rs.FindAllString(text, -1)
			
 
				+				if len(tmp) > 0 {
			
 
				+					val = tmp[0]
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 		if val != "" {
			
--- a/src/jy/extract/extractsave.go
+++ b/src/jy/extract/extractsave.go
@@ -399,6 +399,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 
				 		e.inscribeRecognize(&tmp, *j.Data)
			
 
				 		//落款识别指定特殊采购单位
			
 
				 		e.AimAtRecognizeBuyer(&tmp, *j.Data)
			
 
				+		//根据正文获取资质要求
			
 
				+		e.getQualifications(&tmp, *j.Data)
			
 
				 		//城市抽取
			
 
				 		if e.IsExtractCity {
			
 
				 			//e.NewExtractCity(j, &tmp) //旧版
			
--- a/src/jy/extract/extraxtmethod.go
+++ b/src/jy/extract/extraxtmethod.go
@@ -216,7 +216,21 @@ func isUsedMultiPackage(pkg map[string]map[string]interface{}) bool {
 
				 	return false
			
 
				 }
			
 
				 
			
 
				-// 落款识别~采购单位
			
 
				+//getQualifications 添加所有资质新字段
			
 
				+func (e *ExtractTask) getQualifications(tmp *map[string]interface{}, j_data map[string]interface{}) {
			
 
				+	/**
			
 
				+	qualifications 资质要求
			
 
				+	*/
			
 
				+	detail := qu.ObjToString(j_data["detail"])
			
 
				+	new_detail := pretreated.HtmlToText(detail)
			
 
				+
			
 
				+	qualifications := ju.GetQualifications(new_detail)
			
 
				+	if qualifications != "" {
			
 
				+		(*tmp)["qualifications"] = qualifications
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//落款识别~采购单位
			
 
				 func (e *ExtractTask) inscribeRecognize(tmp *map[string]interface{}, j_data map[string]interface{}) {
			
 
				 	//落款实体
			
 
				 	if qu.ObjToString((*tmp)["buyer"]) == "" && ju.Inscribe &&
			
--- a/src/jy/pretreated/analycore.go
+++ b/src/jy/pretreated/analycore.go
@@ -62,7 +62,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}, isSite bool,
 
				 	if len(res) > 0 {
			
 
				 		for _, t1 := range res {
			
 
				 			//降低冒号值的权重-不适合日期格式的数据
			
 
				-			if MhSpilt.MatchString(v1) && !(UnTimeSpiltKey.MatchString(t1.Value) && UnTimeSpiltValue.MatchString(v1)) {
			
 
				+			if MhSpilt.MatchString(v1) && !(UnTimeSpiltKey.MatchString(t1.Value) && UnTimeSpiltValue.MatchString(v1)) && t1.Value != "企业信用" {
			
 
				 				t1.Weight -= 50
			
 
				 			}
			
 
				 			if winnerOrderAndBidResult.MatchString(tabletag) && t1.Value == "采购单位联系人" { //处理table中项目负责人
			
--- a/src/jy/util/util.go
+++ b/src/jy/util/util.go
@@ -8,6 +8,7 @@ import (
 
				 	qu "qfw/util"
			
 
				 	"regexp"
			
 
				 	"strconv"
			
 
				+	"strings"
			
 
				 
			
 
				 	. "gopkg.in/mgo.v2/bson"
			
 
				 )
			
@@ -274,3 +275,86 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 
				 	}
			
 
				 	return p_list
			
 
				 }
			
 
				+
			
 
				+//GetQualifications 从正文中获取气质要求
			
 
				+func GetQualifications(text string) (qualifications string) {
			
 
				+	re1 := regexp.MustCompile(`(\n(\s)*(\d)[、.．](\s*)[\p{Han}]+[:：]?)`)
			
 
				+	re2 := regexp.MustCompile(`(?m)^(★)?(一|二|三|四|五|六|七|八|九|十|十一)[、.](.+?)[\p{Han}]?[:：]?$`)
			
 
				+	match := regexp.MustCompile(`(投标人|参选单位|投标商|供应商|申请人|参选人|应答人|承包人|应答方|报名人|报价人|投标单位|服务商|竞价方).*(资质|资格|要求|准入条件|条件).*`)
			
 
				+	matchWords := []string{"合格的投标人", "资格要求", "入围审核条价", "报名要求"}
			
 
				+
			
 
				+	var sections = make([]map[string]interface{}, 0)
			
 
				+	sections = ExtractSections(text, re2)
			
 
				+	if len(sections) > 0 {
			
 
				+
			
 
				+	} else {
			
 
				+		sections = ExtractSections(text, re1)
			
 
				+	}
			
 
				+
			
 
				+	for _, section := range sections {
			
 
				+		matches := match.FindStringSubmatch(section["title"].(string))
			
 
				+		if len(matches) > 1 {
			
 
				+			return section["content"].(string)
			
 
				+		} else {
			
 
				+			for _, mword := range matchWords {
			
 
				+				if strings.Contains(section["title"].(string), mword) {
			
 
				+					return section["content"].(string)
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				// ExtractSections splits text into sections delimited by the headings that
				// re matches.
				//
				// Every match of re is a section title; the section content is the text
				// between the end of that match and the start of the next match (or the end
				// of text for the last section). Title and content are both trimmed of
				// surrounding whitespace. Text before the first heading is not returned.
				//
				// The result holds one map per heading with the string keys "title" and
				// "content", in document order. It is empty (but non-nil) when re does not
				// match.
				func ExtractSections(text string, re *regexp.Regexp) []map[string]interface{} {
					headings := re.FindAllStringIndex(text, -1)
					sections := make([]map[string]interface{}, 0, len(headings))

					for i, heading := range headings {
						// A section runs up to the next heading, or to the end of the text.
						contentEnd := len(text)
						if i+1 < len(headings) {
							contentEnd = headings[i+1][0]
						}

						sections = append(sections, map[string]interface{}{
							"title":   strings.TrimSpace(text[heading[0]:heading[1]]),
							"content": strings.TrimSpace(text[heading[1]:contentEnd]),
						})
					}

					return sections
				}
			
 
				+
			
 
				// ContainSpecialWord reports whether key contains at least one of words as
				// a substring. It returns false when words is empty.
				func ContainSpecialWord(key string, words []string) bool {
					for i := 0; i < len(words); i++ {
						if strings.Index(key, words[i]) >= 0 {
							// One hit is enough.
							return true
						}
					}
					return false
				}
			
 
				+
			
 
				// RemoveDuplicates returns the distinct strings of input, keeping the first
				// occurrence of each and preserving the original order. The result is a
				// new, non-nil slice even when input is empty.
				func RemoveDuplicates(input []string) []string {
					known := make(map[string]struct{}, len(input))
					unique := make([]string, 0, len(input))

					for i := range input {
						item := input[i]
						if _, dup := known[item]; dup {
							continue
						}
						known[item] = struct{}{}
						unique = append(unique, item)
					}

					return unique
				}