|
@@ -108,17 +108,17 @@ var clearWinnerReg = regexp.MustCompile("(名称|施工|拟定供应商名称|[:
|
|
var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
|
|
var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
|
|
|
|
|
|
// 包含字母的实体单位
|
|
// 包含字母的实体单位
|
|
-var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])$")
|
|
|
|
|
|
// letter_entity matches a whole string (anchored with ^ and $) made of 1-10
// CJK characters (U+4E00-U+9FA5), then 1-5 ASCII letters, then 1-10 CJK
// characters, and finally one of the listed organization suffixes.
+var letter_entity = regexp.MustCompile("^[\u4E00-\u9FA5]{1,10}[A-Za-z]{1,5}[\u4E00-\u9FA5]{1,10}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会体]|政府)$")
|
|
|
|
|
|
// 落款单位抽取
|
|
// 落款单位抽取
|
|
-var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会|体]))\n([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
|
|
|
|
-var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会|体]))([\\s]+)?([0-9]+年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体]))")
|
|
|
|
|
|
// inscribe_entity_1 matches an entity on its own line followed by a date on a
// later line. In order, the pattern requires: a newline, optional whitespace,
// the entity (capture group 2: one CJK character, 2-25 arbitrary non-newline
// characters, then one of the listed suffixes, which is group 3), optional
// trailing whitespace, one or more newlines, an optional whitespace run or a
// "发布时间" label with separators (group 4), and a date written either as
// N年N月N日 or as N-N-N (group 5).
+var inscribe_entity_1 = regexp.MustCompile("\n([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府|段))[\\s ]*[\n]+([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)")
|
|
|
|
// inscribe_entity_2 matches an entity and a date on the same line, followed on
// the next line by a second entity. In order, the pattern requires: a newline
// or "。", optional whitespace, the first entity (capture group 2: one CJK
// character, 2-25 arbitrary non-newline characters, then a suffix, which is
// group 3), optional whitespace or a "发布时间" label (group 4), a date written
// as N年N月N日 or N-N-N (group 5), a newline, and the second entity (group 6:
// one CJK character, 4-20 arbitrary non-newline characters, then a suffix,
// which is group 7).
//
// The suffix class of the second entity is written as 联合[会体]. Inside a
// character class "|" is a literal, so the former 联合[会|体] also accepted
// the text "联合|" as a valid suffix.
var inscribe_entity_2 = regexp.MustCompile("[\n。]([\\s]+)?([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府|段))[\\s ]*([\\s ]+|发布时间[::\\s ]+)?([0-9]+[\\s ]*年[0-9]+月[0-9]+日|[0-9]+[-][0-9]+[-][0-9]+)\n([\u4E00-\u9FA5].{4,20}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会体]))")
|
|
|
|
|
|
// 特殊实体
|
|
// 特殊实体
|
|
-var inscribe_entity_3 = regexp.MustCompile("(招标组织部门|招标机构)[::]([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会|体]))")
|
|
|
|
|
|
// inscribe_entity_3 matches a labelled entity: the label "招标组织部门" or
// "招标机构" (capture group 1), a colon, and then the entity (group 2: one CJK
// character, 2-25 arbitrary non-newline characters, then one of the listed
// suffixes, which is group 3). The pattern is not anchored.
+var inscribe_entity_3 = regexp.MustCompile("(招标组织部门|招标机构)[::]([\u4E00-\u9FA5].{2,25}(公司|集团|单位|委员会|办公室|车务段|机构|企业|厂|场|院|所|店|中心|市|校|学|局|站|城|处|行|部|队|联合[会体]|政府))")
|
|
|
|
|
|
// 有效企业
|
|
// 有效企业
|
|
-var effectivefirm = regexp.MustCompile("^[\u4E00-\u9FA5]{4,15}(公司|集团|委员会|机构|企业|设计|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会|体])$")
|
|
|
|
|
|
// effectivefirm matches a whole string (anchored with ^ and $) consisting of
// 4-15 CJK characters (U+4E00-U+9FA5) followed by one of the listed
// organization suffixes.
+var effectivefirm = regexp.MustCompile("^[\u4E00-\u9FA5]{4,15}(公司|集团|委员会|办公室|车务段|机构|企业|设计|厂|场|院|所|店|中心|市|局|站|城|处|行|部|队|联合[会体]|政府)$")
|
|
|
|
|
|
// 发布时间识别
|
|
// 发布时间识别
|
|
var inscribe_publishtime_1 = regexp.MustCompile("(\\d{4}[年-]\\d{1,2}[月-]\\d{1,2}[日-]*)")
|
|
var inscribe_publishtime_1 = regexp.MustCompile("(\\d{4}[年-]\\d{1,2}[月-]\\d{1,2}[日-]*)")
|
|
@@ -253,6 +253,25 @@ func isUsedMultiPackage(pkg map[string]map[string]interface{}) bool {
|
|
return false
|
|
return false
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+// 判断-附件分包是否无效判定(不通用)
|
|
|
|
+func isExistsPackage(pkg map[string]map[string]interface{}) bool {
|
|
|
|
+ if pkg == nil || len(pkg) == 0 {
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ if len(pkg) == 1 {
|
|
|
|
+ for _, v := range pkg {
|
|
|
|
+ winner := qu.ObjToString(v["winner"])
|
|
|
|
+ budget := qu.Float64All(v["budget"])
|
|
|
|
+ bidamout := qu.Float64All(v["bidamount"])
|
|
|
|
+ if winner != "" || budget > float64(0) || bidamout > float64(0) {
|
|
|
|
+ return true
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ return true
|
|
|
|
+}
|
|
|
|
+
|
|
// getQualifications 添加所有资质新字段
|
|
// getQualifications 添加所有资质新字段
|
|
func (e *ExtractTask) getQualifications(tmp *map[string]interface{}, j_data map[string]interface{}) {
|
|
func (e *ExtractTask) getQualifications(tmp *map[string]interface{}, j_data map[string]interface{}) {
|
|
/**
|
|
/**
|
|
@@ -274,7 +293,7 @@ func (e *ExtractTask) inscribeRecognize(tmp *map[string]interface{}, j_data map[
|
|
!(qu.ObjToString((*tmp)["toptype"]) == "拟建" && qu.ObjToString((*tmp)["subtype"]) == "拟建") {
|
|
!(qu.ObjToString((*tmp)["toptype"]) == "拟建" && qu.ObjToString((*tmp)["subtype"]) == "拟建") {
|
|
if new_buyer := InscribeEntity(qu.ObjToString(j_data["detail"]), *tmp); new_buyer != "" {
|
|
if new_buyer := InscribeEntity(qu.ObjToString(j_data["detail"]), *tmp); new_buyer != "" {
|
|
(*tmp)["buyer"] = new_buyer
|
|
(*tmp)["buyer"] = new_buyer
|
|
- (*tmp)["inscribe_buyer"] = "落款实体"
|
|
|
|
|
|
+ (*tmp)["inscribe_buyer"] = "落款结构实体"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//落款特殊实体
|
|
//落款特殊实体
|
|
@@ -286,13 +305,13 @@ func (e *ExtractTask) inscribeRecognize(tmp *map[string]interface{}, j_data map[
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//实体服务识别
|
|
//实体服务识别
|
|
- //if qu.ObjToString((*tmp)["buyer"]) == "" && ju.Inscribe &&
|
|
|
|
- // !(qu.ObjToString((*tmp)["toptype"]) == "拟建" && qu.ObjToString((*tmp)["subtype"]) == "拟建") {
|
|
|
|
- // if new_buyer := InscribeEntityDfa(qu.ObjToString(j_data["detail"]), jf_text, *tmp); new_buyer != "" {
|
|
|
|
- // (*tmp)["buyer"] = new_buyer
|
|
|
|
- // (*tmp)["inscribe_buyer"] = "实体识别"
|
|
|
|
- // }
|
|
|
|
- //}
|
|
|
|
|
|
+ if qu.ObjToString((*tmp)["buyer"]) == "" && ju.Inscribe &&
|
|
|
|
+ !(qu.ObjToString((*tmp)["toptype"]) == "拟建" && qu.ObjToString((*tmp)["subtype"]) == "拟建") {
|
|
|
|
+ if new_buyer := InscribeEntityDfa(qu.ObjToString(j_data["detail"]), jf_text, *tmp); new_buyer != "" {
|
|
|
|
+ (*tmp)["buyer"] = new_buyer
|
|
|
|
+ (*tmp)["inscribe_buyer"] = "实体识别服务"
|
|
|
|
+ }
|
|
|
|
+ }
|
|
//拟建不能存buyer
|
|
//拟建不能存buyer
|
|
if qu.ObjToString((*tmp)["toptype"]) == "拟建" &&
|
|
if qu.ObjToString((*tmp)["toptype"]) == "拟建" &&
|
|
qu.ObjToString((*tmp)["subtype"]) == "拟建" {
|
|
qu.ObjToString((*tmp)["subtype"]) == "拟建" {
|
|
@@ -349,6 +368,11 @@ func InscribeEntityDfa(detail string, jf_detail string, tmp map[string]interface
|
|
title := qu.ObjToString(tmp["title"])
|
|
title := qu.ObjToString(tmp["title"])
|
|
winner := qu.ObjToString(tmp["winner"])
|
|
winner := qu.ObjToString(tmp["winner"])
|
|
agency := qu.ObjToString(tmp["agency"])
|
|
agency := qu.ObjToString(tmp["agency"])
|
|
|
|
+ toptype := qu.ObjToString(tmp["toptype"])
|
|
|
|
+ //采用-标题项目名称
|
|
|
|
+ if new_str = EmployEntDfaText(title+"\n"+projectname, winner, agency); new_str != "" {
|
|
|
|
+ return new_str
|
|
|
|
+ }
|
|
if !entdfa_filtration.MatchString(title) {
|
|
if !entdfa_filtration.MatchString(title) {
|
|
//采用-排除表格的文本识别
|
|
//采用-排除表格的文本识别
|
|
new_detail := pretreated.TextAfterRemoveTable(detail)
|
|
new_detail := pretreated.TextAfterRemoveTable(detail)
|
|
@@ -359,22 +383,18 @@ func InscribeEntityDfa(detail string, jf_detail string, tmp map[string]interface
|
|
if new_str = EmployEntDfaText(new_detail, winner, agency); new_str != "" {
|
|
if new_str = EmployEntDfaText(new_detail, winner, agency); new_str != "" {
|
|
return new_str
|
|
return new_str
|
|
}
|
|
}
|
|
-
|
|
|
|
- //采用-去除标签的纯文本(含表格)
|
|
|
|
- new_detail = pretreated.HtmlToText(detail)
|
|
|
|
- new_detail = entdfa_clean.ReplaceAllString(new_detail, "\n")
|
|
|
|
- if len(new_detail) > 500 {
|
|
|
|
- new_detail = new_detail[len(new_detail)-500:]
|
|
|
|
- }
|
|
|
|
- if new_str = EmployEntDfaText(new_detail, winner, agency); new_str != "" {
|
|
|
|
- return new_str
|
|
|
|
|
|
+ if toptype != "结果" {
|
|
|
|
+ //采用-去除标签的纯文本(含表格)
|
|
|
|
+ new_detail = pretreated.HtmlToText(detail)
|
|
|
|
+ new_detail = entdfa_clean.ReplaceAllString(new_detail, "\n")
|
|
|
|
+ if len(new_detail) > 500 {
|
|
|
|
+ new_detail = new_detail[len(new_detail)-500:]
|
|
|
|
+ }
|
|
|
|
+ if new_str = EmployEntDfaText(new_detail, winner, agency); new_str != "" {
|
|
|
|
+ return new_str
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- //采用-标题项目名称
|
|
|
|
- if new_str = EmployEntDfaText(title+"\n"+projectname, winner, agency); new_str != "" {
|
|
|
|
- return new_str
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
//采用-附件识别
|
|
//采用-附件识别
|
|
if !entdfa_filtration.MatchString(title) {
|
|
if !entdfa_filtration.MatchString(title) {
|
|
if len(jf_detail) > 500 {
|
|
if len(jf_detail) > 500 {
|