|
@@ -11,9 +11,48 @@ import (
|
|
|
"unicode/utf8"
|
|
|
)
|
|
|
|
|
|
// Pre-compiled patterns used by clearJd to scrub extracted field values.
//
// Non-ASCII punctuation is written as \x{...} code-point escapes instead of
// literal characters or \uXXXX: Go's regexp (RE2) syntax has no \u escape
// (MustCompile would panic at package init), and explicit escapes survive
// re-encoding or width-normalization of this file.
//
// NOTE(review): the full-width alternatives below (U+FF08, U+FF09, U+FF0C,
// U+FF1F, U+FF1B) were reconstructed from duplicated ASCII characters in the
// previous patterns — confirm they match the intended character set.
var (
	// htmlclrear matches any opening or closing HTML-style tag.
	htmlclrear = regexp.MustCompile("</?[^>]+>")

	// endOfParenthesesClrear matches a bracketed group at the very end of the
	// string whose content consists solely of CJK ideographs (U+4E00..U+9FA5)
	// and whitespace. Accepted brackets: ( [ { plus full-width ( (U+FF08) and
	// 【 (U+3010), with the corresponding closers.
	endOfParenthesesClrear = regexp.MustCompile(`[\(\[\x{FF08}\x{3010}\{]+[\x{4e00}-\x{9fa5}\s]*[\)\x{FF09}\]\x{3011}\}]+$`)

	// endOfPunctuationClrear matches a run of trailing punctuation: comma,
	// full stop, question mark and semicolon in both ASCII and CJK/full-width
	// forms.
	endOfPunctuationClrear = regexp.MustCompile(`[,\x{FF0C}.\x{3002}?\x{FF1F};\x{FF1B}]+$`)

	// keysClrear matches filler keywords and placeholder symbols anywhere in
	// the string. The question mark alternatives are a full-width ? (U+FF1F)
	// and an escaped ASCII "?"; an unescaped "?" alternative is a regexp
	// syntax error.
	keysClrear = regexp.MustCompile(`(详见|公告|X|内文|某单位|某部|文件|\*|暂无|\x{FF1F}|\?)`)
)
|
|
|
+
|
|
|
+//jsondata清理
|
|
|
+func clearJd(jd *map[string]interface{}) {
|
|
|
+ for k, v := range *jd {
|
|
|
+ if k == "buyer" || k == "winner" || k == "agency" || k == "projectcode" || k == "projectname" {
|
|
|
+ vstring := util2.ObjToString(v)
|
|
|
+ if vstring == "" {
|
|
|
+ delete(*jd, k)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ vstring = htmlclrear.ReplaceAllString(vstring, "")
|
|
|
+ vstring = endOfParenthesesClrear.ReplaceAllString(vstring, "")
|
|
|
+ vstring = endOfPunctuationClrear.ReplaceAllString(vstring, "")
|
|
|
+ vstring = keysClrear.ReplaceAllString(vstring, "")
|
|
|
+ if utf8.RuneCountInString(vstring) < 5 {
|
|
|
+ delete(*jd, k)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if utf8.RuneCountInString(vstring) > 35 {
|
|
|
+ delete(*jd, k)
|
|
|
+ continue
|
|
|
+ } else {
|
|
|
+ (*jd)[k] = vstring
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
|
|
|
jdextweight := util2.IntAll((*j.Jsondata)["extweight"])
|
|
|
- if jdextweight==0{
|
|
|
+ if jdextweight == 0 {
|
|
|
return j.Result
|
|
|
}
|
|
|
tmps := make(map[string][]*util.ExtField)
|