소스 검색

jsondata清理

fengweiqiang 5 년 전
부모
커밋
0f79913c0f
변경된 파일 2개: 42줄 추가, 1줄 삭제
  1. 2 0
      src/jy/extract/extract.go
  2. 40 1
      src/jy/extract/score_jsondata.go

+ 2 - 0
src/jy/extract/extract.go

@@ -2091,6 +2091,8 @@ func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[strin
 		marshalbt, _ := json.Marshal(j.Jsondata)
 		tmpjddata := make(map[string]interface{})
 		json.Unmarshal(marshalbt,&tmpjddata)
+		//jsondata清理
+		clearJd(j.Jsondata)
 		for _, jdkey := range ju.JsonData {
 			if (*j.Jsondata)[jdkey] != nil && (*j.Jsondata)[jdkey] != "" && len(j.Result[jdkey]) >= 5 {
 				for tmpk, tmpv := range j.Result[jdkey][:5] {

+ 40 - 1
src/jy/extract/score_jsondata.go

@@ -11,9 +11,48 @@ import (
 	"unicode/utf8"
 )
 
+// htmlclrear matches a single HTML-like tag, opening or closing, e.g. "<p>" or "</div>".
+var htmlclrear = regexp.MustCompile("</?[^>]+>")
+
+// endOfParenthesesClrear matches a bracketed group at the very end of a value:
+// one or more opening brackets, any run of CJK ideographs (U+4E00..U+9FA5) or
+// whitespace, then one or more closing brackets, e.g. a trailing "(盖章)".
+// ASCII, fullwidth (U+FF08/U+FF09) and lenticular brackets are all accepted.
+// Code points are written as \x{...} because Go's regexp syntax has no \u
+// escape: inside a raw string `\u4e00` makes MustCompile panic at init.
+var endOfParenthesesClrear = regexp.MustCompile(`[\(\[\x{ff08}【\{]+[\x{4e00}-\x{9fa5}\s]*[\)\x{ff09}\]】\}]+$`)
+
+// endOfPunctuationClrear matches a trailing run of commas, periods, question
+// marks and semicolons, in their ASCII and fullwidth forms (U+FF0C, U+FF1F,
+// U+FF1B), plus the ideographic full stop.
+var endOfPunctuationClrear = regexp.MustCompile(`[,\x{ff0c}.。?\x{ff1f};\x{ff1b}]+$`)
+
+// keysClrear matches placeholder keywords and symbols anywhere in a value.
+// The fullwidth question mark is spelled \x{ff1f}; a bare "?" alternative is
+// not a valid pattern and would make MustCompile panic at init.
+var keysClrear = regexp.MustCompile(`(详见|公告|X|内文|某单位|某部|文件|\*|暂无|\x{ff1f}|\?)`)
+
+// clearJd normalizes the buyer, winner, agency, projectcode and projectname
+// entries of *jd in place. Each value is converted with util2.ObjToString and
+// then stripped of HTML tags, of a trailing bracketed group, of trailing
+// punctuation and of placeholder keywords. The entry is deleted when the
+// converted value is empty or when the cleaned text is shorter than 5 or
+// longer than 35 runes; otherwise it is replaced by the cleaned text. All
+// other keys are left untouched. A nil jd is a no-op.
+//
+// NOTE(review): keyword removal runs last, so punctuation it exposes at the
+// end of the value is not trimmed again — confirm this order is intended.
+func clearJd(jd *map[string]interface{}) {
+	if jd == nil {
+		return
+	}
+	// Deleting or overwriting the current key while ranging over a map is allowed in Go.
+	for k, v := range *jd {
+		switch k {
+		case "buyer", "winner", "agency", "projectcode", "projectname":
+		default:
+			continue
+		}
+		vstring := util2.ObjToString(v)
+		if vstring == "" {
+			delete(*jd, k)
+			continue
+		}
+		vstring = htmlclrear.ReplaceAllString(vstring, "")
+		vstring = endOfParenthesesClrear.ReplaceAllString(vstring, "")
+		vstring = endOfPunctuationClrear.ReplaceAllString(vstring, "")
+		vstring = keysClrear.ReplaceAllString(vstring, "")
+		// Length is measured in runes, not bytes, so CJK text is counted per character.
+		if n := utf8.RuneCountInString(vstring); n < 5 || n > 35 {
+			delete(*jd, k)
+			continue
+		}
+		(*jd)[k] = vstring
+	}
+}
+
 func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
 	jdextweight := util2.IntAll((*j.Jsondata)["extweight"])
-	if jdextweight==0{
+	if jdextweight == 0 {
 		return j.Result
 	}
 	tmps := make(map[string][]*util.ExtField)