|
@@ -39,10 +39,80 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
|
|
|
}
|
|
|
continue
|
|
|
}
|
|
|
-
|
|
|
- extFields := make([]*util.ExtField, 0)
|
|
|
- extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: 0.1})
|
|
|
- j.Result[v] = extFields
|
|
|
+ vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
|
|
|
+ if vv == ""|| strings.Contains(vv,"详见公告"){
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ lockscore.Lock()
|
|
|
+ scoreRule := SoreConfig[v]
|
|
|
+ lockscore.Unlock()
|
|
|
+ tmpExtField := &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: vv, Score: 0.1}
|
|
|
+ //1.长度打分
|
|
|
+ valueLen := utf8.RuneCountInString(fmt.Sprint(tmpExtField.Value))
|
|
|
+ if valueLen < 1 {
|
|
|
+ tmpExtField.Score = -5
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if valueLen > 100 {
|
|
|
+ tmpExtField.Score = -99
|
|
|
+ }
|
|
|
+ if lengths, ok := scoreRule["length"].([]interface{}); ok {
|
|
|
+ for _, tmp := range lengths {
|
|
|
+ if length, ok := tmp.(map[string]interface{}); ok {
|
|
|
+ if ranges, ok := length["range"].([]interface{}); ok {
|
|
|
+ gt := util2.IntAll(ranges[0])
|
|
|
+ lte := util2.IntAll(ranges[1])
|
|
|
+ if lte < 0 { //∞
|
|
|
+ lte = 999999
|
|
|
+ }
|
|
|
+ score := util2.Float64All(ranges[2])
|
|
|
+ if valueLen > gt && valueLen <= lte {
|
|
|
+ tmpExtField.Score += score
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //2.负面词打分
|
|
|
+ if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
|
|
|
+ for _, position := range positions {
|
|
|
+ if p, ok := position.(map[string]interface{}); ok {
|
|
|
+ util2.Try(func() {
|
|
|
+ if p["regexp"] != nil {
|
|
|
+ reg := p["regexp"].(*regexp.Regexp)
|
|
|
+ if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
|
|
|
+ tmpExtField.Score += util2.Float64All(p["score"])
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Println(err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //3.正面词打分
|
|
|
+ if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
|
|
|
+ for _, position := range positions {
|
|
|
+ if p, ok := position.(map[string]interface{}); ok {
|
|
|
+ util2.Try(func() {
|
|
|
+ if p["regexp"] != nil {
|
|
|
+ reg := p["regexp"].(*regexp.Regexp)
|
|
|
+ if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
|
|
|
+ tmpExtField.Score += util2.Float64All(p["score"])
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Println(err)
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if tmpExtField.Score > 0{
|
|
|
+ extFields := make([]*util.ExtField, 0)
|
|
|
+ extFields = append(extFields,tmpExtField )
|
|
|
+ j.Result[v] = extFields
|
|
|
+ }
|
|
|
//AddExtLog("extract", j.SourceMid, nil, (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
|
|
|
//AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
|
|
|
continue
|