package extract

import (
	"fmt"
	"jy/clear"
	"jy/util"
	"log"
	util2 "qfw/util"
	"regexp"
	"strings"
	"unicode/utf8"
)

// JsonDataMergeProcessing merges the values found in j.Jsondata into the
// extraction results stored in j.Result and returns j.Result.
//
// For every field name listed in util.JsonData that has a non-nil, non-empty
// JSON value:
//   - "budget" and "bidamount" are run through the task's clear functions; if
//     the cleaned value converts to a non-zero integer it replaces whatever is
//     stored in j.Result for that field (score 0.1).
//   - any other field with no existing result gets the trimmed JSON value as
//     its only result (score 0.1).
//   - any other field whose first existing result differs from the trimmed
//     JSON value becomes a two-element conflict (existing result, JSON value).
//     For "projectname", "buyer", "projectcode" and "agency" both candidates
//     are scored with the SoreConfig rules, then the conflict is appended to
//     j.Result.
//
// Finally every result list is passed through util.Sort.
//
// If j.Result is empty it is returned untouched. A nil j.Jsondata is treated
// as "no JSON values": nothing is merged, the results are only sorted.
func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
	if len(j.Result) <= 0 {
		return j.Result
	}

	if j.Jsondata != nil {
		for field, pair := range collectJSONDataConflicts(j, e) {
			scoreJSONDataConflict(field, pair)

			// pair always holds exactly two entries: the existing top result
			// and the candidate built from the JSON value.
			extracted, fromJSON := pair[0], pair[1]
			switch {
			case extracted.Score != fromJSON.Score:
				// Different scores: add both and let the sort below decide.
				j.Result[field] = append(j.Result[field], pair...)
			case extracted.ExtFrom == "JsonData_"+field:
				// Equal scores and the existing entry itself came from JSON data.
				j.Result[field] = append(j.Result[field], fromJSON)
			default:
				// Equal scores: prefer the extracted value.
				// NOTE(review): extracted is the same pointer that is already
				// stored in j.Result[field], so this adds a second reference to
				// it (the same holds for pair[0] in the unequal case above).
				// Kept as-is because callers may rely on it; confirm intent.
				j.Result[field] = append(j.Result[field], extracted)
			}
		}
	}

	// Order every result list.
	for _, fields := range j.Result {
		util.Sort(fields)
	}
	return j.Result
}

// collectJSONDataConflicts applies the JSON values that can be merged directly
// into j.Result and returns, per field, the [existing, fromJSON] pairs whose
// values disagree and still need scoring. j.Jsondata must be non-nil.
func collectJSONDataConflicts(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
	conflicts := make(map[string][]*util.ExtField)
	jsonData := *j.Jsondata

	for _, field := range util.JsonData {
		raw := jsonData[field]
		// No JSON value for this field: nothing to merge.
		if raw == nil || raw == "" {
			continue
		}

		// Amount fields are handled the same way whether or not a result
		// already exists.
		// NOTE(review): an existing extracted amount is replaced outright by
		// the JSON value when it cleans to a non-zero number; confirm intent.
		if field == "budget" || field == "bidamount" {
			if amount := cleanJSONDataAmount(e, field, raw); amount != nil {
				j.Result[field] = []*util.ExtField{amount}
			}
			continue
		}

		// NOTE: the AddExtLog("extract"/"clear", ...) calls for these merges
		// were already commented out and are not re-enabled here.
		trimmed := strings.Trim(util2.ObjToString(raw), " ")
		existing := j.Result[field]

		// JSON has a value but extraction produced none: take the JSON value.
		// len()==0 (rather than ==nil) also covers a non-nil empty slice, which
		// would otherwise panic on existing[0] below.
		if len(existing) == 0 {
			j.Result[field] = []*util.ExtField{newJSONDataField(field, raw, trimmed, 0.1)}
			continue
		}

		// Both sides have a value: only a disagreement needs scoring. The JSON
		// candidate starts from the same score as the existing top result.
		top := existing[0]
		if strings.Trim(util2.ObjToString(top.Value), " ") != trimmed {
			conflicts[field] = []*util.ExtField{top, newJSONDataField(field, raw, trimmed, top.Score)}
		}
	}
	return conflicts
}

// cleanJSONDataAmount runs raw through the clear functions registered for
// field and returns a JSON-sourced result, or nil when cleaning yields nothing
// or a value that converts to integer zero.
func cleanJSONDataAmount(e *ExtractTask, field string, raw interface{}) *util.ExtField {
	lockclear.Lock()
	cleaners := e.ClearFn[field]
	lockclear.Unlock()

	cleaned := clear.DoClearFn(cleaners, []interface{}{fmt.Sprint(raw), ""})
	if len(cleaned) == 0 || util2.IntAll(cleaned[0]) == 0 {
		return nil
	}
	return newJSONDataField(field, raw, cleaned[0], 0.1)
}

// newJSONDataField builds an ExtField tagged as coming from JSON data for field.
func newJSONDataField(field string, source, value interface{}, score float64) *util.ExtField {
	origin := "JsonData_" + field
	return &util.ExtField{
		Code:        origin,
		Field:       field,
		ExtFrom:     origin,
		SourceValue: source,
		Value:       value,
		Score:       score,
	}
}

// scoreJSONDataConflict adjusts the Score of each candidate in pair using the
// SoreConfig rules for field. Only "projectname", "buyer", "projectcode" and
// "agency" are scored; other fields are left untouched.
//
// Candidates are pointers, so the existing result that lives in j.Result is
// modified in place.
func scoreJSONDataConflict(field string, pair []*util.ExtField) {
	switch field {
	case "projectname", "buyer", "projectcode", "agency":
	default:
		return
	}

	lockscore.Lock()
	rule := SoreConfig[field]
	lockscore.Unlock()

	// A missing or differently-typed rule section yields a nil slice, which the
	// loops below simply skip.
	lengths, _ := rule["length"].([]interface{})
	negatives, _ := rule["negativewords"].([]interface{})
	positives, _ := rule["positivewords"].([]interface{})

	for _, cand := range pair {
		valueLen := utf8.RuneCountInString(fmt.Sprint(cand.Value))
		if valueLen < 1 {
			continue
		}
		if valueLen > 100 {
			// Over-long values are reset to -99 before the rules are applied.
			cand.Score = -99
		}
		applyJSONDataLengthRules(cand, lengths, valueLen) // 1. length scoring
		applyJSONDataWordRules(cand, negatives)           // 2. negative words
		applyJSONDataWordRules(cand, positives)           // 3. positive words
	}
}

// applyJSONDataLengthRules adds the score of the first length rule whose
// (gt, lte] range contains valueLen. Each rule's "range" is read as
// [gt, lte, score]; a negative lte means "no upper bound".
func applyJSONDataLengthRules(cand *util.ExtField, rules []interface{}, valueLen int) {
	for _, item := range rules {
		rule, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		bounds, ok := rule["range"].([]interface{})
		if !ok || len(bounds) < 3 {
			// Malformed range: skip it instead of panicking on the index.
			continue
		}
		gt := util2.IntAll(bounds[0])
		lte := util2.IntAll(bounds[1])
		if lte < 0 { // infinity
			lte = 999999
		}
		if valueLen > gt && valueLen <= lte {
			cand.Score += util2.Float64All(bounds[2])
			return
		}
	}
}

// applyJSONDataWordRules adds rule["score"] for every rule whose compiled
// "regexp" matches the candidate's value. Each rule runs inside util2.Try, so
// a panic (for example a "regexp" entry that is not a *regexp.Regexp) is
// logged and the remaining rules still run.
func applyJSONDataWordRules(cand *util.ExtField, rules []interface{}) {
	for _, item := range rules {
		rule, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		util2.Try(func() {
			if rule["regexp"] == nil {
				return
			}
			re := rule["regexp"].(*regexp.Regexp)
			if re.MatchString(util2.ObjToString(cand.Value)) {
				cand.Score += util2.Float64All(rule["score"])
			}
		}, func(err interface{}) {
			log.Println(err)
		})
	}
}