123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- package extract
- import (
- "fmt"
- "jy/clear"
- "jy/util"
- "log"
- util2 "qfw/util"
- "regexp"
- "strings"
- "unicode/utf8"
- )
- func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
- if len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0 {
- return j.Result
- }
- jdextweight := util2.IntAll((*j.Jsondata)["extweight"])
- tmps := make(map[string][]*util.ExtField)
- for _, v := range util.JsonData {
- tmp := make([]*util.ExtField, 0)
- //jsondata没有值跳过
- if j.Jsondata == nil || (*j.Jsondata)[v] == nil || (*j.Jsondata)[v] == "" {
- continue
- }
- //jsondata有值,res没有值,取jsondata值
- if j.Result[v] == nil || len(j.Result[v]) == 0 {
- if v == "budget" || v == "bidamount" {
- lockclear.Lock()
- cfn := e.ClearFn[v]
- lockclear.Unlock()
- newNum := clear.DoClearFn(cfn, []interface{}{util2.Float64All((*j.Jsondata)[v]), ""})
- if util2.IntAll(newNum[0]) != 0 {
- extFields := make([]*util.ExtField, 0)
- extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
- j.Result[v] = extFields
- //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- }
- continue
- }
- vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
- if vv == ""|| strings.Contains(vv,"详见公告"){
- continue
- }
- lockscore.Lock()
- scoreRule := SoreConfig[v]
- lockscore.Unlock()
- tmpExtField := &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: vv, Score: 0.1}
- //1.长度打分
- valueLen := utf8.RuneCountInString(fmt.Sprint(tmpExtField.Value))
- if valueLen < 1 {
- tmpExtField.Score = -5
- continue
- }
- if valueLen > 100 {
- tmpExtField.Score = -99
- }
- if lengths, ok := scoreRule["length"].([]interface{}); ok {
- for _, tmp := range lengths {
- if length, ok := tmp.(map[string]interface{}); ok {
- if ranges, ok := length["range"].([]interface{}); ok {
- gt := util2.IntAll(ranges[0])
- lte := util2.IntAll(ranges[1])
- if lte < 0 { //∞
- lte = 999999
- }
- score := util2.Float64All(ranges[2])
- if valueLen > gt && valueLen <= lte {
- tmpExtField.Score += score
- break
- }
- }
- }
- }
- }
- //2.负面词打分
- if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
- for _, position := range positions {
- if p, ok := position.(map[string]interface{}); ok {
- util2.Try(func() {
- if p["regexp"] != nil {
- reg := p["regexp"].(*regexp.Regexp)
- if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
- tmpExtField.Score += util2.Float64All(p["score"])
- }
- }
- }, func(err interface{}) {
- log.Println(err)
- })
- }
- }
- }
- //3.正面词打分
- if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
- for _, position := range positions {
- if p, ok := position.(map[string]interface{}); ok {
- util2.Try(func() {
- if p["regexp"] != nil {
- reg := p["regexp"].(*regexp.Regexp)
- if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
- tmpExtField.Score += util2.Float64All(p["score"])
- }
- }
- }, func(err interface{}) {
- log.Println(err)
- })
- }
- }
- }
- if tmpExtField.Score > 0{
- extFields := make([]*util.ExtField, 0)
- extFields = append(extFields,tmpExtField )
- j.Result[v] = extFields
- }
- //AddExtLog("extract", j.SourceMid, nil, (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- continue
- } else {
- if jdextweight == 0 {
- continue
- }
- if v == "budget" || v == "bidamount" {
- lockclear.Lock()
- cfn := e.ClearFn[v]
- lockclear.Unlock()
- newNum := clear.DoClearFn(cfn, []interface{}{util2.Float64All((*j.Jsondata)[v]), ""})
- if util2.IntAll(newNum[0]) != 0 {
- extFields := make([]*util.ExtField, 0)
- extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
- j.Result[v] = extFields
- //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- }
- continue
- }
- if strings.Trim(util2.ObjToString(j.Result[v][0].Value), " ") != strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " ") {
- tmp = append(tmp, j.Result[v][0])
- oneScore := j.Result[v][0].Score
- if jdextweight == 2 {
- oneScore += 2
- }
- tmp = append(tmp, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: oneScore})
- //AddExtLog("extract", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- //AddExtLog("clear", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
- tmps[v] = tmp
- }
- }
- }
- if jdextweight < 2 {
- for k, v := range tmps {
- lockscore.Lock()
- scoreRule := SoreConfig[k]
- lockscore.Unlock()
- if k == "projectname" || k == "projectcode" || k == "buyer" || k == "winner" || k == "agency" || k == "buyerperson" || k == "buyertel" {
- for i, tmpsvalue := range v {
- //1.长度打分
- valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
- if valueLen < 1 {
- v[i].Score = -5
- continue
- }
- if valueLen > 100 {
- v[i].Score = -99
- }
- if lengths, ok := scoreRule["length"].([]interface{}); ok {
- for _, tmp := range lengths {
- if length, ok := tmp.(map[string]interface{}); ok {
- if ranges, ok := length["range"].([]interface{}); ok {
- gt := util2.IntAll(ranges[0])
- lte := util2.IntAll(ranges[1])
- if lte < 0 { //∞
- lte = 999999
- }
- score := util2.Float64All(ranges[2])
- if valueLen > gt && valueLen <= lte {
- v[i].Score += score
- v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "JsonData长度打分", Code: fmt.Sprint(gt, "<", valueLen, "<=", lte), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: score})
- break
- }
- }
- }
- }
- }
- //2.负面词打分
- if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
- for _, position := range positions {
- if p, ok := position.(map[string]interface{}); ok {
- util2.Try(func() {
- if p["regexp"] != nil {
- reg := p["regexp"].(*regexp.Regexp)
- if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
- v[i].Score += util2.Float64All(p["score"])
- v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "JsonData负面词打分" + fmt.Sprint(p["describe"]), Code: "negativewords", RuleText: reg.String(), ScoreFrom: "fieldscore.json.negativewords", Value: tmpsvalue.Value, Score: util2.Float64All(p["score"])})
- }
- }
- }, func(err interface{}) {
- log.Println(err)
- })
- }
- }
- }
- //3.正面词打分
- if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
- for _, position := range positions {
- if p, ok := position.(map[string]interface{}); ok {
- util2.Try(func() {
- if p["regexp"] != nil {
- reg := p["regexp"].(*regexp.Regexp)
- if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
- v[i].Score += util2.Float64All(p["score"])
- v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "Jsondata正面词打分" + fmt.Sprint(p["describe"]), Code: "positivewords", RuleText: reg.String(), ScoreFrom: "fieldscore.json.positivewords", Value: tmpsvalue.Value, Score: util2.Float64All(p["score"])})
- }
- }
- }, func(err interface{}) {
- log.Println(err)
- })
- }
- }
- }
- }
- }
- }
- }
- for k, v := range tmps { //新打分的结果集放入到result中,v为数组只有2个值
- if v[0].Score == v[1].Score { //分数相等优先取打分的值
- if v[0].ExtFrom == "JsonData_"+k + "_" + fmt.Sprint(jdextweight){
- j.Result[k] = append(j.Result[k], v[1])
- } else {
- j.Result[k] = append(j.Result[k], v[0])
- }
- continue
- }
- j.Result[k] = append(j.Result[k], v...) //分数不相等就放入result排序
- }
- //结果排序
- for _, val := range j.Result {
- util.Sort(val)
- }
- return j.Result
- }
|