score_jsondata.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. package extract
  2. import (
  3. "fmt"
  4. "jy/clear"
  5. "jy/util"
  6. "log"
  7. util2 "qfw/util"
  8. "regexp"
  9. "strings"
  10. "unicode/utf8"
  11. )
  12. func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
  13. if len((j.Result)) <= 0 {
  14. return j.Result
  15. }
  16. tmps := make(map[string][]*util.ExtField)
  17. for _, v := range util.JsonData {
  18. tmp := make([]*util.ExtField, 0)
  19. //jsondata没有值跳过
  20. if (*j.Jsondata)[v] == nil || (*j.Jsondata)[v] == "" {
  21. continue
  22. }
  23. //jsondata有值,res没有值,取jsondata值
  24. if j.Result[v] == nil {
  25. if v == "budget" || v == "bidamount" {
  26. lockclear.Lock()
  27. cfn := e.ClearFn[v]
  28. lockclear.Unlock()
  29. newNum := clear.DoClearFn(cfn, []interface{}{fmt.Sprint((*j.Jsondata)[v]), ""})
  30. if util2.IntAll(newNum[0]) != 0 {
  31. extFields := make([]*util.ExtField, 0)
  32. extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v, Field: v, ExtFrom: "JsonData_" + v, SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
  33. j.Result[v] = extFields
  34. //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  35. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  36. }
  37. continue
  38. }
  39. extFields := make([]*util.ExtField, 0)
  40. extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v, Field: v, ExtFrom: "JsonData_" + v, SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: 0.1})
  41. j.Result[v] = extFields
  42. //AddExtLog("extract", j.SourceMid, nil, (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  43. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  44. continue
  45. } else {
  46. if v == "budget" || v == "bidamount" {
  47. lockclear.Lock()
  48. cfn := e.ClearFn[v]
  49. lockclear.Unlock()
  50. newNum := clear.DoClearFn(cfn, []interface{}{fmt.Sprint((*j.Jsondata)[v]), ""})
  51. if util2.IntAll(newNum[0]) != 0 {
  52. extFields := make([]*util.ExtField, 0)
  53. extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v, Field: v, ExtFrom: "JsonData_" + v, SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
  54. j.Result[v] = extFields
  55. //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  56. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  57. }
  58. continue
  59. }
  60. if strings.Trim(util2.ObjToString(j.Result[v][0].Value), " ") != strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " ") {
  61. tmp = append(tmp, j.Result[v][0])
  62. tmp = append(tmp, &util.ExtField{Code: "JsonData_" + v, Field: v, ExtFrom: "JsonData_" + v, SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: j.Result[v][0].Score})
  63. //AddExtLog("extract", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  64. //AddExtLog("clear", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  65. tmps[v] = tmp
  66. }
  67. }
  68. }
  69. for k, v := range tmps {
  70. lockscore.Lock()
  71. scoreRule := SoreConfig[k]
  72. lockscore.Unlock()
  73. if k == "projectname" || k == "buyer" || k == "projectcode" || k == "agency" {
  74. for i, tmpsvalue := range v {
  75. //1.长度打分
  76. valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
  77. if valueLen < 1 {
  78. continue
  79. }
  80. if valueLen > 100 {
  81. v[i].Score = -99
  82. }
  83. if lengths, ok := scoreRule["length"].([]interface{}); ok {
  84. for _, tmp := range lengths {
  85. if length, ok := tmp.(map[string]interface{}); ok {
  86. if ranges, ok := length["range"].([]interface{}); ok {
  87. gt := util2.IntAll(ranges[0])
  88. lte := util2.IntAll(ranges[1])
  89. if lte < 0 { //∞
  90. lte = 999999
  91. }
  92. score := util2.Float64All(ranges[2])
  93. if valueLen > gt && valueLen <= lte {
  94. v[i].Score += score
  95. break
  96. }
  97. }
  98. }
  99. }
  100. }
  101. //2.负面词打分
  102. if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
  103. for _, position := range positions {
  104. if p, ok := position.(map[string]interface{}); ok {
  105. util2.Try(func() {
  106. if p["regexp"] != nil {
  107. reg := p["regexp"].(*regexp.Regexp)
  108. if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
  109. v[i].Score += util2.Float64All(p["score"])
  110. }
  111. }
  112. }, func(err interface{}) {
  113. log.Println(err)
  114. })
  115. }
  116. }
  117. }
  118. //3.正面词打分
  119. if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
  120. for _, position := range positions {
  121. if p, ok := position.(map[string]interface{}); ok {
  122. util2.Try(func() {
  123. if p["regexp"] != nil {
  124. reg := p["regexp"].(*regexp.Regexp)
  125. if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
  126. v[i].Score += util2.Float64All(p["score"])
  127. }
  128. }
  129. }, func(err interface{}) {
  130. log.Println(err)
  131. })
  132. }
  133. }
  134. }
  135. }
  136. }
  137. }
  138. for k, v := range tmps { //新打分的结果集放入到result中,v为数组只有2个值
  139. if v[0].Score == v[1].Score {//分数相等优先取打分的值
  140. if v[0].ExtFrom == "JsonData_"+k {
  141. j.Result[k] = append(j.Result[k], v[1])
  142. } else {
  143. j.Result[k] = append(j.Result[k], v[0])
  144. }
  145. continue
  146. }
  147. j.Result[k] = append(j.Result[k], v...)//分数不相等就放入result排序
  148. }
  149. //结果排序
  150. for _, val := range j.Result {
  151. util.Sort(val)
  152. }
  153. return j.Result
  154. }