score_jsondata.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. package extract
  2. import (
  3. "fmt"
  4. "jy/clear"
  5. "jy/util"
  6. "log"
  7. util2 "qfw/util"
  8. "regexp"
  9. "strings"
  10. "unicode/utf8"
  11. )
  12. func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
  13. if len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0 {
  14. return j.Result
  15. }
  16. jdextweight := util2.IntAll((*j.Jsondata)["extweight"])
  17. tmps := make(map[string][]*util.ExtField)
  18. for _, v := range util.JsonData {
  19. tmp := make([]*util.ExtField, 0)
  20. //jsondata没有值跳过
  21. if j.Jsondata == nil || (*j.Jsondata)[v] == nil || (*j.Jsondata)[v] == "" {
  22. continue
  23. }
  24. //jsondata有值,res没有值,取jsondata值
  25. if j.Result[v] == nil || len(j.Result[v]) == 0 {
  26. if v == "budget" || v == "bidamount" {
  27. lockclear.Lock()
  28. cfn := e.ClearFn[v]
  29. lockclear.Unlock()
  30. newNum := clear.DoClearFn(cfn, []interface{}{util2.Float64All((*j.Jsondata)[v]), ""})
  31. if util2.IntAll(newNum[0]) != 0 {
  32. extFields := make([]*util.ExtField, 0)
  33. extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
  34. j.Result[v] = extFields
  35. //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  36. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  37. }
  38. continue
  39. }
  40. vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
  41. if vv == ""|| strings.Contains(vv,"详见公告"){
  42. continue
  43. }
  44. lockscore.Lock()
  45. scoreRule := SoreConfig[v]
  46. lockscore.Unlock()
  47. tmpExtField := &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: vv, Score: 0.1}
  48. //1.长度打分
  49. valueLen := utf8.RuneCountInString(fmt.Sprint(tmpExtField.Value))
  50. if valueLen < 1 {
  51. tmpExtField.Score = -5
  52. continue
  53. }
  54. if valueLen > 100 {
  55. tmpExtField.Score = -99
  56. }
  57. if lengths, ok := scoreRule["length"].([]interface{}); ok {
  58. for _, tmp := range lengths {
  59. if length, ok := tmp.(map[string]interface{}); ok {
  60. if ranges, ok := length["range"].([]interface{}); ok {
  61. gt := util2.IntAll(ranges[0])
  62. lte := util2.IntAll(ranges[1])
  63. if lte < 0 { //∞
  64. lte = 999999
  65. }
  66. score := util2.Float64All(ranges[2])
  67. if valueLen > gt && valueLen <= lte {
  68. tmpExtField.Score += score
  69. break
  70. }
  71. }
  72. }
  73. }
  74. }
  75. //2.负面词打分
  76. if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
  77. for _, position := range positions {
  78. if p, ok := position.(map[string]interface{}); ok {
  79. util2.Try(func() {
  80. if p["regexp"] != nil {
  81. reg := p["regexp"].(*regexp.Regexp)
  82. if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
  83. tmpExtField.Score += util2.Float64All(p["score"])
  84. }
  85. }
  86. }, func(err interface{}) {
  87. log.Println(err)
  88. })
  89. }
  90. }
  91. }
  92. //3.正面词打分
  93. if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
  94. for _, position := range positions {
  95. if p, ok := position.(map[string]interface{}); ok {
  96. util2.Try(func() {
  97. if p["regexp"] != nil {
  98. reg := p["regexp"].(*regexp.Regexp)
  99. if reg.MatchString(util2.ObjToString(tmpExtField.Value)) {
  100. tmpExtField.Score += util2.Float64All(p["score"])
  101. }
  102. }
  103. }, func(err interface{}) {
  104. log.Println(err)
  105. })
  106. }
  107. }
  108. }
  109. if tmpExtField.Score > 0{
  110. extFields := make([]*util.ExtField, 0)
  111. extFields = append(extFields,tmpExtField )
  112. j.Result[v] = extFields
  113. }
  114. //AddExtLog("extract", j.SourceMid, nil, (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  115. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  116. continue
  117. } else {
  118. if jdextweight == 0 {
  119. continue
  120. }
  121. if v == "budget" || v == "bidamount" {
  122. lockclear.Lock()
  123. cfn := e.ClearFn[v]
  124. lockclear.Unlock()
  125. newNum := clear.DoClearFn(cfn, []interface{}{util2.Float64All((*j.Jsondata)[v]), ""})
  126. if util2.IntAll(newNum[0]) != 0 {
  127. extFields := make([]*util.ExtField, 0)
  128. extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
  129. j.Result[v] = extFields
  130. //AddExtLog("extract", j.SourceMid, nil, newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  131. //AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  132. }
  133. continue
  134. }
  135. if strings.Trim(util2.ObjToString(j.Result[v][0].Value), " ") != strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " ") {
  136. tmp = append(tmp, j.Result[v][0])
  137. oneScore := j.Result[v][0].Score
  138. if jdextweight == 2 {
  139. oneScore += 2
  140. }
  141. tmp = append(tmp, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: strings.Trim(util2.ObjToString((*j.Jsondata)[v]), " "), Score: oneScore})
  142. //AddExtLog("extract", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  143. //AddExtLog("clear", j.SourceMid, j.Result[v], (*j.Jsondata)[v], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
  144. tmps[v] = tmp
  145. }
  146. }
  147. }
  148. if jdextweight < 2 {
  149. for k, v := range tmps {
  150. lockscore.Lock()
  151. scoreRule := SoreConfig[k]
  152. lockscore.Unlock()
  153. if k == "projectname" || k == "projectcode" || k == "buyer" || k == "winner" || k == "agency" || k == "buyerperson" || k == "buyertel" {
  154. for i, tmpsvalue := range v {
  155. //1.长度打分
  156. valueLen := utf8.RuneCountInString(fmt.Sprint(tmpsvalue.Value))
  157. if valueLen < 1 {
  158. v[i].Score = -5
  159. continue
  160. }
  161. if valueLen > 100 {
  162. v[i].Score = -99
  163. }
  164. if lengths, ok := scoreRule["length"].([]interface{}); ok {
  165. for _, tmp := range lengths {
  166. if length, ok := tmp.(map[string]interface{}); ok {
  167. if ranges, ok := length["range"].([]interface{}); ok {
  168. gt := util2.IntAll(ranges[0])
  169. lte := util2.IntAll(ranges[1])
  170. if lte < 0 { //∞
  171. lte = 999999
  172. }
  173. score := util2.Float64All(ranges[2])
  174. if valueLen > gt && valueLen <= lte {
  175. v[i].Score += score
  176. v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "JsonData长度打分", Code: fmt.Sprint(gt, "<", valueLen, "<=", lte), ScoreFrom: "fieldscore.json.length", Value: tmpsvalue.Value, Score: score})
  177. break
  178. }
  179. }
  180. }
  181. }
  182. }
  183. //2.负面词打分
  184. if positions, ok := scoreRule["negativewords"].([]interface{}); ok {
  185. for _, position := range positions {
  186. if p, ok := position.(map[string]interface{}); ok {
  187. util2.Try(func() {
  188. if p["regexp"] != nil {
  189. reg := p["regexp"].(*regexp.Regexp)
  190. if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
  191. v[i].Score += util2.Float64All(p["score"])
  192. v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "JsonData负面词打分" + fmt.Sprint(p["describe"]), Code: "negativewords", RuleText: reg.String(), ScoreFrom: "fieldscore.json.negativewords", Value: tmpsvalue.Value, Score: util2.Float64All(p["score"])})
  193. }
  194. }
  195. }, func(err interface{}) {
  196. log.Println(err)
  197. })
  198. }
  199. }
  200. }
  201. //3.正面词打分
  202. if positions, ok := scoreRule["positivewords"].([]interface{}); ok {
  203. for _, position := range positions {
  204. if p, ok := position.(map[string]interface{}); ok {
  205. util2.Try(func() {
  206. if p["regexp"] != nil {
  207. reg := p["regexp"].(*regexp.Regexp)
  208. if reg.MatchString(util2.ObjToString(tmpsvalue.Value)) {
  209. v[i].Score += util2.Float64All(p["score"])
  210. v[i].ScoreItem = append(v[i].ScoreItem, &util.ScoreItem{Des: "Jsondata正面词打分" + fmt.Sprint(p["describe"]), Code: "positivewords", RuleText: reg.String(), ScoreFrom: "fieldscore.json.positivewords", Value: tmpsvalue.Value, Score: util2.Float64All(p["score"])})
  211. }
  212. }
  213. }, func(err interface{}) {
  214. log.Println(err)
  215. })
  216. }
  217. }
  218. }
  219. }
  220. }
  221. }
  222. }
  223. for k, v := range tmps { //新打分的结果集放入到result中,v为数组只有2个值
  224. if v[0].Score == v[1].Score { //分数相等优先取打分的值
  225. if v[0].ExtFrom == "JsonData_"+k + "_" + fmt.Sprint(jdextweight){
  226. j.Result[k] = append(j.Result[k], v[1])
  227. } else {
  228. j.Result[k] = append(j.Result[k], v[0])
  229. }
  230. continue
  231. }
  232. j.Result[k] = append(j.Result[k], v...) //分数不相等就放入result排序
  233. }
  234. //结果排序
  235. for _, val := range j.Result {
  236. util.Sort(val)
  237. }
  238. return j.Result
  239. }