score.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. package main
  2. import (
  3. "math"
  4. "strings"
  5. "time"
  6. "github.com/adrg/strutil"
  7. "github.com/adrg/strutil/metrics"
  8. )
  // InputData describes the reference project that candidate documents are
  // scored against by the confidence-score functions.
  type InputData struct {
  	// ProjectCode is searched for as a substring of a candidate's Detail
  	// text by calculateConfidenceScore22; an empty code is never matched.
  	ProjectCode string
  	// Area and City must equal the candidate's values exactly to earn the
  	// location bonus. City is only considered once Area already matches.
  	Area, City string
  	// ProjectName is compared with the candidate's project name after
  	// ConstructionUnit has been stripped from it. ConstructionUnit itself is
  	// compared with the candidate's Buyer.
  	ProjectName, ConstructionUnit string
  	// AreaCode is not read by the scoring code visible in this file.
  	AreaCode string
  	// PlannedInvestment is compared with the candidate's Budget; values <= 0
  	// disable that comparison. NOTE(review): the unit is not visible here and
  	// is presumably the same as Budget's — confirm.
  	PlannedInvestment float64
  	// BaseDate is compared with the candidate's publish time; dates in the
  	// year 2000 or earlier are ignored.
  	BaseDate time.Time
  }
  // EsDocument is a candidate record that the confidence-score functions
  // compare against an InputData target. Every field maps to a lower-case
  // JSON key. NOTE(review): the name suggests an Elasticsearch hit — confirm
  // against the code that populates it.
  type EsDocument struct {
  	Id    string `json:"id"`
  	Title string `json:"title"`
  	// Projectname is compared with the target's ProjectName after the buyer
  	// and construction-unit names are stripped from it.
  	Projectname string `json:"projectname"`
  	ProjectCode string `json:"projectcode"`
  	Toptype     string `json:"toptype"`
  	Subtype     string `json:"subtype"`
  	// Area and City are matched by exact equality against the target.
  	Area string `json:"area"`
  	City string `json:"city"`
  	// Buyer is compared with the target's ConstructionUnit.
  	Buyer string `json:"buyer"`
  	// Budget is compared with the target's PlannedInvestment; values <= 0
  	// disable that comparison.
  	Budget    float64 `json:"budget"`
  	Bidamount float64 `json:"bidamount"`
  	Winner    string  `json:"winner"`
  	// Detail is searched for the target's ProjectCode.
  	Detail string `json:"detail"`
  	// Publishtime is passed to time.Unix as a number of seconds.
  	Publishtime int64  `json:"publishtime"`
  	Agency      string `json:"agency"`
  	SWinner     string `json:"s_winner"`
  	WinnerTel   string `json:"winnertel"`
  	BuyerTel    string `json:"buyertel"`
  	BuyerPerson string `json:"buyerperson"`
  	// Score is not written by the code visible in this file; presumably it
  	// carries the computed confidence score — confirm against the caller.
  	Score float64 `json:"score"`
  }
  41. // calculateConfidenceScore calculates a score from 0-100 based on multiple factors.
  42. func calculateConfidenceScore22(target InputData, candidate EsDocument) float64 {
  43. var totalScore float64 = 0.0
  44. nameSimilarity := EnhancedSimilarity(strings.ReplaceAll(target.ProjectName, target.ConstructionUnit, ""), strings.ReplaceAll(strings.ReplaceAll(candidate.Projectname, candidate.Buyer, ""), target.ConstructionUnit, ""))
  45. // 1. Project Name Similarity (Weight: 40 points)
  46. totalScore += nameSimilarity * 70.0
  47. //log.Println(target.ProjectName, candidate.Projectname, nameSimilarity)
  48. // 2. Construction Unit Similarity (Weight: 30 points)
  49. unitSimilarity := EnhancedSimilarity(target.ConstructionUnit, candidate.Buyer)
  50. totalScore += unitSimilarity * 10.0
  51. //log.Println(target.ConstructionUnit, candidate.Buyer, unitSimilarity)
  52. if target.Area != "" && target.Area == candidate.Area {
  53. totalScore += 5
  54. if target.City != "" && target.City == candidate.City {
  55. totalScore += 5
  56. }
  57. }
  58. // 3. Project Code Presence
  59. if target.ProjectCode != "" && strings.Contains(candidate.Detail, target.ProjectCode) {
  60. totalScore += 5.0
  61. }
  62. // 4. 投资额
  63. if target.PlannedInvestment > 0 && candidate.Budget > 0 {
  64. diff := math.Abs(target.PlannedInvestment - candidate.Budget)
  65. percentDiff := diff / math.Max(target.PlannedInvestment, 1) // Avoid division by zero
  66. // Simple linear decay: 100% score for 0% diff, 0% score for 100% diff or more
  67. investmentScore := math.Max(0, (1.0-percentDiff)*5.0)
  68. totalScore += investmentScore
  69. }
  70. //计算时间
  71. if target.BaseDate.Year() > 2000 && candidate.Publishtime > 1600000000 {
  72. totalScore += TimeProximity(target.BaseDate, time.Unix(candidate.Publishtime, 0)) * 5
  73. }
  74. return totalScore
  75. }
  76. func calculateConfidenceScore(target InputData, candidate EsDocument) float64 {
  77. var totalScore float64 = 0.0
  78. nameSimilarity := EnhancedSimilarity(strings.ReplaceAll(target.ProjectName, target.ConstructionUnit, ""), strings.ReplaceAll(strings.ReplaceAll(candidate.Projectname, candidate.Buyer, ""), target.ConstructionUnit, ""))
  79. // 1. Project Name Similarity (Weight: 40 points)
  80. totalScore += nameSimilarity * 80.0
  81. //log.Println(target.ProjectName, candidate.Projectname, nameSimilarity)
  82. // 2. Construction Unit Similarity (Weight: 30 points)
  83. unitSimilarity := EnhancedSimilarity(target.ConstructionUnit, candidate.Buyer)
  84. totalScore += unitSimilarity * 10.0
  85. //log.Println(target.ConstructionUnit, candidate.Buyer, unitSimilarity)
  86. if target.Area != "" && target.Area == candidate.Area {
  87. totalScore += 5
  88. if target.City != "" && target.City == candidate.City {
  89. totalScore += 5
  90. }
  91. }
  92. return totalScore
  93. }
  // TimeProximity maps the distance between t1 and t2, in whole days, to a
  // similarity value: under 100 days gives 1.0, under 200 gives 0.7, under
  // 365 gives 0.55, under 700 gives 0.3 and anything further apart 0.15.
  // The order of the arguments does not matter.
  func TimeProximity(t1, t2 time.Time) float64 {
  	// Whole days between the two instants; partial days are truncated.
  	gapDays := int(t1.Sub(t2).Abs().Hours() / 24)

  	tiers := [...]struct {
  		below int
  		score float64
  	}{
  		{100, 1.0},
  		{200, 0.7},
  		{365, 0.55},
  		{700, 0.3},
  	}
  	for _, tier := range tiers {
  		if gapDays < tier.below {
  			return tier.score
  		}
  	}
  	return 0.15
  }
  113. func EnhancedSimilarity(str1, str2 string) float64 {
  114. // 标准化:去除空格和标点(根据中文需求调整)
  115. s1 := strings.TrimSpace(str1)
  116. s2 := strings.TrimSpace(str2)
  117. // 1. 检查完全相等
  118. if s1 == "" || s2 == "" {
  119. return 0
  120. } else if s1 == s2 {
  121. return 1.0
  122. }
  123. // 2. 检查包含关系(权重50%)
  124. if containmentScore := checkContainment(s1, s2); containmentScore > 0 {
  125. return containmentScore
  126. }
  127. // 3. 使用改进的Jaro-Winkler(考虑中文分词)
  128. jw := metrics.NewJaroWinkler()
  129. baseScore := strutil.Similarity(s1, s2, jw)
  130. // 4. 添加公共子串权重
  131. substringScore := checkLongestCommonSubstring(s1, s2)
  132. // 5. 组合得分
  133. finalScore := 0.5*baseScore + 0.5*substringScore
  134. return finalScore
  135. }
  // checkContainment reports whether the shorter of s1 and s2 occurs inside
  // the longer one. On a hit it returns 0.9 plus up to 0.1 in proportion to
  // how much of the longer string (measured in bytes) the shorter one covers,
  // i.e. a value in (0.9, 1.0]. It returns 0 when neither string contains the
  // other or when either string is empty.
  func checkContainment(s1, s2 string) float64 {
  	// An empty string is trivially "contained" in anything; treating that as
  	// a match would yield 0.9, or NaN (0/0) when both are empty.
  	if s1 == "" || s2 == "" {
  		return 0
  	}

  	// Make s1 the shorter string so only one direction has to be tested.
  	if len(s1) > len(s2) {
  		s1, s2 = s2, s1
  	}
  	if !strings.Contains(s2, s1) {
  		return 0
  	}

  	ratio := float64(len(s1)) / float64(len(s2))
  	return 0.9 + ratio*0.1
  }
  // checkLongestCommonSubstring returns the length of the longest run of
  // consecutive characters shared by s1 and s2, divided by the average length
  // of the two strings. Lengths are counted in Unicode code points, so
  // multi-byte characters count once and can never match partially. The
  // result lies in [0, 1]; it is 0 when either string is empty.
  func checkLongestCommonSubstring(s1, s2 string) float64 {
  	long, short := []rune(s1), []rune(s2)
  	if len(long) == 0 || len(short) == 0 {
  		return 0
  	}
  	// Index the rows by the shorter string to keep them small.
  	if len(short) > len(long) {
  		long, short = short, long
  	}

  	// Classic DP, keeping only the previous and the current row:
  	// curr[j+1] is the length of the common run ending at long[i], short[j].
  	prev := make([]int, len(short)+1)
  	curr := make([]int, len(short)+1)
  	maxLen := 0
  	for _, a := range long {
  		for j, b := range short {
  			if a != b {
  				curr[j+1] = 0
  				continue
  			}
  			curr[j+1] = prev[j] + 1
  			if curr[j+1] > maxLen {
  				maxLen = curr[j+1]
  			}
  		}
  		prev, curr = curr, prev
  	}

  	// Use a floating-point average: integer division would round the
  	// denominator down and inflate the score for odd combined lengths.
  	avgLen := float64(len(long)+len(short)) / 2
  	return float64(maxLen) / avgLen
  }