yusuan.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. package main
import (
	"log"
	"math"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"app.yhyue.com/moapp/jybase/mongodb"
	"github.com/gogf/gf/v2/util/gconv"
)
  13. var regString = `(\d+[.,]{0,}\d+)`
  14. var reg, _ = regexp.Compile(regString)
  15. // 从解析文档中提取全部项目信息
  16. func getProjectItemFromFilejx() {
  17. list, _ := db.Find(cf.YusuanInfo, `{"handle":{"$exists":1}}`, nil, nil, false, -1, -1)
  18. //list, _ := db.Find(cf.YusuanInfo, `{"file_path" : "2024年/内蒙古/内蒙古/政府/2024_准格尔旗人民法院_9.xlsx"}`, nil, nil, false, -1, -1)
  19. pronum := 0
  20. for k, data := range *list {
  21. tmp, _ := data["performance_result"].([]interface{})
  22. if len(tmp) > 0 {
  23. for _, v := range tmp {
  24. t, _ := v.(map[string]interface{})
  25. tmp3, _ := t["sheetdata"].([]interface{})
  26. for _, v := range tmp3 {
  27. item, _ := v.(map[string]interface{})
  28. budget, ok := item["budget"]
  29. if ok {
  30. ty := reflect.TypeOf(budget).Kind()
  31. switch ty {
  32. case reflect.Int:
  33. item["budget_f"] = float64(budget.(int))
  34. case reflect.Int32:
  35. item["budget_f"] = float64(budget.(int32))
  36. case reflect.String:
  37. budget_f := budget.(string)
  38. if strings.Contains(budget_f, ",") {
  39. budget_f = strings.ReplaceAll(budget_f, ",", "")
  40. }
  41. rs, _ := strconv.ParseFloat(budget_f, 64)
  42. item["budget_f"] = rs
  43. default:
  44. item["budget_f"] = budget
  45. }
  46. unit, _ := item["unit"].(string)
  47. if strings.Contains(unit, "万") {
  48. item["budget_f"] = math.Round(item["budget_f"].(float64) * 10000)
  49. }
  50. budget_f := item["budget_f"].(float64)
  51. if unit == "" && budget_f < 1000 {
  52. item["budget_f"] = math.Round(budget_f * 10000)
  53. }
  54. } else {
  55. total_amount, ok := item["total_amount"]
  56. if ok {
  57. ty := reflect.TypeOf(total_amount).Kind()
  58. switch ty {
  59. case reflect.Int:
  60. item["budget_f"] = float64(total_amount.(int))
  61. case reflect.Int32:
  62. item["budget_f"] = float64(total_amount.(int32))
  63. case reflect.String:
  64. budget_f := total_amount.(string)
  65. if strings.Contains(budget_f, ",") {
  66. budget_f = strings.ReplaceAll(budget_f, ",", "")
  67. }
  68. rs, _ := strconv.ParseFloat(budget_f, 64)
  69. item["budget_f"] = rs
  70. default:
  71. item["budget_f"] = total_amount
  72. }
  73. unit, _ := item["unit"].(string)
  74. if strings.Contains(unit, "万") {
  75. item["budget_f"] = math.Round(item["budget_f"].(float64) * 10000)
  76. }
  77. budget_f := item["budget_f"].(float64)
  78. if unit == "" && budget_f < 1000 {
  79. item["budget_f"] = math.Round(budget_f * 10000)
  80. }
  81. }
  82. }
  83. for _, f := range cf.Projectitem_field {
  84. item[f] = data[f]
  85. }
  86. year := gconv.String(item["year"])
  87. projectname := gconv.String(item["projectname"])
  88. procure_content := gconv.String(item["procure_content"])
  89. kpi := gconv.String(item["kpi"])
  90. institution := gconv.String(item["institution"])
  91. //生成hashcode
  92. item["hash_code"] = Hash(year, projectname, procure_content, kpi, institution)
  93. item["fileitem_id"] = mongodb.BsonIdToSId(data["_id"])
  94. pronum++
  95. db.Save(cf.ProjectItem, item)
  96. }
  97. }
  98. }
  99. if k%1000 == 0 {
  100. log.Printf("加载数据量为:%d,生成项目数据量为:%d", k, pronum)
  101. }
  102. }
  103. }
  104. // 匹配客户项目信息
  105. type Data struct {
  106. ismatch bool
  107. isnotmatch bool
  108. matchkey map[string]bool
  109. matchkey_field map[string]bool
  110. matchkey_add map[string]bool
  111. matchkey_add_field map[string]bool
  112. notmatchkey map[string]bool
  113. notmatchkey_field map[string]bool
  114. rule_index string
  115. }
  116. func LoadingHash() map[string]bool {
  117. HashMap := map[string]bool{}
  118. sess := db.GetMgoConn()
  119. defer db.DestoryMongoConn(sess)
  120. it := sess.DB(cf.Collections).C(ucf.Totbale).Find(nil).Select(map[string]interface{}{
  121. "year": 1,
  122. "projectname": 1,
  123. "procure_content": 1,
  124. "kpi": 1,
  125. "institution": 1,
  126. "_id": 1,
  127. "hash_code": 1,
  128. }).Iter()
  129. i := 0
  130. for m := make(map[string]interface{}); it.Next(&m); {
  131. i++
  132. if i%1000 == 0 {
  133. log.Println("count:", i)
  134. }
  135. hash_code := gconv.String(m["hash_code"])
  136. HashMap[hash_code] = true
  137. m = make(map[string]interface{})
  138. }
  139. return HashMap
  140. }
  141. func getCustomerMatchProject() {
  142. log.Println("开始加载历史匹配数据")
  143. HashMap := LoadingHash()
  144. log.Println("历史匹配数据加载完毕,总数据量:", len(HashMap))
  145. log.Println("开始加载数据")
  146. list, _ := db.Find(cf.ProjectItem, nil, nil, nil, false, -1, -1)
  147. log.Println("加载完毕,总数据量:", len(*list))
  148. pipienum := 0
  149. for k, tmp := range *list {
  150. d := &Data{
  151. matchkey: map[string]bool{},
  152. matchkey_field: map[string]bool{},
  153. matchkey_add: map[string]bool{},
  154. matchkey_add_field: map[string]bool{},
  155. notmatchkey: map[string]bool{},
  156. notmatchkey_field: map[string]bool{},
  157. }
  158. for _, rule := range ucf.Rules {
  159. //关键词
  160. mkey := strings.Split(rule.Match, ",")
  161. for _, field := range rule.MatchField.Field {
  162. content, _ := tmp[field].(string)
  163. //关键词匹配
  164. for _, key := range mkey {
  165. keys := strings.Split(key, "&&")
  166. ismatch := true
  167. for _, v := range keys {
  168. if !strings.Contains(content, v) {
  169. ismatch = false
  170. break
  171. }
  172. }
  173. if ismatch {
  174. d.ismatch = true
  175. d.matchkey[key] = true
  176. d.matchkey_field[field] = true
  177. }
  178. }
  179. }
  180. //处理附件词、排除词
  181. if d.ismatch {
  182. d.rule_index = rule.Index
  183. //附加词
  184. addkey := strings.Split(rule.MatchAdd, ",")
  185. if len(addkey) > 0 {
  186. //附加词匹配
  187. for _, key := range addkey {
  188. for _, field := range rule.MatchAddField.Field {
  189. content, _ := tmp[field].(string)
  190. if strings.Contains(content, key) {
  191. d.matchkey_add[key] = true
  192. d.matchkey_add_field[field] = true
  193. }
  194. }
  195. }
  196. }
  197. //排除词
  198. notkey := strings.Split(rule.NotMatch, ",")
  199. if len(notkey) > 0 {
  200. //附加词匹配
  201. for _, key := range notkey {
  202. for _, field := range rule.NotMatchField.Field {
  203. content, _ := tmp[field].(string)
  204. if strings.Contains(content, key) {
  205. d.notmatchkey[key] = true
  206. d.notmatchkey_field[field] = true
  207. }
  208. }
  209. }
  210. }
  211. }
  212. }
  213. if d.ismatch {
  214. //提取
  215. pipienum++
  216. tmp["matchkey"] = getKeyMap(d.matchkey)
  217. tmp["matchkey_filed"] = getKeyMap(d.matchkey_field)
  218. tmp["matchkey_add"] = getKeyMap(d.matchkey_add)
  219. tmp["matchkey_add_filed"] = getKeyMap(d.matchkey_add_field)
  220. tmp["matchkey_not"] = getKeyMap(d.notmatchkey)
  221. tmp["matchkey_not_filed"] = getKeyMap(d.notmatchkey_field)
  222. //TODO 判断判重
  223. hash_code := gconv.String(tmp["hash_code"])
  224. if HashMap[hash_code] {
  225. tmp["repeat"] = -1
  226. } else {
  227. tmp["repeat"] = 1
  228. }
  229. tmp["rule_index"] = d.rule_index
  230. tmp["comeintime"] = time.Now().Unix()
  231. if tmp["contact_person"] == nil {
  232. tmp["contact_person"] = ""
  233. }
  234. db.Save(ucf.Totbale, tmp)
  235. } else {
  236. //不提取
  237. }
  238. if k%1000 == 0 {
  239. log.Printf("加载数量为:%d,匹配数据量为:%d", k, pipienum)
  240. }
  241. }
  242. log.Println(ucf.User, pipienum, "匹配项目已处理完成")
  243. }
  244. func getKeyMap(m map[string]bool) string {
  245. ll := []string{}
  246. for k, _ := range m {
  247. ll = append(ll, k)
  248. }
  249. return strings.Join(ll, ",")
  250. }
  251. // 修复预算
  252. func xiuyusuan() {
  253. list, _ := db.Find("project_hp", nil, nil, nil, false, -1, -1)
  254. for _, tmp := range *list {
  255. total_budget, ok := tmp["total_budget"].(string)
  256. if ok {
  257. if strings.Contains(total_budget, ",") {
  258. log.Println(total_budget)
  259. total_budget = strings.ReplaceAll(total_budget, ",", "")
  260. }
  261. budget, _ := strconv.ParseFloat(total_budget, 64)
  262. log.Println(total_budget, budget)
  263. unit, _ := tmp["unit"].(string)
  264. if strings.Contains(unit, "万") {
  265. budget = budget * 10000
  266. }
  267. //log.Println(total_budget, budget, tmp["_id"])
  268. db.Update(
  269. "project_hp",
  270. map[string]interface{}{"_id": tmp["_id"]},
  271. map[string]interface{}{"$set": map[string]interface{}{"total_budget_f": budget}},
  272. true,
  273. false,
  274. )
  275. }
  276. }
  277. }