// clean_time.go (6.1 KB)

  1. package main
  2. import (
  3. "fmt"
  4. "regexp"
  5. "strconv"
  6. "strings"
  7. "time"
  8. )
  // Package-level data shared by the time-cleaning helpers in this file.
  var (
  	// spaces lists the whitespace-like and control characters that
  	// cleanStrToTimestamp iterates over when normalising its input.
  	spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n", "\u0001"}

  	// Regular expressions used by the helpers. All of them are assigned in
  	// init and are nil until it has run.
  	reg          *regexp.Regexp
  	regA         *regexp.Regexp
  	regB         *regexp.Regexp
  	regC         *regexp.Regexp
  	regD         *regexp.Regexp
  	regE         *regexp.Regexp
  	regF         *regexp.Regexp
  	regAfter     *regexp.Regexp
  	regAfterBool *regexp.Regexp
  )

  // T is 365 days expressed in seconds. cleanStrToTimestamp discards any
  // result that lies more than T seconds after the current time.
  const T = 365 * 86400

  // item maps one numeral character to its decimal text. Chinese numerals map
  // to the matching digit, except "十", which maps to "10"; ASCII digits map to
  // themselves.
  var item = map[string]string{
  	"零": "0",
  	"〇": "0",
  	"一": "1",
  	"二": "2",
  	"三": "3",
  	"四": "4",
  	"五": "5",
  	"六": "6",
  	"七": "7",
  	"八": "8",
  	"九": "9",
  	"十": "10",
  	"0": "0",
  	"1": "1",
  	"2": "2",
  	"3": "3",
  	"4": "4",
  	"5": "5",
  	"6": "6",
  	"7": "7",
  	"8": "8",
  	"9": "9",
  }
  20. func init() {
  21. //二〇一五年十一月四日十五时
  22. reg, _ = regexp.Compile(`\d+`)
  23. regA, _ = regexp.Compile(`[一|二|三|四|五|六|七|八|九|十|零|〇|1|2|3|4|5|6|7|8|9|0]`)
  24. regB, _ = regexp.Compile(`\d+年\d+月\d+日((上|下)午)?\s*\d+[::时]\d+分?[-—]\d+[::时]\d+时?分?`)
  25. regC, _ = regexp.Compile(`\s*\d+[::时]\d+分?[-—]`)
  26. regD, _ = regexp.Compile(`([一|二|三|四|五|六|七|八|九|十|零|〇]{4})年([一|二|三|四|五|六|七|八|九|十]{1,2})月([一|二|三|四|五|六|七|八|九|十]{1,3})日([一|二|三|四|五|六|七|八|九|十]{1,3})时`)
  27. regE, _ = regexp.Compile(`^([0-9一二三四五六七八九十]+)月(份)?$`)
  28. regF, _ = regexp.Compile(`^(\d{4})(\d{2})$`)
  29. regAfter, _ = regexp.Compile(`(下午D?\d{1,2}[时|:|:|h|H])`)
  30. regAfterBool, _ = regexp.Compile(`(下午D?[1-2][0-9][时|:|:|h|H])`)
  31. }
  32. /*字符时间转时间戳
  33. 支持全角
  34. 20060102->时间戳
  35. 20060102150405->时间戳
  36. 01%02->时间戳
  37. 2006%01%02->时间戳
  38. 2006%01%02%15->时间戳
  39. 2006%01%02%15%04->时间戳
  40. 2006%01%02%15%04%05->时间戳
  41. */
  42. func cleanStrToTimestamp(time_str string,publishtime int64) int64 {
  43. tmp := time_str
  44. //处理类似:二〇一五年十一月四日十五时
  45. cht := regD.FindStringSubmatch(tmp)
  46. if len(cht) == 5 {
  47. y := chineseToNumber(cht[1])
  48. m := 0
  49. for _, v := range []rune(cht[2]) {
  50. it, _ := strconv.Atoi(item[string(v)])
  51. m += it
  52. }
  53. d := 0
  54. for _, v := range []rune(cht[3]) {
  55. it, _ := strconv.Atoi(item[string(v)])
  56. d += it
  57. }
  58. M := 0
  59. for _, v := range []rune(cht[4]) {
  60. it, _ := strconv.Atoi(item[string(v)])
  61. M += it
  62. }
  63. tmp = fmt.Sprintf("%s年%d月%d日%d时", y, m, d, M)
  64. }
  65. //2016年12月7日上午9:00-11:30时 时间范围处理 取后面的时间
  66. if regB.MatchString(tmp) {
  67. tmp = regC.ReplaceAllString(tmp, "")
  68. }
  69. //2017年11月13日下午3时30分
  70. addreptime := int64(0)
  71. //2021年09月10日下午15时30分
  72. if regAfter.MatchString(tmp) && !regAfterBool.MatchString(tmp) {
  73. addreptime = 12 * 60 * 60
  74. }
  75. regRepl, _ := regexp.Compile(`[,,]`)
  76. tmp = regRepl.ReplaceAllString(tmp, "")
  77. for _, v := range spaces {
  78. strings.Replace(tmp, v, " ", -1)
  79. }
  80. tmps := reg.FindAllString(chineseToNumber(tmp), -1)
  81. //处理类似2016-12-0909:30:00时间
  82. if len(tmps) > 2 && len(tmps[2]) > 2 {
  83. newtmp := []string{}
  84. for k, v := range tmps {
  85. if k == 2 {
  86. newtmp = append(newtmp, v[0:2], v[2:])
  87. } else {
  88. newtmp = append(newtmp, v)
  89. }
  90. }
  91. tmps = newtmp
  92. }
  93. timestr := "" //2006-01-02 15:04:05
  94. timestamp := int64(0)
  95. if len(tmps) == 1 {
  96. if len(tmps[0]) == 8 {
  97. timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + tmps[0][6:8]
  98. t, _ := time.ParseInLocation("2006-01-02-15-04", timestr+"-09-00", time.Local)
  99. timestamp = t.Unix()
  100. } else if len(tmps[0]) == 14 {
  101. timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + tmps[0][6:8] + " " + tmps[0][8:10] + ":" + tmps[0][10:12] + ":" + tmps[0][12:14]
  102. t, _ := time.ParseInLocation("2006-01-02 15:04:00", timestr, time.Local)
  103. timestamp = t.Unix()
  104. }
  105. } else if len(tmps) == 2 {
  106. timestr = fmt.Sprint(time.Now().Year()) + "-" + MDhmsRepair(tmps[0]) + "-" + MDhmsRepair(tmps[1])
  107. t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
  108. timestamp = t.Unix()
  109. if timestamp<=0 {
  110. timestr = fmt.Sprint(MDhmsRepair(tmps[0]) + "-" + MDhmsRepair(tmps[1])+"-01")
  111. t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
  112. timestamp = t.Unix()
  113. }
  114. } else if len(tmps) == 3 {
  115. timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2])
  116. t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
  117. timestamp = t.Unix()
  118. } else if len(tmps) == 4 {
  119. timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2]) + " " + MDhmsRepair(tmps[3])
  120. t, _ := time.ParseInLocation("2006-01-02 15", timestr, time.Local)
  121. timestamp = t.Unix()
  122. } else if len(tmps) >= 5 {
  123. timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2]) + " " + MDhmsRepair(tmps[3]) + ":" + MDhmsRepair(tmps[4])
  124. t, _ := time.ParseInLocation("2006-01-02 15:04", timestr, time.Local)
  125. timestamp = t.Unix()
  126. }
  127. if regE.MatchString(tmp) && timestamp<=0 {
  128. m := 0
  129. new_str := regE.ReplaceAllString(tmp,"$1")
  130. str := chineseToNumber(new_str)
  131. it, _ := strconv.Atoi(str)
  132. if it >100 {
  133. m = 10+it%100
  134. }else {
  135. m = it
  136. }
  137. if m>0&&m<13 {
  138. m_s := fmt.Sprintf("%d",m)
  139. //优先取发布时间-的年份--待完善
  140. y_s := fmt.Sprintf("%d",time.Unix(publishtime, 0).Year())
  141. //y_s := fmt.Sprintf("%d",time.Now().Year())
  142. timestr = y_s + "-" + MDhmsRepair(m_s) + "-01"
  143. t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
  144. if t.Unix()>publishtime {
  145. timestamp = t.Unix()
  146. }else {
  147. y_s = fmt.Sprintf("%d",time.Unix(publishtime, 0).Year()+1)
  148. timestr = y_s + "-" + MDhmsRepair(m_s) + "-01"
  149. t1, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
  150. timestamp = t1.Unix()
  151. }
  152. }
  153. }
  154. if regF.MatchString(tmp) && timestamp<=0 {
  155. new_str := regF.ReplaceAllString(tmp,"$1-$2-01")
  156. t, _ := time.ParseInLocation("2006-01-02", new_str, time.Local)
  157. timestamp = t.Unix()
  158. }
  159. if timestamp <= 0 || timestamp > (time.Now().Unix()+T) {
  160. timestamp = 0
  161. } else {
  162. if addreptime > 0 {
  163. timestamp += addreptime
  164. }
  165. }
  166. return timestamp
  167. }
  168. //汉子数和全角转数字
  169. func chineseToNumber(con string) string {
  170. tmp := regA.ReplaceAllStringFunc(con, func(key string) string {
  171. if item[key] != "" {
  172. return item[key]
  173. } else {
  174. return key
  175. }
  176. return key
  177. })
  178. return tmp
  179. }
  // MDhmsRepair left-pads a date or time field with "0" when it is exactly one
  // byte long, so "5" becomes "05". Any other input, including the empty
  // string, is returned unchanged.
  func MDhmsRepair(t string) string {
  	if len(t) != 1 {
  		return t
  	}
  	return "0" + t
  }
  187. }