// util.go (9.1 KB)

  1. package main
  2. import (
  3. "regexp"
  4. "sort"
  5. "strconv"
  6. "strings"
  7. "time"
  8. )
// TagMatching holds the configuration of a single tagging rule: the tag to
// assign, plus keyword, additional-keyword, exclusion and cleanup settings.
// Each *Key string has a companion *KeyReg slice of RegexpInfo.
// NOTE(review): the *KeyReg slices are presumably built from the matching
// *Key string with GetRegex — confirm against the code that fills this struct.
type TagMatching struct {
	tagName      string        // tag name
	tagCode      string        // tag value (the one that is saved)
	matchField   []string      // fields the keywords are matched against
	matchKey     string        // keywords to match
	matchKeyReg  []*RegexpInfo // matchers for matchKey
	addField     []string      // fields the additional keywords are matched against
	addKey       string        // additional keywords to match
	addKeyReg    []*RegexpInfo // matchers for addKey
	excludeField []string      // fields the exclusion keywords are matched against
	excludeKey   string        // exclusion keywords to match
	excludeKeyReg []*RegexpInfo // matchers for excludeKey
	//clearField []string // fields the cleanup keywords are matched against
	clearKey []string // cleanup keywords; cleanup uses the same fields as the keywords
}
// RegexpInfo pairs one keyword item with its compiled pattern.
type RegexpInfo struct {
	keyStr string         // the raw keyword item exactly as configured
	regs   *regexp.Regexp // compiled pattern; GetRegex leaves it nil for items containing "&&"
}
  28. func GetRegex(key string) []*RegexpInfo {
  29. var infos []*RegexpInfo
  30. for _, s := range strings.Split(key, ",") {
  31. if strings.Contains(s, "&&") {
  32. info := &RegexpInfo{
  33. keyStr: s,
  34. regs: nil,
  35. }
  36. infos = append(infos, info)
  37. } else {
  38. if s == "" {
  39. continue
  40. }
  41. info := &RegexpInfo{
  42. keyStr: s,
  43. regs: regexp.MustCompile(".*(?i)" + s + ".*"),
  44. }
  45. infos = append(infos, info)
  46. }
  47. }
  48. return infos
  49. }
  50. var (
  51. regNumFloat, _ = regexp.Compile(`([1-9]\d*|0)(\.\d+)?`)
  52. regStrUnit, _ = regexp.Compile(`[元|万|亿]`)
  53. contentUnit, _ = regexp.Compile(`(万元|单位/万)`)
  54. numCapitals, _ = regexp.Compile(`([〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]{4,40})`)
  55. regStrChar = `[〇|零|点|壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|百|佰|千|仟|万|亿|億|元|圆|角|分|整|正]`
  56. moneyRegChar, _ = regexp.Compile(regStrChar)
  57. regQianw, _ = regexp.Compile(`\d{1,2}千万`)
  58. cutAllSpace, _ = regexp.Compile(`\s*`)
  59. spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
  60. moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",
  61. "一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
  62. "六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
  63. "百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
  64. "零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
  65. }
  66. moneyUnit = map[string]float64{
  67. "元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000), //单位
  68. }
  69. )
  70. var currencyItem = map[string]string{
  71. "人民币": "人民币",
  72. "rmb": "人民币",
  73. "RMB": "人民币",
  74. "$": "美元",
  75. "$": "美元",
  76. "美元": "美元",
  77. "港元": "港币",
  78. "港币": "港币",
  79. "澳币": "澳币",
  80. "澳元": "澳币",
  81. }
  82. //获取币种
  83. func GetCurrency(text string) (currency string) {
  84. if text == "" {
  85. return
  86. }
  87. currency = "人民币"
  88. for k, v := range currencyItem {
  89. if strings.Contains(text, k) {
  90. currency = v
  91. return
  92. }
  93. }
  94. return
  95. }
  96. //金额转换
  97. func ObjToMoney(text string) float64 {
  98. isfindUnit := true
  99. ret := capitalMoney(text)
  100. if ret < float64(10000) || ret > float64(50000000000) {
  101. ret2, b := numMoney(text)
  102. isfindUnit = b
  103. if ret2 > ret {
  104. ret = ret2
  105. }
  106. }
  107. f, _ := strconv.ParseFloat(strconv.FormatFloat(ret, 'f', 4, 64), 64)
  108. // if f < 1 {
  109. // f = 0
  110. // }
  111. //如果金额小于50,全文检索单位:万
  112. if f < 50 && f > 0 && isfindUnit {
  113. rep := contentUnit.FindAllStringIndex(text, -1)
  114. if len(rep) > 0 {
  115. f = f * 10000
  116. }
  117. }
  118. return f
  119. }
  120. func capitalMoney(text string) float64 {
  121. nodes := []float64{}
  122. node := float64(0)
  123. tmp := float64(0)
  124. decimals := 0.0
  125. ishaspoint := false //是否含小数点
  126. fnum := float64(0)
  127. end := false
  128. //str := fmt.Sprint(data[0])
  129. //提取第一个大写信息
  130. strmatch := numCapitals.FindAllStringSubmatch(text, -1)
  131. if len(strmatch) > 0 {
  132. text = strmatch[0][0]
  133. }
  134. suffixUnit := float64(1)
  135. if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
  136. index := strings.LastIndex(text, "万")
  137. text = text[0:index]
  138. suffixUnit = float64(10000)
  139. }
  140. moneyRegChar.ReplaceAllStringFunc(text, func(key string) string {
  141. if key == "元" || key == "圆" || key == "点" {
  142. ishaspoint = true
  143. }
  144. if v, ok := moneyChar[key].(float64); ok && !end {
  145. if ishaspoint && v > 10 { //排除后面有其他的单位
  146. return ""
  147. }
  148. //fmt.Println(key, v, fnum)
  149. if v < 10 && v >= 0 {
  150. if ishaspoint { //小数部分
  151. if v >= 1 {
  152. fnum = v
  153. } else if v < 1 && v > 0 {
  154. decimals += fnum * v
  155. }
  156. } else {
  157. if tmp != float64(0) {
  158. node += tmp
  159. }
  160. tmp = float64(v)
  161. }
  162. } else if v == 10000 || v == 100000000 { //单位万、亿
  163. if tmp != float64(0) {
  164. node += tmp
  165. tmp = float64(0)
  166. }
  167. nodes = append(nodes, node*float64(v))
  168. node = float64(0)
  169. } else {
  170. if v == 10 && tmp == 0 {
  171. tmp = 1
  172. }
  173. tmp = tmp * float64(v)
  174. node += tmp
  175. tmp = float64(0)
  176. }
  177. }
  178. if key == "整" || key == "正" || key == "分" {
  179. end = true
  180. }
  181. return ""
  182. })
  183. nodes = append(nodes, node, tmp)
  184. ret := float64(0)
  185. for _, v := range nodes {
  186. ret += v
  187. }
  188. return (ret + decimals) * suffixUnit
  189. }
  190. //数字金额转换
  191. func numMoney(text string) (moneyFloat float64, flag bool) {
  192. //tmp := fmt.Sprintf("%f", data[0])
  193. repUnit := float64(1)
  194. if regQianw.MatchString(text) {
  195. text = strings.Replace(text, "千万", "万", -1)
  196. repUnit = float64(1000)
  197. }
  198. text = replaceSymbol(text, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
  199. text = replaceString(text, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
  200. text = CutAllSpace(text)
  201. rets := regNumFloat.FindAllString(text, -1)
  202. fnums := []float64{}
  203. unitstrs := []string{}
  204. if len(rets) > 0 {
  205. pindex := 0 //单位前置
  206. for k, v := range rets {
  207. f, err := strconv.ParseFloat(v, 64)
  208. if err == nil {
  209. fnums = append(fnums, f)
  210. index := strings.Index(text, v)
  211. //单位后置
  212. start := index + len(v)
  213. end := start + 3
  214. //log.Println("vvv", tmp, v, pindex, index, start)
  215. if k > 0 {
  216. if start >= pindex+3 {
  217. pstart := pindex + 3
  218. if pstart >= index {
  219. pstart = index
  220. }
  221. if len(text) > end {
  222. unitstrs = append(unitstrs, text[pstart:index]+text[start:end])
  223. } else {
  224. unitstrs = append(unitstrs, text[pstart:index]+text[start:])
  225. }
  226. } else {
  227. if len(text) > end {
  228. unitstrs = append(unitstrs, text[start:end])
  229. } else {
  230. unitstrs = append(unitstrs, text[start:])
  231. }
  232. }
  233. } else {
  234. if len(text) > end {
  235. if index-3 >= 0 {
  236. unitstrs = append(unitstrs, text[index-3:index]+text[start:end])
  237. } else {
  238. unitstrs = append(unitstrs, text[start:end])
  239. }
  240. } else {
  241. if index-3 >= 0 {
  242. unitstrs = append(unitstrs, text[index-3:index]+text[start:])
  243. } else {
  244. unitstrs = append(unitstrs, text[start:])
  245. }
  246. }
  247. }
  248. pindex = start
  249. }
  250. }
  251. }
  252. //log.Println("unitstrs", fnums, unitstrs)
  253. unit := float64(0)
  254. fnum := float64(0)
  255. for k, v := range fnums {
  256. fnum = v
  257. units := regStrUnit.FindAllString(unitstrs[k], -1)
  258. for _, v := range units {
  259. if moneyUnit[v] != 0 {
  260. unit = moneyUnit[v]
  261. break
  262. }
  263. }
  264. if unit != float64(0) { //取第一个
  265. break
  266. }
  267. }
  268. fnum = fnum * repUnit
  269. if unit == float64(0) {
  270. moneyFloat = fnum
  271. } else {
  272. moneyFloat = fnum * unit
  273. }
  274. if unit == 10000 {
  275. flag = false
  276. } else {
  277. flag = true
  278. }
  279. return
  280. }
  281. //清理所有空白符
  282. func CutAllSpace(text string) string {
  283. tmp := cutAllSpace.ReplaceAllString(text, "")
  284. tmp = replaceSymbol(tmp, spaces)
  285. return tmp
  286. }
  287. //符号替换
  288. func replaceString(con string, ret, rep []string) string {
  289. for k, v := range ret {
  290. if len(rep) > k {
  291. con = strings.Replace(con, v, rep[k], -1)
  292. }
  293. }
  294. return con
  295. }
  296. //过滤符号
  297. func replaceSymbol(con string, rep []string) string {
  298. for _, v := range rep {
  299. con = strings.Replace(con, v, "", -1)
  300. }
  301. return con
  302. }
  303. // @Description python in stopWords
  304. // @Author J 2023/3/7 16:15
  305. func in(target string) bool {
  306. //sort.Strings(str_array)
  307. index := sort.SearchStrings(stopWords, target)
  308. if index < len(stopWords) && stopWords[index] == target {
  309. return true
  310. }
  311. return false
  312. }
  313. func diffNatureDays(t1, t2 int64) int {
  314. if t1 == t2 {
  315. return -1
  316. }
  317. if t1 > t2 {
  318. t1, t2 = t2, t1
  319. }
  320. diffDays := 0
  321. secDiff := t2 - t1
  322. if secDiff > 86400 {
  323. tmpDays := int(secDiff / 86400)
  324. t1 += int64(tmpDays) * 86400
  325. diffDays += tmpDays
  326. }
  327. st := time.Unix(t1, 0)
  328. et := time.Unix(t2, 0)
  329. dateFormatTpl := "20060102"
  330. if st.Format(dateFormatTpl) != et.Format(dateFormatTpl) {
  331. diffDays += 1
  332. }
  333. return diffDays
  334. }
// FollwRecord is one follow-up record of a proposed project.
// NOTE(review): field meanings below are read off the field names only;
// confirm them against the code that fills and stores these records.
type FollwRecord struct {
	Proposed_id        string // identifier of the proposed project (presumably)
	Infoid             string // identifier of the source information item (presumably)
	Title              string // record title
	Project_stage_code string // code of the project stage (presumably)
	Jybxhref           string // link associated with the record (presumably)
	Project_scale      string // project scale description (presumably)
	Publishtime        int64  // publish time; ByPtime sorts on this field (unit presumably Unix seconds — confirm)
	Createtime         int64  // creation time (unit presumably Unix seconds — confirm)
}
  345. type ByPtime []FollwRecord
  346. func (a ByPtime) Len() int { return len(a) }
  347. func (a ByPtime) Less(i, j int) bool { return a[i].Publishtime > a[j].Publishtime }
  348. func (a ByPtime) Swap(i, j int) { a[i], a[j] = a[j], a[i] }