123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
- package main
- import (
- "regexp"
- "sort"
- "strconv"
- "strings"
- "time"
- )
- type TagMatching struct {
- tagName string // 标签名称
- tagCode string // 标签值(保存)
- matchField []string //关键词匹配字段
- matchKey string //匹配词
- matchKeyReg []*RegexpInfo
- addField []string //附加词匹配字段
- addKey string // 附件词匹配词
- addKeyReg []*RegexpInfo
- excludeField []string //排除词
- excludeKey string //排除词匹配词
- excludeKeyReg []*RegexpInfo
- //clearField []string // 清理词匹配字段
- clearKey []string //清理词匹配字段跟关键词一样
- }
// RegexpInfo pairs one keyword with the pattern compiled from it.
type RegexpInfo struct {
	keyStr string         // keyword exactly as it appeared in the list
	regs   *regexp.Regexp // compiled pattern; nil for keywords containing "&&"
}

// GetRegex splits key on "," and returns one RegexpInfo per non-empty item,
// in input order.
//
// An item containing "&&" is stored with a nil regs and only its text.
// Any other item is compiled as the case-insensitive pattern
// ".*(?i)<item>.*". Items are keyword text that may contain regular
// expression metacharacters (for example "c++" or "("); when such an item
// does not compile as written it is compiled as a literal string instead of
// panicking. An item that cannot be compiled either way (e.g. invalid UTF-8)
// is skipped.
func GetRegex(key string) []*RegexpInfo {
	var infos []*RegexpInfo
	for _, s := range strings.Split(key, ",") {
		if s == "" {
			continue
		}
		info := &RegexpInfo{keyStr: s}
		if !strings.Contains(s, "&&") {
			re, err := regexp.Compile(".*(?i)" + s + ".*")
			if err != nil {
				// Not a valid expression: match the keyword text literally.
				re, err = regexp.Compile(".*(?i)" + regexp.QuoteMeta(s) + ".*")
			}
			if err != nil {
				continue
			}
			info.regs = re
		}
		infos = append(infos, info)
	}
	return infos
}
// Patterns and lookup tables used by the money parsing functions.
//
// The character classes list their members without "|" separators: inside
// [...] a "|" is a literal pipe, so keeping it would make runs of pipes count
// as amount characters. The patterns are constants, so MustCompile is used
// instead of discarding the Compile error.
var (
	// regNumFloat matches a non-negative decimal number such as 0, 12 or 3.50.
	regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
	// regStrUnit matches a single unit character.
	regStrUnit = regexp.MustCompile(`[元万亿]`)
	// contentUnit matches a unit declaration searched for in the whole text.
	contentUnit = regexp.MustCompile(`(万元|单位/万)`)
	// numCapitals matches a run of 4 to 40 capital-amount characters.
	numCapitals = regexp.MustCompile(`([〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]{4,40})`)
	// regStrChar is the single-character class behind moneyRegChar.
	regStrChar   = `[〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]`
	moneyRegChar = regexp.MustCompile(regStrChar)
	// regQianw matches one or two digits followed by 千万.
	regQianw = regexp.MustCompile(`\d{1,2}千万`)
	// cutAllSpace matches any run of \s whitespace, including the empty run.
	cutAllSpace = regexp.MustCompile(`\s*`)
	// spaces lists space characters that are removed by plain replacement.
	spaces = []string{"\u3000", "\u2003", "\u00a0", "\t", "\r", "\n"}
	// moneyChar maps a numeral character to its float64 value; "点" maps to
	// the string "." instead. "〇" is intentionally not listed.
	moneyChar = map[string]interface{}{
		"一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
		"六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
		"百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
		"零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
	}
	// moneyUnit maps a unit character to its multiplier.
	moneyUnit = map[string]float64{
		"元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
	}
)
// currencyItem maps a currency marker that may appear in text to the
// canonical currency name. The dollar sign is listed in both its ASCII form
// and its fullwidth form (U+FF04); the latter is written as an escape so the
// two keys stay distinct (a map literal may not repeat a constant key).
var currencyItem = map[string]string{
	"人民币":     "人民币",
	"rmb":     "人民币",
	"RMB":     "人民币",
	"$":       "美元",
	"\uFF04":  "美元",
	"美元":      "美元",
	"港元":      "港币",
	"港币":      "港币",
	"澳币":      "澳币",
	"澳元":      "澳币",
}

// GetCurrency returns the currency mentioned in text.
//
// It returns "" for empty text and "人民币" when no marker from currencyItem
// occurs. When several markers occur, the one that appears first in text
// wins (ties are broken by the smaller key), so the result does not depend
// on map iteration order.
func GetCurrency(text string) (currency string) {
	if text == "" {
		return ""
	}
	currency = "人民币"
	bestPos, bestKey := -1, ""
	for k, v := range currencyItem {
		pos := strings.Index(text, k)
		if pos < 0 {
			continue
		}
		if bestPos < 0 || pos < bestPos || (pos == bestPos && k < bestKey) {
			bestPos, bestKey, currency = pos, k, v
		}
	}
	return currency
}
- //金额转换
- func ObjToMoney(text string) float64 {
- isfindUnit := true
- ret := capitalMoney(text)
- if ret < float64(10000) || ret > float64(50000000000) {
- ret2, b := numMoney(text)
- isfindUnit = b
- if ret2 > ret {
- ret = ret2
- }
- }
- f, _ := strconv.ParseFloat(strconv.FormatFloat(ret, 'f', 4, 64), 64)
- // if f < 1 {
- // f = 0
- // }
- //如果金额小于50,全文检索单位:万
- if f < 50 && f > 0 && isfindUnit {
- rep := contentUnit.FindAllStringIndex(text, -1)
- if len(rep) > 0 {
- f = f * 10000
- }
- }
- return f
- }
- func capitalMoney(text string) float64 {
- nodes := []float64{}
- node := float64(0)
- tmp := float64(0)
- decimals := 0.0
- ishaspoint := false //是否含小数点
- fnum := float64(0)
- end := false
- //str := fmt.Sprint(data[0])
- //提取第一个大写信息
- strmatch := numCapitals.FindAllStringSubmatch(text, -1)
- if len(strmatch) > 0 {
- text = strmatch[0][0]
- }
- suffixUnit := float64(1)
- if strings.HasSuffix(text, "万") || strings.HasSuffix(text, "万元") || strings.HasSuffix(text, "万元整") {
- index := strings.LastIndex(text, "万")
- text = text[0:index]
- suffixUnit = float64(10000)
- }
- moneyRegChar.ReplaceAllStringFunc(text, func(key string) string {
- if key == "元" || key == "圆" || key == "点" {
- ishaspoint = true
- }
- if v, ok := moneyChar[key].(float64); ok && !end {
- if ishaspoint && v > 10 { //排除后面有其他的单位
- return ""
- }
- //fmt.Println(key, v, fnum)
- if v < 10 && v >= 0 {
- if ishaspoint { //小数部分
- if v >= 1 {
- fnum = v
- } else if v < 1 && v > 0 {
- decimals += fnum * v
- }
- } else {
- if tmp != float64(0) {
- node += tmp
- }
- tmp = float64(v)
- }
- } else if v == 10000 || v == 100000000 { //单位万、亿
- if tmp != float64(0) {
- node += tmp
- tmp = float64(0)
- }
- nodes = append(nodes, node*float64(v))
- node = float64(0)
- } else {
- if v == 10 && tmp == 0 {
- tmp = 1
- }
- tmp = tmp * float64(v)
- node += tmp
- tmp = float64(0)
- }
- }
- if key == "整" || key == "正" || key == "分" {
- end = true
- }
- return ""
- })
- nodes = append(nodes, node, tmp)
- ret := float64(0)
- for _, v := range nodes {
- ret += v
- }
- return (ret + decimals) * suffixUnit
- }
- //数字金额转换
- func numMoney(text string) (moneyFloat float64, flag bool) {
- //tmp := fmt.Sprintf("%f", data[0])
- repUnit := float64(1)
- if regQianw.MatchString(text) {
- text = strings.Replace(text, "千万", "万", -1)
- repUnit = float64(1000)
- }
- text = replaceSymbol(text, []string{",", ",", "(", ")", "(", ")", ":", "\n"})
- text = replaceString(text, []string{"万元", "亿元", "."}, []string{"万", "亿", "."})
- text = CutAllSpace(text)
- rets := regNumFloat.FindAllString(text, -1)
- fnums := []float64{}
- unitstrs := []string{}
- if len(rets) > 0 {
- pindex := 0 //单位前置
- for k, v := range rets {
- f, err := strconv.ParseFloat(v, 64)
- if err == nil {
- fnums = append(fnums, f)
- index := strings.Index(text, v)
- //单位后置
- start := index + len(v)
- end := start + 3
- //log.Println("vvv", tmp, v, pindex, index, start)
- if k > 0 {
- if start >= pindex+3 {
- pstart := pindex + 3
- if pstart >= index {
- pstart = index
- }
- if len(text) > end {
- unitstrs = append(unitstrs, text[pstart:index]+text[start:end])
- } else {
- unitstrs = append(unitstrs, text[pstart:index]+text[start:])
- }
- } else {
- if len(text) > end {
- unitstrs = append(unitstrs, text[start:end])
- } else {
- unitstrs = append(unitstrs, text[start:])
- }
- }
- } else {
- if len(text) > end {
- if index-3 >= 0 {
- unitstrs = append(unitstrs, text[index-3:index]+text[start:end])
- } else {
- unitstrs = append(unitstrs, text[start:end])
- }
- } else {
- if index-3 >= 0 {
- unitstrs = append(unitstrs, text[index-3:index]+text[start:])
- } else {
- unitstrs = append(unitstrs, text[start:])
- }
- }
- }
- pindex = start
- }
- }
- }
- //log.Println("unitstrs", fnums, unitstrs)
- unit := float64(0)
- fnum := float64(0)
- for k, v := range fnums {
- fnum = v
- units := regStrUnit.FindAllString(unitstrs[k], -1)
- for _, v := range units {
- if moneyUnit[v] != 0 {
- unit = moneyUnit[v]
- break
- }
- }
- if unit != float64(0) { //取第一个
- break
- }
- }
- fnum = fnum * repUnit
- if unit == float64(0) {
- moneyFloat = fnum
- } else {
- moneyFloat = fnum * unit
- }
- if unit == 10000 {
- flag = false
- } else {
- flag = true
- }
- return
- }
- //清理所有空白符
- func CutAllSpace(text string) string {
- tmp := cutAllSpace.ReplaceAllString(text, "")
- tmp = replaceSymbol(tmp, spaces)
- return tmp
- }
// replaceString replaces every occurrence of ret[i] in con with rep[i], one
// pair after another in index order, so a later pair sees the output of the
// earlier ones. Entries of ret without a counterpart in rep are ignored.
func replaceString(con string, ret, rep []string) string {
	pairs := len(ret)
	if len(rep) < pairs {
		pairs = len(rep)
	}
	for i := 0; i < pairs; i++ {
		con = strings.Replace(con, ret[i], rep[i], -1)
	}
	return con
}
// replaceSymbol deletes every occurrence of each string in rep from con.
// The strings are removed one after another in slice order.
func replaceSymbol(con string, rep []string) string {
	for i := range rep {
		con = strings.Replace(con, rep[i], "", -1)
	}
	return con
}
- // @Description python in stopWords
- // @Author J 2023/3/7 16:15
- func in(target string) bool {
- //sort.Strings(str_array)
- index := sort.SearchStrings(stopWords, target)
- if index < len(stopWords) && stopWords[index] == target {
- return true
- }
- return false
- }
// diffNatureDays returns the number of calendar days between two Unix
// timestamps given in seconds; the argument order does not matter.
//
// Whole 86400-second periods are counted first (only when the gap exceeds
// 86400 seconds); one more day is added when the remaining span still starts
// and ends on different local calendar dates. Equal timestamps yield -1.
func diffNatureDays(t1, t2 int64) int {
	const secondsPerDay = 86400

	if t1 == t2 {
		return -1
	}
	from, to := t1, t2
	if from > to {
		from, to = to, from
	}

	days := 0
	if gap := to - from; gap > secondsPerDay {
		days = int(gap / secondsPerDay)
		from += int64(days) * secondsPerDay
	}

	fy, fm, fd := time.Unix(from, 0).Date()
	ty, tm, td := time.Unix(to, 0).Date()
	if fy != ty || fm != tm || fd != td {
		days++
	}
	return days
}
// FollwRecord is one follow-up record: six string attributes followed by the
// publish and create timestamps.
type FollwRecord struct {
	Proposed_id, Infoid, Title, Project_stage_code, Jybxhref, Project_scale string

	Publishtime, Createtime int64
}

// ByPtime implements sort.Interface and orders records by Publishtime,
// largest value first.
type ByPtime []FollwRecord

// Len returns the number of records.
func (recs ByPtime) Len() int {
	return len(recs)
}

// Less reports whether record i has a larger Publishtime than record j.
func (recs ByPtime) Less(i, j int) bool {
	return recs[j].Publishtime < recs[i].Publishtime
}

// Swap exchanges records i and j.
func (recs ByPtime) Swap(i, j int) {
	recs[j], recs[i] = recs[i], recs[j]
}
|