123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- package main
- import (
- "fmt"
- util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "regexp"
- "unicode"
- "unicode/utf8"
- )
- // GetJyURLByID 获取剑鱼地址
- func GetJyURLByID(id string) string {
- var Url = "https://www.jianyu360.com/article/content/%s.html"
- url := fmt.Sprintf(Url, util.CommonEncodeArticle("content", id))
- return url
- }
// IsHanStart reports whether s begins with a Han character or with any
// Unicode letter. The empty string yields false.
//
// Only the first rune is decoded, so the call does not allocate. A leading
// invalid UTF-8 byte decodes to utf8.RuneError, which is neither Han nor a
// letter, so such input yields false.
func IsHanStart(s string) bool {
	if s == "" {
		return false
	}
	first, _ := utf8.DecodeRuneInString(s)
	return unicode.Is(unicode.Han, first) || unicode.IsLetter(first)
}
// IsHan reports whether the rune c belongs to the Unicode Han script.
func IsHan(c rune) bool {
	return unicode.In(c, unicode.Han)
}
// IsBracketStartWithHan reports whether s opens with an ASCII '(' and the
// text between it and the first ASCII ')' starts with a Han character or a
// Unicode letter.
//
// It returns false when s is empty, does not start with '(', has no closing
// ')', or the parentheses are empty. Full-width parentheses are not
// recognised.
func IsBracketStartWithHan(s string) bool {
	if len(s) == 0 || s[0] != '(' {
		return false
	}

	// Find the first closing parenthesis after the opening one. Scanning bytes
	// is safe here: ')' is ASCII and never occurs inside a multi-byte UTF-8
	// sequence.
	closeAt := -1
	for i := 1; i < len(s); i++ {
		if s[i] == ')' {
			closeAt = i
			break
		}
	}
	if closeAt < 0 {
		return false
	}

	inner := s[1:closeAt]
	if inner == "" {
		return false
	}

	// Decode the first rune instead of widening the first byte: converting the
	// lead byte of a multi-byte character to a rune produces an unrelated
	// Latin-1 code point, which misclassifies CJK punctuation as a letter.
	first, _ := utf8.DecodeRuneInString(inner)
	return unicode.IsLetter(first) || unicode.Is(unicode.Han, first)
}
- // IsCompanyName 判断字符串是否以汉字开头、以括号开头并且括号里面是汉字、以"公司"结尾,其中一个条件符合即返回true,否则返回false
- func IsCompanyName(s string) bool {
- r := []rune(s)
- //if len(r) >= 6 && (string(r[len(r)-6:]) == "有限公司" || string(r[len(r)-6:]) == "股份有限公司") {
- // return (IsHanStart(s) || IsBracketStartWithHan(s))
- //} else if len(r) >= 2 && string(r[len(r)-2:]) == "公司" {
- // return (IsHanStart(s) || IsBracketStartWithHan(s))
- //}
- if len(r) >= 2 {
- return (IsHanStart(s) || IsBracketStartWithHan(s))
- }
- return false
- }
// hanRunPattern matches one or more consecutive Han characters. It is compiled
// once at package initialisation rather than on every call.
var hanRunPattern = regexp.MustCompile(`[\p{Han}]+`)

// GetChineseCharacters returns the first contiguous run of Han characters
// found in s, or "" when s contains none. Later runs that are separated from
// the first by other characters are not included.
func GetChineseCharacters(s string) string {
	return hanRunPattern.FindString(s)
}
- func getCompanyName(name string) string {
- if IsCompanyName(name) {
- return name
- }
- return GetChineseCharacters(name)
- }
// IsUnicodeStart reports whether s begins with a validly encoded UTF-8 rune.
// It returns false for the empty string and for a string whose leading bytes
// do not form a valid UTF-8 sequence.
func IsUnicodeStart(s string) bool {
	if len(s) == 0 {
		return false
	}
	// DecodeRuneInString signals invalid input as (utf8.RuneError, 1), so a
	// bare size > 0 test would accept every non-empty string. A genuinely
	// encoded U+FFFD has width 3 and is still accepted.
	r, size := utf8.DecodeRuneInString(s)
	return r != utf8.RuneError || size != 1
}
// RemoveDuplicateSuffix collapses a run of suffix repeated at the end of str
// into a single occurrence, e.g. ("山东大学大学", "大学") yields "山东大学".
// str is returned unchanged when it does not end with suffix.
//
// suffix is matched as literal text, not as a regular expression.
func RemoveDuplicateSuffix(str string, suffix string) string {
	// Quote the suffix so that metacharacters such as '(' or '.' neither alter
	// the match nor cause MustCompile to panic on an unbalanced pattern.
	pattern := fmt.Sprintf(`^(.*?)(%s)+$`, regexp.QuoteMeta(suffix))
	re := regexp.MustCompile(pattern)

	// On a match, matches[1] is the shortest possible prefix and matches[2] is
	// one copy of the suffix.
	matches := re.FindStringSubmatch(str)
	if len(matches) == 3 {
		return matches[1] + matches[2]
	}
	return str
}
- //func findName(name string) []map[string]interface{} {
- // filter := bson.M{"name": name, "status": 1}
- // info, _ := Mgo.Find(wccBuyer, filter, nil, nil, false, -1, -1)
- //
- // return *info
- //}
- //
- //func findNameID(id string) []map[string]interface{} {
- // filter := bson.M{"name_id": id, "status": 1}
- // info, _ := Mgo.Find(wccBuyer, filter, nil, nil, false, -1, -1)
- //
- // return *info
- //}
// isStringRepeating reports whether str consists of a shorter unit repeated
// two or more times back to back, e.g. "山东大学山东大学" or "aaaa".
//
// Strings too short to hold two copies of anything ("" and single characters)
// yield false. The comparison is done on bytes; for valid UTF-8 input a
// repeating unit always starts on a character boundary, so this matches a
// character-level comparison.
func isStringRepeating(str string) bool {
	n := len(str)
	for unit := 1; unit <= n/2; unit++ {
		// Only unit lengths that divide the total length can tile the string.
		if n%unit != 0 {
			continue
		}
		// str shifted by unit equals itself exactly when every byte repeats
		// with that period, i.e. str is str[:unit] repeated n/unit times.
		if str[unit:] == str[:n-unit] {
			return true
		}
	}
	return false
}
|