|
@@ -0,0 +1,882 @@
|
|
|
+package main
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+ "context"
|
|
|
+ "encoding/json"
|
|
|
+ "fmt"
|
|
|
+ zhipu "github.com/itcwc/go-zhipu/model_api"
|
|
|
+ "github.com/itcwc/go-zhipu/utils"
|
|
|
+ "github.com/olivere/elastic/v7"
|
|
|
+ "io/ioutil"
|
|
|
+ util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
+ "log"
|
|
|
+ "math"
|
|
|
+ "net/http"
|
|
|
+ "reflect"
|
|
|
+ "regexp"
|
|
|
+ "sort"
|
|
|
+ "strconv"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
+ "unicode/utf8"
|
|
|
+)
|
|
|
+
|
|
|
// REG is the pattern DealRules uses to split the "match" half of a keyword
// rule into its individual groups (via REG.FindAllString).
// NOTE(review): REG is never assigned in this file, so it is presumably
// initialised elsewhere before DealRules is called; calling FindAllString on a
// nil *regexp.Regexp panics.
var REG *regexp.Regexp
|
|
|
+
|
|
|
// RuleDFA is one compiled keyword rule: a list of word groups that must be
// found in the text, each with its own required hit count, plus one group of
// excluded words with its own threshold.
type RuleDFA struct {
	Match       []DFA // word groups that must be present
	MatchNum    []int // required number of distinct hits for each Match group (parallel to Match)
	MisMatch    DFA   // words that must not be present
	MisMatchNum int   // number of distinct excluded-word hits that rejects the rule
}
|
|
|
+
|
|
|
// DFA is a keyword trie. Link is the root node; AddWordAll keys every level by
// a single byte of the word and stores each child as a *map[string]interface{}.
// Inside a node, "YN" is "1" when a word ends there (otherwise "0") and "K"
// optionally holds the complete word.
type DFA struct {
	Link map[string]interface{}
}
|
|
|
+
|
|
|
+// DealRules 处理识别规则
|
|
|
+func DealRules(rules []string) (i_rule []interface{}) {
|
|
|
+ for _, r := range rules {
|
|
|
+ if strings.HasPrefix(r, "'") && strings.HasSuffix(r, "'") { //正则
|
|
|
+ rs := []rune(r)
|
|
|
+ ru := string(rs[1 : len(rs)-1])
|
|
|
+ rureg, err := regexp.Compile(ru)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("error---rule:", r)
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ i_rule = append(i_rule, []interface{}{rureg}...)
|
|
|
+ } else { //规则,加入到敏感词匹配
|
|
|
+ matchnum := 0
|
|
|
+ mismatchnum := 0
|
|
|
+ isnum1 := false
|
|
|
+ isnum2 := false
|
|
|
+ numArr := make([]int, 0)
|
|
|
+ ruleDFA := &RuleDFA{
|
|
|
+ Match: []DFA{},
|
|
|
+ MisMatch: DFA{},
|
|
|
+ }
|
|
|
+ tmpArr := strings.Split(r, "^")
|
|
|
+ matchTmp := tmpArr[0]
|
|
|
+ ruleTextArr := REG.FindAllString(matchTmp, -1)
|
|
|
+ for _, match := range ruleTextArr {
|
|
|
+ matchnum, isnum1 = GetNum(match)
|
|
|
+ numArr = append(numArr, matchnum)
|
|
|
+ matchArr := GetRule(match, isnum1)
|
|
|
+ tmpDFA := DFA{
|
|
|
+ Link: make(map[string]interface{}),
|
|
|
+ }
|
|
|
+ tmpDFA.AddWord(matchArr...)
|
|
|
+ ruleDFA.Match = append(ruleDFA.Match, tmpDFA)
|
|
|
+ }
|
|
|
+ if len(tmpArr) == 2 {
|
|
|
+ mismatch := tmpArr[1]
|
|
|
+ mismatchnum, isnum2 = GetNum(mismatch)
|
|
|
+ mismatchArr := GetRule(mismatch, isnum2)
|
|
|
+ ruleDFA.MisMatch.AddWord(mismatchArr...)
|
|
|
+ }
|
|
|
+ ruleDFA.MatchNum = numArr
|
|
|
+ ruleDFA.MisMatchNum = mismatchnum
|
|
|
+ i_rule = append(i_rule, []interface{}{ruleDFA}...)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func (d *DFA) AddWord(keys ...string) {
|
|
|
+ d.AddWordAll(true, keys...)
|
|
|
+}
|
|
|
+
|
|
|
+func (d *DFA) AddWordAll(haskey bool, keys ...string) {
|
|
|
+ if d.Link == nil {
|
|
|
+ d.Link = make(map[string]interface{})
|
|
|
+ }
|
|
|
+ for _, key := range keys {
|
|
|
+ nowMap := &d.Link
|
|
|
+ for i := 0; i < len(key); i++ {
|
|
|
+ kc := key[i : i+1]
|
|
|
+ if v, ok := (*nowMap)[kc]; ok {
|
|
|
+ nowMap, _ = v.(*map[string]interface{})
|
|
|
+ } else {
|
|
|
+ newMap := map[string]interface{}{}
|
|
|
+ newMap["YN"] = "0"
|
|
|
+ (*nowMap)[kc] = &newMap
|
|
|
+ nowMap = &newMap
|
|
|
+ }
|
|
|
+ if i == len(key)-1 {
|
|
|
+ (*nowMap)["YN"] = "1"
|
|
|
+ if haskey {
|
|
|
+ (*nowMap)["K"] = key
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func (d *DFA) CheckSensitiveWord(src string, n int) (bool, []string) {
|
|
|
+ res := make([]string, 0)
|
|
|
+ tmpMap := make(map[string]int)
|
|
|
+ for j := 0; j < len(src); j++ {
|
|
|
+ nowMap := &d.Link
|
|
|
+ for i := j; i < len(src); i++ {
|
|
|
+ word := src[i : i+1]
|
|
|
+ nowMap, _ = (*nowMap)[word].(*map[string]interface{})
|
|
|
+ if nowMap != nil { // 存在,则判断是否为最后一个
|
|
|
+ if "1" == util.ObjToString((*nowMap)["YN"]) {
|
|
|
+ s := util.ObjToString((*nowMap)["K"])
|
|
|
+ tmpMap[s] = 1
|
|
|
+ //nowMap = &d.Link //匹配到之后继续匹配后边的内容
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ //nowMap = &d.Link
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(tmpMap) >= n {
|
|
|
+ for k, _ := range tmpMap {
|
|
|
+ res = append(res, k)
|
|
|
+ }
|
|
|
+ return true, res
|
|
|
+ }
|
|
|
+ return false, []string{}
|
|
|
+}
|
|
|
+
|
|
|
// ObjArrToStringArr converts a slice of arbitrary values into a slice of
// strings, keeping only the elements whose dynamic type is string and
// preserving their order. Non-string elements are dropped silently.
//
// A nil input yields nil; a non-nil input always yields a non-nil slice, which
// may be empty.
func ObjArrToStringArr(old []interface{}) []string {
	if old == nil {
		return nil
	}
	out := make([]string, 0, len(old))
	for _, v := range old {
		// The two-value type assertion never panics, so no recover is needed.
		if s, ok := v.(string); ok {
			out = append(out, s)
		}
	}
	return out
}
|
|
|
+
|
|
|
// GetRule extracts the alternative words from one rule group.
//
// text is expected to look like "(w1|w2|...)" when isnum is true, or like
// "(w1|w2|...)N" with a single trailing count character when isnum is false
// (isnum is the flag returned by GetNum: true means the group ends with ")").
// The surrounding parentheses, and the count character if present, are removed
// and the remainder is split on "|".
//
// It returns nil when text does not start with "(", when isnum is true but
// text does not end with ")", or when text is too short to contain the
// expected wrapper (such inputs used to panic with a slice-bounds error).
func GetRule(text string, isnum bool) (matchArr []string) {
	if !strings.HasPrefix(text, "(") {
		return nil
	}
	end := len(text) - 1 // drop the closing ")"
	if isnum {
		if !strings.HasSuffix(text, ")") {
			return nil
		}
	} else {
		end = len(text) - 2 // drop ")" together with the trailing count character
	}
	if end < 1 {
		return nil
	}
	return strings.Split(text[1:end], "|")
}
|
|
|
+
|
|
|
// GetNum returns the hit count requested by a rule group and whether the group
// ends with ")".
//
// A group ending with ")" carries no explicit count: the result is (1, true).
// Otherwise the last character of the group is parsed as a decimal digit and
// returned with false; a last character that is not a digit yields 0. Only the
// final character is consulted, so counts above 9 are not supported.
//
// An empty rule yields (0, false) instead of panicking.
func GetNum(rule string) (int, bool) {
	if strings.HasSuffix(rule, ")") {
		return 1, true
	}
	runes := []rune(rule)
	if len(runes) == 0 {
		return 0, false
	}
	// A parse failure deliberately falls back to 0, matching IntAll's behaviour
	// for non-numeric strings.
	num, _ := strconv.Atoi(string(runes[len(runes)-1]))
	return num, false
}
|
|
|
+
|
|
|
// IntAll converts num to an int, yielding 0 for unsupported types.
// It is IntAllDef with a default of 0.
func IntAll(num interface{}) int {
	return IntAllDef(num, 0)
}

// IntAllDef converts num to an int.
//
// Supported dynamic types are int, int8, int16, int32, int64, float32, float64
// (truncated by Go's float-to-int conversion) and string (parsed with
// strconv.Atoi). A string that does not parse yields 0, not defaultNum. Any
// other type, including nil, yields defaultNum.
func IntAllDef(num interface{}, defaultNum int) int {
	switch v := num.(type) {
	case int:
		return v
	case int8:
		return int(v)
	case int16:
		return int(v)
	case int32:
		return int(v)
	case int64:
		return int(v)
	case float32:
		return int(v)
	case float64:
		return int(v)
	case string:
		// The parse error is ignored on purpose: Atoi returns 0 on failure.
		parsed, _ := strconv.Atoi(v)
		return parsed
	default:
		return defaultNum
	}
}
|
|
|
+
|
|
|
+// TagDFAAnalyRules 单独的标签识别规则
|
|
|
+func TagDFAAnalyRules(text string, rules []interface{}) (res []string) {
|
|
|
+ defer util.Catch()
|
|
|
+ for _, r := range rules {
|
|
|
+ rDFA, b := r.(*RuleDFA)
|
|
|
+ //util.Debug(j, "规则===", b, rDFA.Match, rDFA.MatchNum, rDFA.MisMatch, rDFA.MisMatchNum)
|
|
|
+ if b { //规则DFA
|
|
|
+ //util.Debug("res========", res, len(rDFA.MatchNum) == len(rDFA.Match), len(rDFA.MatchNum))
|
|
|
+ if len(rDFA.MatchNum) == len(rDFA.Match) {
|
|
|
+ for i, matchnum := range rDFA.MatchNum {
|
|
|
+ if matchnum >= 1 {
|
|
|
+ btmp, restmp := rDFA.Match[i].CheckSensitiveWord(text, matchnum)
|
|
|
+ if !btmp { //逗号隔开的每条规则不匹配,继续匹配下一条
|
|
|
+ //log.Println("继续匹配")
|
|
|
+ break
|
|
|
+ }
|
|
|
+ res = append(res, restmp...)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// DFAAnalyRules DFA识别规则
|
|
|
+func DFAAnalyRules(text string, rules []interface{}) (bool, []string) {
|
|
|
+ var arr []string
|
|
|
+ //log.Println("len===", len(rules))
|
|
|
+ for _, r := range rules {
|
|
|
+ //log.Println("i--------------", i)
|
|
|
+ ruleReg, ok := r.(*regexp.Regexp)
|
|
|
+ if ok { //正则
|
|
|
+ //log.Println("正则===", ruleReg)
|
|
|
+ textArr := ruleReg.FindAllString(text, -1)
|
|
|
+ if len(textArr) > 0 {
|
|
|
+ regStr := []string{ruleReg.String()}
|
|
|
+ return true, regStr
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ rDFA, b := r.(*RuleDFA)
|
|
|
+ //log.Println(j, "规则===", b, rDFA.Match, rDFA.MatchNum, rDFA.MisMatch, rDFA.MisMatchNum)
|
|
|
+ if b { //规则DFA
|
|
|
+ //b1, b2 := false, false
|
|
|
+ b1, b2 := false, true
|
|
|
+ var res []string
|
|
|
+ //log.Println("res========", res, len(rDFA.MatchNum) == len(rDFA.Match), len(rDFA.MatchNum))
|
|
|
+ if len(rDFA.MatchNum) == len(rDFA.Match) {
|
|
|
+ for i, matchnum := range rDFA.MatchNum {
|
|
|
+ if matchnum >= 1 {
|
|
|
+ btmp, restmp := rDFA.Match[i].CheckSensitiveWord(text, matchnum)
|
|
|
+ //log.Println("btmp====", btmp, restmp)
|
|
|
+ if !btmp { //逗号隔开的每条规则不匹配,继续匹配下一条
|
|
|
+ //log.Println("继续匹配")
|
|
|
+ b2 = false
|
|
|
+ break
|
|
|
+ }
|
|
|
+ res = append(res, restmp...)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !b2 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ //走到这一步证明需要匹配的词正确个数满足要求,下面判断不需要匹配的词的情况
|
|
|
+ mismatchnum := rDFA.MisMatchNum
|
|
|
+ if mismatchnum >= 1 { //有排除词,排除词不应该出现在匹配的文本中
|
|
|
+ b1, _ = rDFA.MisMatch.CheckSensitiveWord(text, mismatchnum)
|
|
|
+ } else {
|
|
|
+ b1 = false
|
|
|
+ }
|
|
|
+ if !b1 { //不要匹配的词满足情况,跳出
|
|
|
+ return true, res
|
|
|
+ } else {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return false, arr
|
|
|
+}
|
|
|
+
|
|
|
+// MergeLabelData 处理标记权重
|
|
|
+func MergeLabelData(labelDatas []LabelData) map[string][]LabelData {
|
|
|
+ result := make(map[string][]LabelData)
|
|
|
+
|
|
|
+ for _, data := range labelDatas {
|
|
|
+ // 检查是否已存在相同 Sfield 的数据
|
|
|
+ if existingDatas, ok := result[data.Sfield]; ok {
|
|
|
+ merged := false
|
|
|
+ for i, existingData := range existingDatas {
|
|
|
+ // 如果 Name 和 Sfield 都相同,合并 Weight
|
|
|
+ if existingData.Name == data.Name && existingData.Sfield == data.Sfield {
|
|
|
+ existingDatas[i].TotalWeight = round(existingData.TotalWeight+data.TotalWeight, 2)
|
|
|
+ merged = true
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 如果未合并,添加新数据
|
|
|
+ if !merged {
|
|
|
+ result[data.Sfield] = append(result[data.Sfield], data)
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ result[data.Sfield] = []LabelData{data}
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return result
|
|
|
+}
|
|
|
+
|
|
|
// round rounds num to decimalPlaces digits after the decimal point using
// math.Round (halves are rounded away from zero). A decimalPlaces of zero or
// less rounds to a whole number.
func round(num float64, decimalPlaces int) float64 {
	scale := 1.0
	for remaining := decimalPlaces; remaining > 0; remaining-- {
		scale *= 10
	}
	return math.Round(num*scale) / scale
}
|
|
|
+
|
|
|
+// UpdateIndexByID 根据 ID 更新索引
|
|
|
+func UpdateIndexByID(client *elastic.Client, indexName string, id string, updatedData map[string]interface{}) error {
|
|
|
+ // 执行更新操作
|
|
|
+ _, err := client.Update().
|
|
|
+ Index(indexName).
|
|
|
+ Id(id).
|
|
|
+ Doc(updatedData).
|
|
|
+ Do(context.Background())
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// ZpAI 智普AI
|
|
|
+func ZpAI(apiKey, model, name string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1719803252) // token 过期时间
|
|
|
+ sys := "请根据我给出的公司名称,依据其单位性质、业务范围和单位职能,准确给出最符合的一个国标行业分类标签,分别给出大类、中类和小类。输出结果以JSON格式返回,格式如下:{\"label1\":\"大类\",\"label2\":\"中类\",\"label3\":\"小类\"}。我只要最匹配 的一个标签,不要返回多个字段;如果无法识别出类别,直接给我空字符串。按照以上要求输出,不要联想,不要无中生有,不要生成解释。单位名称是:"
|
|
|
+ text := fmt.Sprintf(sys, name)
|
|
|
+
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ZpAI1 智普问答,附带业务范围
|
|
|
+func ZpAI1(apiKey, model, name string, businessScope string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1751339252) // token 过期时间
|
|
|
+ sys := `请根据我给出的公司名称,依据其单位性质、单位职能和业务范围,准确给出最符合的一个国标行业分类标签,分别给出一级标签、二级标签、三级标签和四级标签,结果以JSON格式返回,格式如下:{"label1":"制造业","label2":"食品制造业","label3":"罐头食品制造","label4":"蔬菜、水果罐头制造"}。我只要最匹配 的一个标签,不要返回多个字段;如果无法识别出类别,直接给我空字符串。按照以上要求输出,不要联想,不要无中生有,不要生成解释。单位名称是:`
|
|
|
+ text := sys + name
|
|
|
+
|
|
|
+ if businessScope != "" {
|
|
|
+ text = text + ";业务范围是:" + businessScope
|
|
|
+ }
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ZpAI2 ZpAI2
|
|
|
+func ZpAI2(apiKey, model, text string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1751339252) // token 过期时间
|
|
|
+
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ZpAI3 传参主营业务
|
|
|
+func ZpAI3(apiKey, model, name string, businessScope string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1751339252) // token 过期时间
|
|
|
+ sys := `请根据我给出的公司名称,依据其单位性质、单位职能和业务范围,准确给出最符合的二个国标行业分类标签,分别给出大类、中类和小类,并给出每一个的可靠性,可靠性用浮点数表示,比如:0.85,输出结果以JSON格式返回,格式如下:{
|
|
|
+ "result":[
|
|
|
+ {"label1":"制造业","label2":"金属制品业","label3":"金属结构制造","score":0.83},
|
|
|
+ {"label1":"制造业","label2":"金属制品业","label3":"工具制造","score":0.80}
|
|
|
+ ]
|
|
|
+};返回结果按照score 降序排序。我只要最匹配的二个国标行业标签,只需要返回对应的中文。如果无法识别出类别,直接给我空字符串。按照以上要求输出,不要联想,不要无中生有,不要生成解释。`
|
|
|
+
|
|
|
+ text := fmt.Sprintf("%s%s", sys, name)
|
|
|
+
|
|
|
+ if businessScope != "" {
|
|
|
+ text = text + ";主营业务是:" + businessScope
|
|
|
+ }
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ZpAI4 传参主营业务
|
|
|
+func ZpAI4(apiKey, model, name string, businessScope string, names []string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1751339252) // token 过期时间
|
|
|
+ sys := `请根据我给出的公司名称,依据其单位性质、单位职能和业务范围,准确给出最符合的二个国标行业分类标签;分别给出一级标签、二级标签、三级标签和四级标签,并给出每一个的可靠性,可靠性用浮点数表示,比如:0.85,输出结果以JSON格式返回,格式如下:
|
|
|
+{
|
|
|
+ "result":[
|
|
|
+ {"label1":"制造业","label2":"通用设备制造业","label3":"通用零部件制造","label4":"机械零部件加工","score":0.83},
|
|
|
+ {"label1":"制造业","label2":"食品制造业","label3":"罐头食品制造","label4":"蔬菜、水果罐头制造","score":0.80}
|
|
|
+ ]
|
|
|
+};返回结果按照score 降序排序。我只要最匹配的二个国标行业标签,只需要返回对应的中文。如果无法识别出类别,直接给我空字符串。按照以上要求输出,不要联想,不要无中生有,不要生成解释。公司是:`
|
|
|
+
|
|
|
+ text := fmt.Sprintf("%s,%s", sys, name)
|
|
|
+ text = text + "请在我提供的国标行标签里返回,国标行业标签有:" + strings.Join(names, ",")
|
|
|
+
|
|
|
+ if businessScope != "" {
|
|
|
+ text = text + ";主营业务是:" + businessScope
|
|
|
+ }
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ZpName 根据国标名称,调用大模型,返回符合标准的国标行业分类名称
|
|
|
+func ZpName(apiKey, model, text string) (rest map[string]interface{}) {
|
|
|
+ expireAtTime := int64(1751339252) // token 过期时间
|
|
|
+ mssage := zhipu.PostParams{
|
|
|
+ Model: model,
|
|
|
+ Messages: []zhipu.Message{
|
|
|
+ {
|
|
|
+ Role: "user", // 消息的角色信息 详见文档
|
|
|
+ Content: text,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ }
|
|
|
+ token, _ := utils.GenerateToken(apiKey, expireAtTime)
|
|
|
+ postResponse, err := zhipu.BeCommonModel(mssage, token)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if choices, ok := postResponse["choices"].([]interface{}); ok {
|
|
|
+ if choice, ok2 := choices[0].(map[string]interface{}); ok2 {
|
|
|
+ if message, ok3 := choice["message"].(map[string]interface{}); ok3 {
|
|
|
+ if content, ok4 := message["content"].(string); ok4 {
|
|
|
+ //rest = content
|
|
|
+ content = strings.ReplaceAll(content, "\n", "")
|
|
|
+ content = strings.ReplaceAll(content, "json", "")
|
|
|
+ content = strings.ReplaceAll(content, "`", "")
|
|
|
+ err = json.Unmarshal([]byte(content), &rest)
|
|
|
+ if err != nil {
|
|
|
+ log.Println("Unmarshal err", err, "content:", content)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
// checkString reports whether s contains at least one ASCII letter (a-z, A-Z)
// or ASCII digit (0-9).
func checkString(s string) bool {
	for _, r := range s {
		switch {
		case r >= 'a' && r <= 'z',
			r >= 'A' && r <= 'Z',
			r >= '0' && r <= '9':
			return true
		}
	}
	return false
}
|
|
|
+
|
|
|
+// extractMainBusiness 根据营业范围提取主营业务
|
|
|
+func extractMainBusiness(business string) (res string) {
|
|
|
+ // 清除开头的 "许可项目:" 或 "一般项目:"
|
|
|
+ if strings.HasPrefix(business, "许可项目:") {
|
|
|
+ business = strings.TrimPrefix(business, "许可项目:")
|
|
|
+ } else if strings.HasPrefix(business, "一般项目:") {
|
|
|
+ business = strings.TrimPrefix(business, "一般项目:")
|
|
|
+ }
|
|
|
+
|
|
|
+ if business == "" {
|
|
|
+ return ""
|
|
|
+ }
|
|
|
+
|
|
|
+ business = extractByDelimiter(business, "。", 1) + "。"
|
|
|
+ firstPunctuation := findFirstPunctuation(business)
|
|
|
+ switch firstPunctuation {
|
|
|
+ case "。":
|
|
|
+ res = extractByDelimiter(business, "。", 1)
|
|
|
+ case "、":
|
|
|
+ secondPunctuation := findSecondPunctuation(business)
|
|
|
+ if secondPunctuation == "、" {
|
|
|
+ if strings.Count(business, ",") > strings.Count(business, ";") {
|
|
|
+ res = extractByDelimiter(business, ",", 2)
|
|
|
+ } else {
|
|
|
+ res = extractByDelimiter(business, ";", 2)
|
|
|
+ }
|
|
|
+ } else if secondPunctuation == ";" {
|
|
|
+ res = extractByDelimiter(business, ";", 2)
|
|
|
+ } else if secondPunctuation == "。" {
|
|
|
+ res = extractByDelimiter(business, "。", 1)
|
|
|
+ } else {
|
|
|
+ res = extractByDelimiter(business, ",", 2)
|
|
|
+ }
|
|
|
+ case ",":
|
|
|
+ res = extractByDelimiter(business, ",", 2)
|
|
|
+ case ";":
|
|
|
+ res = extractByDelimiter(business, ";", 2)
|
|
|
+ default:
|
|
|
+ res = business
|
|
|
+ }
|
|
|
+ if utf8.RuneCountInString(res) > 50 {
|
|
|
+ r := []rune(res)
|
|
|
+ res = string(r[:50])
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
// extractByDelimiter returns the first count delimiter-separated segments of
// s, re-joined with delimiter. When s has count segments or fewer it is
// returned unchanged.
func extractByDelimiter(s, delimiter string, count int) string {
	// Splitting into at most count+1 pieces is enough to tell whether s holds
	// more than count segments.
	pieces := strings.SplitN(s, delimiter, count+1)
	if len(pieces) <= count {
		return s
	}
	return strings.Join(pieces[:count], delimiter)
}
|
|
|
+
|
|
|
// findFirstPunctuation returns the first character of s that is one of
// "。", "、", "," or ";", or "" when s contains none of them.
func findFirstPunctuation(s string) string {
	for _, r := range s {
		switch r {
		case '。', '、', ',', ';':
			return string(r)
		}
	}
	return ""
}
|
|
|
+
|
|
|
// findSecondPunctuation returns the second occurrence in s of any of the
// characters "。", "、", "," or ";" (the two occurrences may be the same
// character), or "" when s contains fewer than two of them.
func findSecondPunctuation(s string) string {
	seenFirst := false
	for _, r := range s {
		if !strings.ContainsRune("。、,;", r) {
			continue
		}
		if seenFirst {
			return string(r)
		}
		seenFirst = true
	}
	return ""
}
|
|
|
+
|
|
|
// structToMap converts a struct, or a pointer to a struct, into a map keyed by
// field name.
//
// The key of a field is the name part of its `json` tag (the text before the
// first comma) when that is non-empty, otherwise the Go field name. Fields
// tagged `json:"-"` and unexported fields are skipped; reading an unexported
// field through reflection would panic.
//
// It returns nil when obj is nil, a nil pointer, or anything other than a
// struct or pointer to struct.
func structToMap(obj interface{}) map[string]interface{} {
	if obj == nil {
		return nil
	}
	v := reflect.ValueOf(obj)
	if v.Kind() == reflect.Ptr {
		if v.IsNil() {
			return nil
		}
		v = v.Elem()
	}
	if v.Kind() != reflect.Struct {
		return nil
	}

	t := v.Type()
	result := make(map[string]interface{}, t.NumField())
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		// PkgPath is non-empty only for unexported fields.
		if field.PkgPath != "" {
			continue
		}

		key := field.Name
		if tag := field.Tag.Get("json"); tag != "" {
			if tag == "-" {
				continue
			}
			// Drop tag options such as ",omitempty"; keep only the name.
			if comma := strings.Index(tag, ","); comma >= 0 {
				tag = tag[:comma]
			}
			if tag != "" {
				key = tag
			}
		}
		result[key] = v.Field(i).Interface()
	}
	return result
}
|
|
|
+
|
|
|
// httpRequestClient is shared by all HTTPRequest calls so that connections can
// be reused; it applies a 10 second overall timeout per request.
var httpRequestClient = &http.Client{
	Timeout: 10 * time.Second,
}

// HTTPRequest sends body as JSON to url with the given method and headers and
// decodes the response body as a JSON object.
//
// body is always marshalled, so a nil map is sent as the JSON literal null. No
// Content-Type header is added automatically; pass it in headers when the
// server needs one. The HTTP status code is not inspected: any response whose
// body is a JSON object is returned as a map.
//
// Errors from marshalling, request construction, transport, reading and
// decoding are wrapped with %w so callers can examine them with errors.Is and
// errors.As.
func HTTPRequest(method, url string, headers map[string]string, body map[string]interface{}) (map[string]interface{}, error) {
	// Serialise the request body.
	jsonBody, err := json.Marshal(body)
	if err != nil {
		return nil, fmt.Errorf("请求体序列化失败: %w", err)
	}

	// Build the request.
	req, err := http.NewRequest(method, url, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %w", err)
	}
	for key, value := range headers {
		req.Header.Set(key, value)
	}

	// Send it.
	resp, err := httpRequestClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("请求发送失败: %w", err)
	}
	defer resp.Body.Close()

	// Read and decode the response.
	respBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("读取响应体失败: %w", err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("解析响应体失败: %w", err)
	}
	return result, nil
}
|
|
|
+
|
|
|
// removeLastElement drops the last "-"-separated element of s, including the
// "-" in front of it. A string without "-" is returned unchanged.
func removeLastElement(s string) string {
	cut := strings.LastIndex(s, "-")
	if cut < 0 {
		return s
	}
	return s[:cut]
}
|
|
|
+
|
|
|
// intersectSlices returns the elements of slice2 that also occur in slice1, in
// slice2's order. Duplicates in slice2 are kept. The result is never nil.
func intersectSlices(slice1, slice2 []string) []string {
	known := make(map[string]struct{}, len(slice1))
	for _, s := range slice1 {
		known[s] = struct{}{}
	}

	common := make([]string, 0)
	for _, s := range slice2 {
		if _, present := known[s]; present {
			common = append(common, s)
		}
	}
	return common
}
|
|
|
+
|
|
|
+// findIntersection 针对拓扑标签,名称和营业范围求交集
|
|
|
+func findIntersection(data map[string]interface{}) []string {
|
|
|
+ topLabelsName, ok1 := data["top_labels_name"].([]interface{})
|
|
|
+ topLabelsBusinessScope, ok2 := data["top_labels_businessScope"].([]interface{})
|
|
|
+ if !ok1 || !ok2 {
|
|
|
+ return []string{}
|
|
|
+ }
|
|
|
+
|
|
|
+ if len(topLabelsName) == 1 || len(topLabelsBusinessScope) == 1 {
|
|
|
+ return []string{}
|
|
|
+ }
|
|
|
+
|
|
|
+ var processedName, processedScope []string
|
|
|
+ for _, name := range topLabelsName {
|
|
|
+ if nameStr, ok := name.(string); ok {
|
|
|
+ processedName = append(processedName, removeLastElement(nameStr))
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, scope := range topLabelsBusinessScope {
|
|
|
+ if scopeStr, ok := scope.(string); ok {
|
|
|
+ processedScope = append(processedScope, scopeStr)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return intersectSlices(processedName, processedScope)
|
|
|
+}
|
|
|
+
|
|
|
// removeDuplicates returns the distinct strings of arr in first-occurrence
// order. An empty or nil input yields a nil slice.
func removeDuplicates(arr []string) []string {
	seen := make(map[string]struct{})
	var unique []string
	for _, s := range arr {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
|
|
|
+
|
|
|
// IsInStringArray reports whether str is an element of arr.
//
// The lookup is a binary search over a sorted copy of arr, so the caller's
// slice is left untouched (it used to be sorted in place as a side effect).
func IsInStringArray(str string, arr []string) bool {
	sorted := make([]string, len(arr))
	copy(sorted, arr)
	sort.Strings(sorted)

	pos := sort.SearchStrings(sorted, str)
	return pos < len(sorted) && sorted[pos] == str
}
|
|
|
+
|
|
|
+// findSubClassifications 根据分类名称查找下级分类
|
|
|
+func findSubClassifications(classifications []IndustryClassification, classificationName string) ([]IndustryClassification, bool) {
|
|
|
+ for _, classification := range classifications {
|
|
|
+ if classification.Name == classificationName {
|
|
|
+ return classification.Children, true
|
|
|
+ }
|
|
|
+ // 递归查找子分类
|
|
|
+ children, found := findSubClassifications(classification.Children, classificationName)
|
|
|
+ if found {
|
|
|
+ return children, true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil, false
|
|
|
+}
|
|
|
+
|
|
|
+// 递归函数,将分类数据填充到 map 中,返回所有的name
|
|
|
+func populateClassificationMap(classifications []IndustryClassification, classificationMap map[string]bool) (names []string) {
|
|
|
+ for _, classification := range classifications {
|
|
|
+ classificationMap[classification.Name] = true
|
|
|
+ names = append(names, classification.Name)
|
|
|
+ if len(classification.Children) > 0 {
|
|
|
+ populateClassificationMap(classification.Children, classificationMap)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|