|
@@ -6,7 +6,6 @@ import (
|
|
"data_ai/ul"
|
|
"data_ai/ul"
|
|
log "github.com/donnie4w/go-logger/logger"
|
|
log "github.com/donnie4w/go-logger/logger"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
- "strings"
|
|
|
|
"sync"
|
|
"sync"
|
|
"unicode/utf8"
|
|
"unicode/utf8"
|
|
)
|
|
)
|
|
@@ -71,19 +70,14 @@ func ExtractFieldInfo(sid string, eid string) {
|
|
// 获取处理数据...
|
|
// 获取处理数据...
|
|
func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
tmpid := ul.BsonTOStringId(v["_id"])
|
|
tmpid := ul.BsonTOStringId(v["_id"])
|
|
- detail := getDetailText(v, tmpid) //获取正文文本
|
|
|
|
title := qu.ObjToString(v["title"])
|
|
title := qu.ObjToString(v["title"])
|
|
- dl := utf8.RuneCountInString(detail) //文本长度
|
|
|
|
- //过滤数据···
|
|
|
|
- if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil {
|
|
|
|
- return map[string]interface{}{}
|
|
|
|
- }
|
|
|
|
- if dl < 20 {
|
|
|
|
|
|
+ detail := getDetailText(v, tmpid) //获取正文文本
|
|
|
|
+ if NotInProgressInfo(title, detail, v) { //过滤信息
|
|
return map[string]interface{}{}
|
|
return map[string]interface{}{}
|
|
}
|
|
}
|
|
//识别结构,短文本结构
|
|
//识别结构,短文本结构
|
|
f_data, shorText := map[string]interface{}{}, false
|
|
f_data, shorText := map[string]interface{}{}, false
|
|
- if dl < 100 {
|
|
|
|
|
|
+ if utf8.RuneCountInString(detail) < 100 {
|
|
shorText = true
|
|
shorText = true
|
|
}
|
|
}
|
|
//文本格式转换
|
|
//文本格式转换
|
|
@@ -124,131 +118,9 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
|
|
|
|
//强制逻辑判断-
|
|
//强制逻辑判断-
|
|
ForcedLogicDecideInfo(f_data)
|
|
ForcedLogicDecideInfo(f_data)
|
|
- return f_data
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 确认抽取范围
|
|
|
|
-func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
|
|
|
|
- dict := map[string]interface{}{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total := 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Select(map[string]interface{}{"_id": 1, "ai_zhipu": 1}).Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%1000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- if tmp["ai_zhipu"] == nil { //已经识别的数据-不再识别
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- dict[tmpid] = tmpid
|
|
|
|
- }
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- return dict
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 获取附件名字信息
|
|
|
|
-func getpnsinfo(tmp map[string]interface{}) []string {
|
|
|
|
- arr := []string{}
|
|
|
|
- if projectinfo := qu.ObjToMap(tmp["projectinfo"]); projectinfo != nil {
|
|
|
|
- if attachments := qu.ObjToMap((*projectinfo)["attachments"]); attachments != nil {
|
|
|
|
- for _, v := range *attachments {
|
|
|
|
- if info := qu.ObjToMap(v); info != nil {
|
|
|
|
- if filename := qu.ObjToString((*info)["filename"]); filename != "" {
|
|
|
|
- arr = append(arr, filename)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return arr
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 获取正文数据
|
|
|
|
-func getDetailText(v map[string]interface{}, tmpid string) string {
|
|
|
|
- detail := qu.ObjToString(v["detail"])
|
|
|
|
- if ul.IsTool {
|
|
|
|
- detail = qu.ObjToString(v["details"])
|
|
|
|
- filetext := qu.ObjToString(v["filetext"])
|
|
|
|
- if utf8.RuneCountInString(detail) < 100 && filetext != "" {
|
|
|
|
- detail = filetext
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- //if bs := ul.OssGetObject(tmpid); bs != "" {
|
|
|
|
- // detail = bs
|
|
|
|
- //}
|
|
|
|
- }
|
|
|
|
- return detail
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 获取标的物-过滤产权-拟建
|
|
|
|
-func getPurList(v map[string]interface{}, detail string, f_info map[string]interface{}) []map[string]interface{} {
|
|
|
|
- if qu.ObjToString(v["toptype"]) == "拟建" || qu.ObjToString(v["toptype"]) == "产权" {
|
|
|
|
- return []map[string]interface{}{}
|
|
|
|
- }
|
|
|
|
- p_data := map[string]interface{}{}
|
|
|
|
- p_data["detail"] = qu.ObjToString(v["title"]) + "\n" + detail
|
|
|
|
- p_data["site"] = v["site"]
|
|
|
|
- p_data["attach_text"] = v["attach_text"]
|
|
|
|
- p_data["toptype"] = v["toptype"]
|
|
|
|
- if f_info["s_toptype"] != nil {
|
|
|
|
- p_data["toptype"] = f_info["s_toptype"]
|
|
|
|
- }
|
|
|
|
- if p_info := ul.PostPurchasingList(p_data); len(p_info) > 0 {
|
|
|
|
- if qu.IntAll(p_info["status"]) == 200 {
|
|
|
|
- p_list := ul.IsMarkInterfaceMap(p_info["purchasinglist"])
|
|
|
|
- return p_list
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return []map[string]interface{}{}
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 二次校验采购单位
|
|
|
|
-func CheckOutBuyerInfo(f_data map[string]interface{}) {
|
|
|
|
- if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" {
|
|
|
|
- if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil {
|
|
|
|
- if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
|
|
|
|
- f_data["s_buyer"] = ns_buyer
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 合并字段
|
|
|
|
-func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
|
|
|
|
- info := map[string]interface{}{}
|
|
|
|
- for _, v := range infos {
|
|
|
|
- for k1, v1 := range v {
|
|
|
|
- info[k1] = v1
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return info
|
|
|
|
-}
|
|
|
|
|
|
|
|
-// 强制逻辑判断数据
|
|
|
|
-func ForcedLogicDecideInfo(f_data map[string]interface{}) {
|
|
|
|
- //原则大模型
|
|
|
|
- //多单位不能一致
|
|
|
|
- s_buyer := qu.ObjToString(f_data["s_buyer"])
|
|
|
|
- s_winner := qu.ObjToString(f_data["s_winner"])
|
|
|
|
- if s_buyer == s_winner && s_buyer != "" {
|
|
|
|
- /*
|
|
|
|
- 1、若单位名称-不含公司保留采购单位
|
|
|
|
- 2、若单位名称-含公司保留中标单位
|
|
|
|
- */
|
|
|
|
- if strings.Contains(s_buyer, "公司") {
|
|
|
|
- f_data["s_buyer"] = ""
|
|
|
|
- } else {
|
|
|
|
- f_data["s_winner"] = ""
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- //代理机构
|
|
|
|
- if s_agency := qu.ObjToString(f_data["s_agency"]); s_agency != "" {
|
|
|
|
- if s_agency == s_buyer || s_agency == s_winner {
|
|
|
|
- f_data["s_agency"] = ""
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ //返回数据
|
|
|
|
+ return f_data
|
|
}
|
|
}
|
|
|
|
|
|
// 暂时不启用...无限重试
|
|
// 暂时不启用...无限重试
|