|
@@ -6,7 +6,6 @@ import (
|
|
|
"data_ai/ul"
|
|
|
log "github.com/donnie4w/go-logger/logger"
|
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
- "strings"
|
|
|
"sync"
|
|
|
"unicode/utf8"
|
|
|
)
|
|
@@ -71,26 +70,31 @@ func ExtractFieldInfo(sid string, eid string) {
|
|
|
// 获取处理数据...
|
|
|
func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
|
tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
- detail := getDetailText(v, tmpid) //获取正文文本
|
|
|
title := qu.ObjToString(v["title"])
|
|
|
- dl := utf8.RuneCountInString(detail) //文本长度
|
|
|
- //过滤数据···
|
|
|
- if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil || dl < 20 {
|
|
|
+ detail := getDetailText(v, tmpid) //获取正文文本
|
|
|
+ if NotInProgressInfo(title, detail, v) { //过滤信息
|
|
|
return map[string]interface{}{}
|
|
|
}
|
|
|
//识别结构,短文本结构
|
|
|
f_data, shorText := map[string]interface{}{}, false
|
|
|
- if dl < 100 {
|
|
|
+ if utf8.RuneCountInString(detail) < 100 {
|
|
|
shorText = true
|
|
|
}
|
|
|
//文本格式转换
|
|
|
detail = ul.HttpConvertToMarkdown(detail)
|
|
|
- //获取外围字段数据
|
|
|
- f_info_1 := prompt.AcquireExtractFieldInfoFirst(detail, shorText)
|
|
|
- f_info_2 := prompt.AcquireExtractFieldInfoFirst(detail, shorText)
|
|
|
- f_info := MergeInfo([]map[string]interface{}{f_info_1, f_info_2})
|
|
|
+ //短文本判断是否有效性
|
|
|
+ if shorText {
|
|
|
+ if info := prompt.AcquireJudgeShortInfo(detail); info["结果"] != "是" {
|
|
|
+ return map[string]interface{}{}
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //获取外围字段数据-拆分合并字段
|
|
|
+ f_info_1 := prompt.AcquireExtractFieldInfoFirst(detail)
|
|
|
+ f_info_2 := prompt.AcquireExtractFieldInfoSecond(detail)
|
|
|
+ f_info_3 := prompt.AcquireExtractFieldInfoThird(detail)
|
|
|
+ f_info := MergeInfo([]map[string]interface{}{f_info_1, f_info_2, f_info_3})
|
|
|
|
|
|
- //非短文本
|
|
|
+ //非短文本以下识别
|
|
|
if !shorText {
|
|
|
//获取分包信息
|
|
|
if pkg := prompt.AcquireNewMultiplePackageInfo(detail); len(pkg) > 0 {
|
|
@@ -100,111 +104,25 @@ func ResolveInfo(v map[string]interface{}) map[string]interface{} {
|
|
|
s_toptype, s_subtype := prompt.AcquireClassInfo(detail, title, qu.ObjToString(v["toptype"]))
|
|
|
f_info["s_toptype"] = s_toptype
|
|
|
f_info["s_subtype"] = s_subtype
|
|
|
- }
|
|
|
|
|
|
- //调用标的物识别
|
|
|
- if p_list := getPurList(v, detail, f_info); len(p_list) > 0 {
|
|
|
- f_info["purchasinglist"] = p_list
|
|
|
+ //调用标的物识别
|
|
|
+ if p_list := getPurList(v, detail, f_info); len(p_list) > 0 {
|
|
|
+ f_info["purchasinglist"] = p_list
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
//字段清洗
|
|
|
fns := getpnsinfo(v) //获取附件名字
|
|
|
f_data = clean.CleanFieldInfo(f_info, fns)
|
|
|
|
|
|
- //对于某些字段进行二级校验
|
|
|
- if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" {
|
|
|
- if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil {
|
|
|
- if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
|
|
|
- f_data["s_buyer"] = ns_buyer
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return f_data
|
|
|
-}
|
|
|
-
|
|
|
-func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
|
|
|
- dict := map[string]interface{}{}
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
- total := 0
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Select(map[string]interface{}{"_id": 1, "ai_zhipu": 1}).Iter()
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
- if total%1000 == 0 {
|
|
|
- log.Debug("cur index ", total)
|
|
|
- }
|
|
|
- if tmp["ai_zhipu"] == nil { //已经识别的数据-不再识别
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
- dict[tmpid] = tmpid
|
|
|
- }
|
|
|
- tmp = make(map[string]interface{})
|
|
|
- }
|
|
|
- return dict
|
|
|
-}
|
|
|
-
|
|
|
-// 获取附件名字信息
|
|
|
-func getpnsinfo(tmp map[string]interface{}) []string {
|
|
|
- arr := []string{}
|
|
|
- if projectinfo := qu.ObjToMap(tmp["projectinfo"]); projectinfo != nil {
|
|
|
- if attachments := qu.ObjToMap((*projectinfo)["attachments"]); attachments != nil {
|
|
|
- for _, v := range *attachments {
|
|
|
- if info := qu.ObjToMap(v); info != nil {
|
|
|
- if filename := qu.ObjToString((*info)["filename"]); filename != "" {
|
|
|
- arr = append(arr, filename)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return arr
|
|
|
-}
|
|
|
-
|
|
|
-func getDetailText(v map[string]interface{}, tmpid string) string {
|
|
|
- detail := qu.ObjToString(v["detail"])
|
|
|
- if ul.IsTool {
|
|
|
- detail = qu.ObjToString(v["details"])
|
|
|
- filetext := qu.ObjToString(v["filetext"])
|
|
|
- if utf8.RuneCountInString(detail) < 100 && filetext != "" {
|
|
|
- detail = filetext
|
|
|
- }
|
|
|
- } else {
|
|
|
- //if bs := ul.OssGetObject(tmpid); bs != "" {
|
|
|
- // detail = bs
|
|
|
- //}
|
|
|
- }
|
|
|
- return detail
|
|
|
-}
|
|
|
+ //采购单位二级校验
|
|
|
+ CheckOutBuyerInfo(f_data)
|
|
|
|
|
|
-// 获取标的物-过滤产权-拟建
|
|
|
-func getPurList(v map[string]interface{}, detail string, f_info map[string]interface{}) []map[string]interface{} {
|
|
|
- if qu.ObjToString(v["toptype"]) == "拟建" || qu.ObjToString(v["toptype"]) == "产权" {
|
|
|
- return []map[string]interface{}{}
|
|
|
- }
|
|
|
- p_data := map[string]interface{}{}
|
|
|
- p_data["detail"] = qu.ObjToString(v["title"]) + "\n" + detail
|
|
|
- p_data["site"] = v["site"]
|
|
|
- p_data["attach_text"] = v["attach_text"]
|
|
|
- p_data["toptype"] = v["toptype"]
|
|
|
- if f_info["s_toptype"] != nil {
|
|
|
- p_data["toptype"] = f_info["s_toptype"]
|
|
|
- }
|
|
|
- if p_info := ul.PostPurchasingList(p_data); len(p_info) > 0 {
|
|
|
- if qu.IntAll(p_info["status"]) == 200 {
|
|
|
- p_list := ul.IsMarkInterfaceMap(p_info["purchasinglist"])
|
|
|
- return p_list
|
|
|
- }
|
|
|
- }
|
|
|
- return []map[string]interface{}{}
|
|
|
-}
|
|
|
+ //强制逻辑判断-
|
|
|
+ ForcedLogicDecideInfo(f_data)
|
|
|
|
|
|
-// 合并字段
|
|
|
-func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
|
|
|
- info := map[string]interface{}{}
|
|
|
- for _, v := range infos {
|
|
|
- for k1, v1 := range v {
|
|
|
- info[k1] = v1
|
|
|
- }
|
|
|
- }
|
|
|
- return info
|
|
|
+ //返回数据
|
|
|
+ return f_data
|
|
|
}
|
|
|
|
|
|
// 暂时不启用...无限重试
|