|
@@ -0,0 +1,354 @@
|
|
|
+package extract
|
|
|
+
|
|
|
+import (
|
|
|
+ "bytes"
|
|
|
+ "data_ai/ul"
|
|
|
+ "encoding/json"
|
|
|
+ "fmt"
|
|
|
+ log "github.com/donnie4w/go-logger/logger"
|
|
|
+ "io"
|
|
|
+ qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
|
+ "net/http"
|
|
|
+ "strings"
|
|
|
+ "sync"
|
|
|
+ "time"
|
|
|
+)
|
|
|
+
|
|
|
+func duibifenbao() {
|
|
|
+ dataArr, _ := ul.BidMgo.Find("zktest_sample_data_source_deepseek", map[string]interface{}{}, nil, map[string]interface{}{})
|
|
|
+ dict1, dict2, dict3 := map[string]interface{}{}, map[string]interface{}{}, map[string]interface{}{}
|
|
|
+ for _, v := range dataArr {
|
|
|
+ tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
+ ai_zhipu := qu.ObjToMap(v["ai_zhipu"])
|
|
|
+ ai_deepseek := qu.ObjToMap(v["ai_deepseek"])
|
|
|
+ ext_package := qu.ObjToMap(v["package"])
|
|
|
+ data := map[string]interface{}{}
|
|
|
+ if ai_zhipu != nil {
|
|
|
+ s_pkg := *qu.ObjToMap((*ai_zhipu)["s_pkg"])
|
|
|
+ com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
|
|
|
+ if len(com_package) > 1 {
|
|
|
+ dict1[tmpid] = tmpid
|
|
|
+ data["zhipu"] = 1
|
|
|
+ } else {
|
|
|
+ data["zhipu"] = 0
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ai_deepseek != nil {
|
|
|
+ s_pkg := *qu.ObjToMap((*ai_deepseek)["s_pkg"])
|
|
|
+ com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
|
|
|
+ if len(com_package) > 1 {
|
|
|
+ dict2[tmpid] = tmpid
|
|
|
+ data["deepseek"] = 1
|
|
|
+ } else {
|
|
|
+ data["deepseek"] = 0
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ext_package != nil {
|
|
|
+ if len(*ext_package) > 1 {
|
|
|
+ dict3[tmpid] = tmpid
|
|
|
+ data["extract"] = 1
|
|
|
+ } else {
|
|
|
+ data["extract"] = 0
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(data) > 0 {
|
|
|
+ data["_id"] = v["_id"]
|
|
|
+ data["href"] = v["href"]
|
|
|
+ data["jyhref"] = fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
|
|
|
+ ul.BidMgo.Save("zktest_0225", data)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func test1() {
|
|
|
+ dict1 := map[string]interface{}{}
|
|
|
+ dict2 := map[string]interface{}{}
|
|
|
+ for k, _ := range dict2 {
|
|
|
+ if dict1[k] == nil {
|
|
|
+ log.Debug(k)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.Debug("is over ...")
|
|
|
+}
|
|
|
+
|
|
|
+// 合并
|
|
|
+func merge1(ai_coll string, ai_key string) {
|
|
|
+ dataArr, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
|
|
|
+ log.Debug("查询量···", len(dataArr))
|
|
|
+ pool_mgo := make(chan bool, 10)
|
|
|
+ wg_mgo := &sync.WaitGroup{}
|
|
|
+ for k, v := range dataArr {
|
|
|
+ if k%100 == 0 {
|
|
|
+ log.Debug("cur index ", k)
|
|
|
+ }
|
|
|
+ if v[ai_key] == nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ pool_mgo <- true
|
|
|
+ wg_mgo.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool_mgo
|
|
|
+ wg_mgo.Done()
|
|
|
+ }()
|
|
|
+ u_id := ul.BsonTOStringId(tmp["_id"])
|
|
|
+ if u_id != "664af2af66cf0db42a3d217e" {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ data := *qu.ObjToMap(tmp[ai_key])
|
|
|
+ tmp["ai_zhipu"] = data
|
|
|
+ update_info := make(map[string]interface{}, 0)
|
|
|
+ ul.ChooseCheckDataAI(tmp, &update_info)
|
|
|
+ if u_id != "" {
|
|
|
+ //ul.SourceMgo.UpdateById(ai_coll, u_id, map[string]interface{}{
|
|
|
+ // "$set": update_info,
|
|
|
+ //})
|
|
|
+ }
|
|
|
+ }(v)
|
|
|
+ }
|
|
|
+ wg_mgo.Wait()
|
|
|
+ log.Debug("is over ... ")
|
|
|
+}
|
|
|
+
|
|
|
+// 对比程序
|
|
|
+func compare1(ai_coll string) {
|
|
|
+ fields := map[string]string{
|
|
|
+ "toptype": "string",
|
|
|
+ "subtype": "string",
|
|
|
+ "area": "string",
|
|
|
+ "city": "string",
|
|
|
+ "projectname": "string",
|
|
|
+ "projectcode": "string",
|
|
|
+ "buyer": "string",
|
|
|
+ "s_winner": "string",
|
|
|
+ "budget": "float",
|
|
|
+ "bidamount": "float",
|
|
|
+ }
|
|
|
+
|
|
|
+ dataArr1, _ := ul.PyMgo.Find("standard_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
|
|
|
+ dataArr2, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
|
|
|
+ log.Debug("查询数量:", len(dataArr1), len(dataArr2))
|
|
|
+ biaozhu, check_exclude, exclude_all := creat(dataArr1, false) //标注数据···
|
|
|
+ deepseek, _, _ := creat(dataArr2, false)
|
|
|
+ dataArr1 = nil
|
|
|
+ dataArr2 = nil
|
|
|
+ //计数
|
|
|
+ tj_deepseek := duibi(fields, biaozhu, deepseek, check_exclude, exclude_all)
|
|
|
+ log.Debug("...................")
|
|
|
+ arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
|
|
|
+ for _, v := range arr {
|
|
|
+ t2, s2 := tj_deepseek[v]["total"], tj_deepseek[v]["same"]
|
|
|
+ f2 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
|
|
|
+ log.Debug(f2)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// 构建数据
|
|
|
+func creat(dataArr []map[string]interface{}, zhipu bool) (map[string]map[string]interface{}, map[string]map[string]interface{}, map[string]interface{}) {
|
|
|
+ dict := map[string]map[string]interface{}{}
|
|
|
+ check_exclude := map[string]map[string]interface{}{}
|
|
|
+ exclude_all := map[string]interface{}{}
|
|
|
+ for _, biaozhu := range dataArr {
|
|
|
+ tmpid := ul.BsonTOStringId(biaozhu["_id"])
|
|
|
+ if biaozhu["check_exclude"] != nil {
|
|
|
+ check_exclude[tmpid] = *qu.ObjToMap(biaozhu["check_exclude"])
|
|
|
+ }
|
|
|
+ if biaozhu["exclude_all"] != nil {
|
|
|
+ exclude_all[tmpid] = qu.IntAll(biaozhu["exclude_all"])
|
|
|
+ }
|
|
|
+ if zhipu {
|
|
|
+ if biaozhu["ai_deepseek"] != nil {
|
|
|
+ biaozhu = *qu.ObjToMap(biaozhu["ai_deepseek"])
|
|
|
+ }
|
|
|
+ toptype := qu.ObjToString(biaozhu["s_toptype"])
|
|
|
+ subtype := qu.ObjToString(biaozhu["s_subtype"])
|
|
|
+ area := qu.ObjToString(biaozhu["s_area"])
|
|
|
+ city := qu.ObjToString(biaozhu["s_city"])
|
|
|
+ projectname := qu.ObjToString(biaozhu["s_projectname"])
|
|
|
+ projectcode := qu.ObjToString(biaozhu["s_projectcode"])
|
|
|
+ budget := qu.Float64All(biaozhu["s_budget"])
|
|
|
+ bidamount := qu.Float64All(biaozhu["s_bidamount"])
|
|
|
+ buyer := qu.ObjToString(biaozhu["s_buyer"])
|
|
|
+ s_winner := qu.ObjToString(biaozhu["s_winner"])
|
|
|
+ info := map[string]interface{}{}
|
|
|
+ info["toptype"] = toptype
|
|
|
+ info["subtype"] = subtype
|
|
|
+ info["area"] = area
|
|
|
+ info["city"] = city
|
|
|
+ info["projectname"] = projectname
|
|
|
+ info["projectcode"] = projectcode
|
|
|
+ info["budget"] = budget
|
|
|
+ info["bidamount"] = bidamount
|
|
|
+ info["buyer"] = buyer
|
|
|
+ info["s_winner"] = s_winner
|
|
|
+ dict[tmpid] = info
|
|
|
+ } else {
|
|
|
+ toptype := qu.ObjToString(biaozhu["toptype"])
|
|
|
+ subtype := qu.ObjToString(biaozhu["subtype"])
|
|
|
+ area := qu.ObjToString(biaozhu["area"])
|
|
|
+ city := qu.ObjToString(biaozhu["city"])
|
|
|
+ projectname := qu.ObjToString(biaozhu["projectname"])
|
|
|
+ projectcode := qu.ObjToString(biaozhu["projectcode"])
|
|
|
+ budget := qu.Float64All(biaozhu["budget"])
|
|
|
+ bidamount := qu.Float64All(biaozhu["bidamount"])
|
|
|
+ buyer := qu.ObjToString(biaozhu["buyer"])
|
|
|
+ s_winner := qu.ObjToString(biaozhu["s_winner"])
|
|
|
+ info := map[string]interface{}{}
|
|
|
+ info["toptype"] = toptype
|
|
|
+ info["subtype"] = subtype
|
|
|
+ info["area"] = area
|
|
|
+ info["city"] = city
|
|
|
+ info["projectname"] = projectname
|
|
|
+ info["projectcode"] = projectcode
|
|
|
+ info["budget"] = budget
|
|
|
+ info["bidamount"] = bidamount
|
|
|
+ info["buyer"] = buyer
|
|
|
+ info["s_winner"] = s_winner
|
|
|
+ dict[tmpid] = info
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return dict, check_exclude, exclude_all
|
|
|
+}
|
|
|
+
|
|
|
+func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}, check_exclude map[string]map[string]interface{}, exclude_all map[string]interface{}) map[string]map[string]int {
|
|
|
+ //计数
|
|
|
+ tj := map[string]map[string]int{}
|
|
|
+ for tmpid, tmp := range source {
|
|
|
+ bz := biaozhu[tmpid]
|
|
|
+ exclude := check_exclude[tmpid]
|
|
|
+ if qu.IntAll(exclude_all[tmpid]) == 1 {
|
|
|
+ continue //整条过滤
|
|
|
+ }
|
|
|
+ for filed, typeof := range fields {
|
|
|
+ if exclude[filed] != nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ nums := tj[filed]
|
|
|
+ if nums == nil {
|
|
|
+ nums = map[string]int{}
|
|
|
+ }
|
|
|
+ if typeof == "string" {
|
|
|
+ b_value := qu.ObjToString(bz[filed])
|
|
|
+ s_value := qu.ObjToString(tmp[filed])
|
|
|
+ //字符串通用转换
|
|
|
+ b_value, s_value = c(b_value), c(s_value)
|
|
|
+
|
|
|
+ if b_value == "" && s_value == "" {
|
|
|
+
|
|
|
+ } else {
|
|
|
+ nums["total"] = qu.IntAll(nums["total"]) + 1
|
|
|
+ if b_value == s_value {
|
|
|
+ nums["same"] = qu.IntAll(nums["same"]) + 1
|
|
|
+ } else {
|
|
|
+ if filed == "buyer" {
|
|
|
+ //log.Debug("标注:", b_value, "~", "模板:", s_value)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if typeof == "float" {
|
|
|
+ b_value := qu.Float64All(bz[filed])
|
|
|
+ s_value := qu.Float64All(tmp[filed])
|
|
|
+ if b_value == 0.0 && s_value == 0.0 {
|
|
|
+
|
|
|
+ } else {
|
|
|
+ nums["total"] = qu.IntAll(nums["total"]) + 1
|
|
|
+ if b_value == s_value {
|
|
|
+ nums["same"] = qu.IntAll(nums["same"]) + 1
|
|
|
+ } else {
|
|
|
+ if filed == "budget" {
|
|
|
+ //log.Debug(fmt.Sprintf("%f", b_value), "~", fmt.Sprintf("%f", s_value), "~", tmpid)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+
|
|
|
+ }
|
|
|
+ tj[filed] = nums
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return tj
|
|
|
+}
|
|
|
+
|
|
|
+func update1() {
|
|
|
+ dataArr, _ := ul.BidMgo.Find("zktest_deepseek_0124", map[string]interface{}{}, nil, map[string]interface{}{})
|
|
|
+ for _, v := range dataArr {
|
|
|
+ //tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
+ if v["ai_zhipu"] != nil {
|
|
|
+ ai_zhipu := *qu.ObjToMap(v["ai_zhipu"])
|
|
|
+ if ai_zhipu["s_pkg"] != nil {
|
|
|
+ s_pkg := *qu.ObjToMap(ai_zhipu["s_pkg"])
|
|
|
+ s_budget := qu.Float64All(s_pkg["s_budget"])
|
|
|
+ s_bidamount := qu.Float64All(s_pkg["s_bidamount"])
|
|
|
+ s_winner := qu.ObjToString(s_pkg["s_winner"])
|
|
|
+ if s_budget > 0.0 && s_budget > qu.Float64All(ai_zhipu["s_budget"]) {
|
|
|
+ ai_zhipu["s_budget"] = s_budget
|
|
|
+ }
|
|
|
+ if s_bidamount > 0.0 && s_bidamount > qu.Float64All(ai_zhipu["s_bidamount"]) {
|
|
|
+ ai_zhipu["s_bidamount"] = s_bidamount
|
|
|
+ }
|
|
|
+ if s_winner != "" {
|
|
|
+ ai_zhipu["s_winner"] = s_winner
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ul.BidMgo.Save("zktest_deepseek_0124_1", map[string]interface{}{
|
|
|
+ "ai_zhipu": ai_zhipu,
|
|
|
+ "_id": v["_id"],
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.Debug("is over ...")
|
|
|
+}
|
|
|
+
|
|
|
+func export1() {
|
|
|
+ dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
|
|
|
+ pool_mgo := make(chan bool, 1)
|
|
|
+ wg_mgo := &sync.WaitGroup{}
|
|
|
+ for _, v := range dataArr {
|
|
|
+ pool_mgo <- true
|
|
|
+ wg_mgo.Add(1)
|
|
|
+ go func(tmp map[string]interface{}) {
|
|
|
+ defer func() {
|
|
|
+ <-pool_mgo
|
|
|
+ wg_mgo.Done()
|
|
|
+ }()
|
|
|
+ tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
+ data := ul.BidMgo.FindById("bidding", tmpid)
|
|
|
+ if len(data) == 0 || data == nil {
|
|
|
+ log.Debug("异常")
|
|
|
+ }
|
|
|
+ ul.BidMgo.Save("zktest_sample_data_source_4", data)
|
|
|
+ }(v)
|
|
|
+ }
|
|
|
+ wg_mgo.Wait()
|
|
|
+ log.Debug("is over ...")
|
|
|
+}
|
|
|
+
|
|
|
// punctNormalizer maps full-width punctuation to its ASCII form and removes
// ASCII spaces and the ideographic enumeration comma. The full-width
// characters are written as escapes so they survive encoding normalization.
var punctNormalizer = strings.NewReplacer(
	"\uff08", "(", // full-width left parenthesis
	"\uff09", ")", // full-width right parenthesis
	"\uff0c", ",", // full-width comma
	" ", "",
	"、", "",
)

// c normalizes a string before comparison: full-width parentheses and commas
// become their ASCII equivalents, and ASCII spaces and "、" are removed.
//
// The first three replacements used to replace a character with itself and
// therefore did nothing. Input that contains only ASCII punctuation is
// handled exactly as before.
func c(s string) string {
	return punctNormalizer.Replace(s)
}
|
|
|
+
|
|
|
// post1 sends data as a JSON body to the local cleaning service at
// http://127.0.0.1:12321/clean/deepseek (2 second timeout) and returns the
// JSON object it answers with.
//
// Any failure (encoding the request, the HTTP call, reading the response)
// yields an empty map; a response that is not valid JSON yields whatever was
// decoded before the error. The response body is always closed so the
// connection is released, and a payload that cannot be encoded returns
// immediately instead of posting an empty body.
//
// NOTE(review): the HTTP status code is not checked; the body is decoded
// regardless, as before.
func post1(data map[string]interface{}) map[string]interface{} {
	info := map[string]interface{}{}

	jsonStr, err := json.Marshal(data)
	if err != nil {
		return info
	}

	client := &http.Client{Timeout: 2 * time.Second}
	resp, err := client.Post("http://127.0.0.1:12321/clean/deepseek", "application/json", bytes.NewBuffer(jsonStr))
	if err != nil {
		return info
	}
	defer resp.Body.Close()

	res, err := io.ReadAll(resp.Body)
	if err != nil {
		return info
	}
	if err := json.Unmarshal(res, &info); err != nil {
		return info
	}
	return info
}
|