123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354 |
- package extract
- import (
- "bytes"
- "data_ai/ul"
- "encoding/json"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- "io"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "net/http"
- "strings"
- "sync"
- "time"
- )
- func duibifenbao() {
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data_source_deepseek", map[string]interface{}{}, nil, map[string]interface{}{})
- dict1, dict2, dict3 := map[string]interface{}{}, map[string]interface{}{}, map[string]interface{}{}
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- ai_zhipu := qu.ObjToMap(v["ai_zhipu"])
- ai_deepseek := qu.ObjToMap(v["ai_deepseek"])
- ext_package := qu.ObjToMap(v["package"])
- data := map[string]interface{}{}
- if ai_zhipu != nil {
- s_pkg := *qu.ObjToMap((*ai_zhipu)["s_pkg"])
- com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
- if len(com_package) > 1 {
- dict1[tmpid] = tmpid
- data["zhipu"] = 1
- } else {
- data["zhipu"] = 0
- }
- }
- if ai_deepseek != nil {
- s_pkg := *qu.ObjToMap((*ai_deepseek)["s_pkg"])
- com_package := ul.IsMarkInterfaceMap(s_pkg["com_package"])
- if len(com_package) > 1 {
- dict2[tmpid] = tmpid
- data["deepseek"] = 1
- } else {
- data["deepseek"] = 0
- }
- }
- if ext_package != nil {
- if len(*ext_package) > 1 {
- dict3[tmpid] = tmpid
- data["extract"] = 1
- } else {
- data["extract"] = 0
- }
- }
- if len(data) > 0 {
- data["_id"] = v["_id"]
- data["href"] = v["href"]
- data["jyhref"] = fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- ul.BidMgo.Save("zktest_0225", data)
- }
- }
- }
- func test1() {
- dict1 := map[string]interface{}{}
- dict2 := map[string]interface{}{}
- for k, _ := range dict2 {
- if dict1[k] == nil {
- log.Debug(k)
- }
- }
- log.Debug("is over ...")
- }
- // 合并
- func merge1(ai_coll string, ai_key string) {
- dataArr, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
- log.Debug("查询量···", len(dataArr))
- pool_mgo := make(chan bool, 10)
- wg_mgo := &sync.WaitGroup{}
- for k, v := range dataArr {
- if k%100 == 0 {
- log.Debug("cur index ", k)
- }
- if v[ai_key] == nil {
- continue
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- u_id := ul.BsonTOStringId(tmp["_id"])
- if u_id != "664af2af66cf0db42a3d217e" {
- return
- }
- data := *qu.ObjToMap(tmp[ai_key])
- tmp["ai_zhipu"] = data
- update_info := make(map[string]interface{}, 0)
- ul.ChooseCheckDataAI(tmp, &update_info)
- if u_id != "" {
- //ul.SourceMgo.UpdateById(ai_coll, u_id, map[string]interface{}{
- // "$set": update_info,
- //})
- }
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("is over ... ")
- }
- // 对比程序
- func compare1(ai_coll string) {
- fields := map[string]string{
- "toptype": "string",
- "subtype": "string",
- "area": "string",
- "city": "string",
- "projectname": "string",
- "projectcode": "string",
- "buyer": "string",
- "s_winner": "string",
- "budget": "float",
- "bidamount": "float",
- }
- dataArr1, _ := ul.PyMgo.Find("standard_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
- dataArr2, _ := ul.SourceMgo.Find(ai_coll, map[string]interface{}{}, nil, map[string]interface{}{})
- log.Debug("查询数量:", len(dataArr1), len(dataArr2))
- biaozhu, check_exclude, exclude_all := creat(dataArr1, false) //标注数据···
- deepseek, _, _ := creat(dataArr2, false)
- dataArr1 = nil
- dataArr2 = nil
- //计数
- tj_deepseek := duibi(fields, biaozhu, deepseek, check_exclude, exclude_all)
- log.Debug("...................")
- arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
- for _, v := range arr {
- t2, s2 := tj_deepseek[v]["total"], tj_deepseek[v]["same"]
- f2 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
- log.Debug(f2)
- }
- }
- // 构建数据
- func creat(dataArr []map[string]interface{}, zhipu bool) (map[string]map[string]interface{}, map[string]map[string]interface{}, map[string]interface{}) {
- dict := map[string]map[string]interface{}{}
- check_exclude := map[string]map[string]interface{}{}
- exclude_all := map[string]interface{}{}
- for _, biaozhu := range dataArr {
- tmpid := ul.BsonTOStringId(biaozhu["_id"])
- if biaozhu["check_exclude"] != nil {
- check_exclude[tmpid] = *qu.ObjToMap(biaozhu["check_exclude"])
- }
- if biaozhu["exclude_all"] != nil {
- exclude_all[tmpid] = qu.IntAll(biaozhu["exclude_all"])
- }
- if zhipu {
- if biaozhu["ai_deepseek"] != nil {
- biaozhu = *qu.ObjToMap(biaozhu["ai_deepseek"])
- }
- toptype := qu.ObjToString(biaozhu["s_toptype"])
- subtype := qu.ObjToString(biaozhu["s_subtype"])
- area := qu.ObjToString(biaozhu["s_area"])
- city := qu.ObjToString(biaozhu["s_city"])
- projectname := qu.ObjToString(biaozhu["s_projectname"])
- projectcode := qu.ObjToString(biaozhu["s_projectcode"])
- budget := qu.Float64All(biaozhu["s_budget"])
- bidamount := qu.Float64All(biaozhu["s_bidamount"])
- buyer := qu.ObjToString(biaozhu["s_buyer"])
- s_winner := qu.ObjToString(biaozhu["s_winner"])
- info := map[string]interface{}{}
- info["toptype"] = toptype
- info["subtype"] = subtype
- info["area"] = area
- info["city"] = city
- info["projectname"] = projectname
- info["projectcode"] = projectcode
- info["budget"] = budget
- info["bidamount"] = bidamount
- info["buyer"] = buyer
- info["s_winner"] = s_winner
- dict[tmpid] = info
- } else {
- toptype := qu.ObjToString(biaozhu["toptype"])
- subtype := qu.ObjToString(biaozhu["subtype"])
- area := qu.ObjToString(biaozhu["area"])
- city := qu.ObjToString(biaozhu["city"])
- projectname := qu.ObjToString(biaozhu["projectname"])
- projectcode := qu.ObjToString(biaozhu["projectcode"])
- budget := qu.Float64All(biaozhu["budget"])
- bidamount := qu.Float64All(biaozhu["bidamount"])
- buyer := qu.ObjToString(biaozhu["buyer"])
- s_winner := qu.ObjToString(biaozhu["s_winner"])
- info := map[string]interface{}{}
- info["toptype"] = toptype
- info["subtype"] = subtype
- info["area"] = area
- info["city"] = city
- info["projectname"] = projectname
- info["projectcode"] = projectcode
- info["budget"] = budget
- info["bidamount"] = bidamount
- info["buyer"] = buyer
- info["s_winner"] = s_winner
- dict[tmpid] = info
- }
- }
- return dict, check_exclude, exclude_all
- }
- func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}, check_exclude map[string]map[string]interface{}, exclude_all map[string]interface{}) map[string]map[string]int {
- //计数
- tj := map[string]map[string]int{}
- for tmpid, tmp := range source {
- bz := biaozhu[tmpid]
- exclude := check_exclude[tmpid]
- if qu.IntAll(exclude_all[tmpid]) == 1 {
- continue //整条过滤
- }
- for filed, typeof := range fields {
- if exclude[filed] != nil {
- continue
- }
- nums := tj[filed]
- if nums == nil {
- nums = map[string]int{}
- }
- if typeof == "string" {
- b_value := qu.ObjToString(bz[filed])
- s_value := qu.ObjToString(tmp[filed])
- //字符串通用转换
- b_value, s_value = c(b_value), c(s_value)
- if b_value == "" && s_value == "" {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- } else {
- if filed == "buyer" {
- //log.Debug("标注:", b_value, "~", "模板:", s_value)
- }
- }
- }
- } else if typeof == "float" {
- b_value := qu.Float64All(bz[filed])
- s_value := qu.Float64All(tmp[filed])
- if b_value == 0.0 && s_value == 0.0 {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- } else {
- if filed == "budget" {
- //log.Debug(fmt.Sprintf("%f", b_value), "~", fmt.Sprintf("%f", s_value), "~", tmpid)
- }
- }
- }
- } else {
- }
- tj[filed] = nums
- }
- }
- return tj
- }
- func update1() {
- dataArr, _ := ul.BidMgo.Find("zktest_deepseek_0124", map[string]interface{}{}, nil, map[string]interface{}{})
- for _, v := range dataArr {
- //tmpid := ul.BsonTOStringId(v["_id"])
- if v["ai_zhipu"] != nil {
- ai_zhipu := *qu.ObjToMap(v["ai_zhipu"])
- if ai_zhipu["s_pkg"] != nil {
- s_pkg := *qu.ObjToMap(ai_zhipu["s_pkg"])
- s_budget := qu.Float64All(s_pkg["s_budget"])
- s_bidamount := qu.Float64All(s_pkg["s_bidamount"])
- s_winner := qu.ObjToString(s_pkg["s_winner"])
- if s_budget > 0.0 && s_budget > qu.Float64All(ai_zhipu["s_budget"]) {
- ai_zhipu["s_budget"] = s_budget
- }
- if s_bidamount > 0.0 && s_bidamount > qu.Float64All(ai_zhipu["s_bidamount"]) {
- ai_zhipu["s_bidamount"] = s_bidamount
- }
- if s_winner != "" {
- ai_zhipu["s_winner"] = s_winner
- }
- }
- ul.BidMgo.Save("zktest_deepseek_0124_1", map[string]interface{}{
- "ai_zhipu": ai_zhipu,
- "_id": v["_id"],
- })
- }
- }
- log.Debug("is over ...")
- }
- func export1() {
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- pool_mgo := make(chan bool, 1)
- wg_mgo := &sync.WaitGroup{}
- for _, v := range dataArr {
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.BidMgo.FindById("bidding", tmpid)
- if len(data) == 0 || data == nil {
- log.Debug("异常")
- }
- ul.BidMgo.Save("zktest_sample_data_source_4", data)
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("is over ...")
- }
// cReplacer performs all normalizations of c in a single pass. A
// *strings.Replacer is safe for concurrent use, so it is built once.
var cReplacer = strings.NewReplacer(
	"（", "(", // full-width left parenthesis  -> ASCII
	"）", ")", // full-width right parenthesis -> ASCII
	"，", ",", // full-width comma             -> ASCII
	" ", "", // spaces are dropped
	"、", "", // ideographic commas are dropped
)

// c normalizes a string before comparison: full-width parentheses and commas
// are converted to their ASCII counterparts, and spaces and ideographic
// commas ("、") are removed.
//
// The previous implementation replaced "(", ")" and "," with themselves,
// which had no effect; the full-width forms are what the normalization is
// evidently meant to fold, so values that differ only in punctuation width
// now compare equal.
func c(s string) string {
	return cReplacer.Replace(s)
}
// post1Client is shared by all post1 calls so that connections to the local
// service can be reused. Each request is limited to 2 seconds in total.
var post1Client = &http.Client{Timeout: 2 * time.Second}

// post1 sends data as a JSON body to the local cleaning service at
// http://127.0.0.1:12321/clean/deepseek and returns the decoded JSON object
// from the response.
//
// The function is best-effort: on any failure (data cannot be encoded, the
// request fails, the body cannot be read, or the response is not a JSON
// object) it returns an empty, non-nil map. The HTTP status code is not
// inspected.
func post1(data map[string]interface{}) map[string]interface{} {
	info := map[string]interface{}{}

	payload, err := json.Marshal(data)
	if err != nil {
		// Do not send a request with an empty body for unencodable input.
		return info
	}

	resp, err := post1Client.Post("http://127.0.0.1:12321/clean/deepseek", "application/json", bytes.NewReader(payload))
	if err != nil {
		return info
	}
	// Closing the body releases the connection; without it every call leaks one.
	defer resp.Body.Close()

	res, err := io.ReadAll(resp.Body)
	if err != nil {
		return info
	}
	if err := json.Unmarshal(res, &info); err != nil {
		return map[string]interface{}{}
	}
	return info
}
|