123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273 |
- package main
- import (
- "data_ai/tool"
- "data_ai/udp"
- "data_ai/ul"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- "github.com/gogf/gf/v2/util/gconv"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "strings"
- "sync"
- "unicode/utf8"
- )
- func init() {
- ul.IsLocal = false //是否本地
- ul.IsTool = false //是否工具
- ul.IsFull = false //是否全量
- if ul.IsTool {
- ul.InitToolVar()
- } else {
- ul.InitGlobalVar()
- if !ul.IsFull {
- udp.InitProcessVar()
- }
- }
- if !ul.IsTool {
- ul.InitOss(ul.IsLocal)
- }
- log.Debug("测试一下转换MD方法···")
- detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
- new_detail := ul.HttpConvertToMarkdown(detail)
- log.Debug(new_detail)
- }
- func main() {
- if ul.IsTool {
- tool.StartToolInfo()
- return
- }
- //extract.TestSingleFieldInfo("bidding", "677cf41c3309c0998bb6ddda")
- lock := make(chan bool)
- <-lock
- }
- // 对比程序
- func compare1() {
- fields := map[string]string{
- "toptype": "string",
- "subtype": "string",
- "area": "string",
- "city": "string",
- "projectname": "string",
- "projectcode": "string",
- "buyer": "string",
- "s_winner": "string",
- "budget": "float",
- "bidamount": "float",
- }
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
- dataArr1, _ := ul.BidMgo.Find("zktest_sample_data_source_1", map[string]interface{}{}, nil, map[string]interface{}{})
- dataArr2, _ := ul.BidMgo.Find("zktest_sample_data_source_2", map[string]interface{}{}, nil, map[string]interface{}{})
- dataArr3, _ := ul.BidMgo.Find("zktest_sample_data_source_3", map[string]interface{}{}, nil, map[string]interface{}{})
- biaozhu := creat(dataArr, false) //标注数据···
- source1 := creat(dataArr1, true)
- source2 := creat(dataArr2, true)
- source3 := creat(dataArr3, true)
- log.Debug("数据源:", len(biaozhu))
- log.Debug("对比源:", len(source1))
- log.Debug("对比源:", len(source2))
- log.Debug("对比源:", len(source3))
- dataArr = nil
- dataArr1 = nil
- dataArr2 = nil
- dataArr3 = nil
- //计数
- tj1 := duibi(fields, biaozhu, source1)
- tj2 := duibi(fields, biaozhu, source2)
- tj3 := duibi(fields, biaozhu, source3)
- log.Debug("...................")
- arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
- for _, v := range arr {
- t1, s1 := tj1[v]["total"], tj1[v]["same"]
- t2, s2 := tj2[v]["total"], tj2[v]["same"]
- t3, s3 := tj3[v]["total"], tj3[v]["same"]
- f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
- f2 := fmt.Sprintf("模型air~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t2, s2, (float64(s2)/float64(t2))*100.0, "%")
- f3 := fmt.Sprintf("模型deepseek~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t3, s3, (float64(s3)/float64(t3))*100.0, "%")
- log.Debug(f1)
- log.Debug(f2)
- log.Debug(f3)
- }
- }
- // 构建数据
- func creat(dataArr []map[string]interface{}, is_zhipu bool) map[string]map[string]interface{} {
- dict := map[string]map[string]interface{}{}
- for _, biaozhu := range dataArr {
- if is_zhipu {
- ai_zhipu := *qu.ObjToMap(biaozhu["ai_zhipu"])
- if len(ai_zhipu) > 0 {
- } else {
- continue
- }
- }
- tmpid := ul.BsonTOStringId(biaozhu["_id"])
- toptype := qu.ObjToString(biaozhu["toptype"])
- subtype := qu.ObjToString(biaozhu["subtype"])
- area := qu.ObjToString(biaozhu["area"])
- city := qu.ObjToString(biaozhu["city"])
- projectname := qu.ObjToString(biaozhu["projectname"])
- projectcode := qu.ObjToString(biaozhu["projectcode"])
- budget := qu.Float64All(biaozhu["budget"])
- bidamount := qu.Float64All(biaozhu["bidamount"])
- buyer := qu.ObjToString(biaozhu["buyer"])
- s_winner := qu.ObjToString(biaozhu["s_winner"])
- info := map[string]interface{}{}
- info["toptype"] = toptype
- info["subtype"] = subtype
- info["area"] = area
- info["city"] = city
- info["projectname"] = projectname
- info["projectcode"] = projectcode
- info["budget"] = budget
- info["bidamount"] = bidamount
- info["buyer"] = buyer
- info["s_winner"] = s_winner
- dict[tmpid] = info
- }
- return dict
- }
- func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}) map[string]map[string]int {
- //计数
- tj := map[string]map[string]int{}
- for tmpid, tmp := range source {
- bz := biaozhu[tmpid]
- for filed, typeof := range fields {
- nums := tj[filed]
- if nums == nil {
- nums = map[string]int{}
- }
- if typeof == "string" {
- b_value := qu.ObjToString(bz[filed])
- s_value := qu.ObjToString(tmp[filed])
- if b_value == "" && s_value == "" {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- }
- }
- } else if typeof == "float" {
- b_value := qu.Float64All(bz[filed])
- s_value := qu.Float64All(tmp[filed])
- if b_value == 0.0 && s_value == 0.0 {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- } else {
- //if filed == "budget" {
- // if b_value == 0.0 {
- // log.Debug(tmpid)
- // }
- //}
- }
- }
- } else {
- }
- tj[filed] = nums
- }
- }
- return tj
- }
- func export1() {
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- pool_mgo := make(chan bool, 1)
- wg_mgo := &sync.WaitGroup{}
- for _, v := range dataArr {
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.BidMgo.FindById("bidding", tmpid)
- if len(data) == 0 || data == nil {
- log.Debug("异常")
- }
- ul.BidMgo.Save("zktest_sample_data_source_3", data)
- //ul.BidMgo.Save("zktest_sample_data_source_2", data)
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("is over ...")
- }
- // 测试调试数据
- func test1() {
- q, total := map[string]interface{}{
- "_id": map[string]interface{}{
- "$gt": ul.StringTOBsonId("677638b13309c0998ba2488f"),
- "$lte": ul.StringTOBsonId("6777a3d23309c0998ba89797"),
- },
- }, 0
- count, _ := ul.BidMgo.Count("bidding", q)
- log.Debug("查询数量:", count)
- pool_mgo := make(chan bool, 10)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.BidMgo.GetMgoConn()
- defer ul.BidMgo.DestoryMongoConn(sess)
- it := sess.DB(ul.BidMgo.DbName).C(ul.Bid_Name).Find(&q).Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%1000 == 0 {
- log.Debug("cur ai index ", total)
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- //tmpid := ul.BsonTOStringId(tmp["_id"])
- s1, s2 := gconv.String(tmp["budget"]), gconv.String(tmp["bidamount"])
- r1, r2 := strings.Split(s1, "."), strings.Split(s2, ".")
- if len(r1) == 2 {
- l1 := utf8.RuneCountInString(r1[1])
- if l1 > 3 {
- //log.Debug("问题金额:", tmpid, "~", s1)
- ul.BidMgo.Save("zktest_err_0106", tmp)
- return
- }
- }
- if len(r2) == 2 {
- l2 := utf8.RuneCountInString(r2[1])
- if l2 > 3 {
- //log.Debug("问题金额:", tmpid, "~", s2)
- ul.BidMgo.Save("zktest_err_0106", tmp)
- return
- }
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("is over ...", total)
- }
// c runs s through three punctuation replacements — "(", ")" and "," — and
// returns the result.
//
// NOTE(review): as written, every pair replaces a character with itself, so
// the input comes back unchanged. This looks like it was meant to convert
// between full-width and ASCII punctuation — confirm the intended direction
// before changing the pairs.
func c(s string) string {
	punctuation := strings.NewReplacer(
		"(", "(",
		")", ")",
		",", ",",
	)
	return punctuation.Replace(s)
}
|