123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
- package main
- import (
- "data_ai/extract"
- "data_ai/tool"
- "data_ai/udp"
- "data_ai/ul"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "strings"
- "sync"
- )
- func init() {
- ul.IsLocal = true //是否本地
- ul.IsTool = false //是否工具
- ul.IsFull = false //是否全量
- if ul.IsTool {
- ul.InitToolVar()
- } else {
- ul.InitGlobalVar()
- if !ul.IsFull {
- if ul.ModelType == "deepseek" {
- go extract.RunDeepSeek()
- } else {
- udp.InitProcessVar()
- }
- }
- }
- if !ul.IsTool {
- ul.InitOss(ul.IsLocal)
- }
- log.Debug("测试一下转换MD方法···")
- detail := `<table border=\"1\"><tbody><tr><th>采购人名称</th><td>云南中医药大学</td></tr><tr><th>中标(成交)供应商名称</th><td>云南智蓝云鸽信息科技有限公司</td></tr><tr><th>合同金额</th><td>224.99万元 人民币</td></tr><tr><th>合同期限</th><td>年</td></tr><tr><th>合同签署时间</th><td>2023-12-29 00:00:00</td></tr></tbody></table>`
- new_detail := ul.HttpConvertToMarkdown(detail)
- log.Debug(new_detail)
- }
- func main() {
- if ul.IsTool {
- tool.StartToolInfo()
- return
- }
- update1()
- lock := make(chan bool)
- <-lock
- }
- func update1() {
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.BidMgo.FindById("zktest_sample_data_source_0", tmpid)
- title := qu.ObjToString(data["title"])
- detail := qu.ObjToString(data["detail"])
- ul.BidMgo.UpdateById("zktest_sample_data", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "detail": title + "\n" + detail,
- },
- })
- }
- log.Debug("is over ...")
- }
- func export1() {
- dataArr, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- pool_mgo := make(chan bool, 1)
- wg_mgo := &sync.WaitGroup{}
- for _, v := range dataArr {
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.BidMgo.FindById("bidding", tmpid)
- if len(data) == 0 || data == nil {
- log.Debug("异常")
- }
- ul.BidMgo.Save("zktest_sample_data_source_4", data)
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("is over ...")
- }
- // 对比程序
- func compare1() {
- fields := map[string]string{
- "toptype": "string",
- "subtype": "string",
- "area": "string",
- "city": "string",
- "projectname": "string",
- "projectcode": "string",
- "buyer": "string",
- "s_winner": "string",
- "budget": "float",
- "bidamount": "float",
- }
- dataArr1, _ := ul.BidMgo.Find("zktest_sample_data", map[string]interface{}{}, nil, map[string]interface{}{})
- dataArr2, _ := ul.BidMgo.Find("zktest_deepseek_0122", map[string]interface{}{}, nil, map[string]interface{}{})
- biaozhu := creat(dataArr1, false) //标注数据···
- deepseek := creat(dataArr2, true)
- dataArr1 = nil
- dataArr2 = nil
- //计数
- tj := duibi(fields, biaozhu, deepseek)
- log.Debug("...................")
- arr := []string{"toptype", "subtype", "area", "city", "projectname", "projectcode", "buyer", "budget", "s_winner", "bidamount"}
- for _, v := range arr {
- t1, s1 := tj[v]["total"], tj[v]["same"]
- f1 := fmt.Sprintf("模型flash~字段:%s 总计:%d 一致:%d 一致率:%.2f%s", v, t1, s1, (float64(s1)/float64(t1))*100.0, "%")
- log.Debug(f1)
- }
- }
- // 构建数据
- func creat(dataArr []map[string]interface{}, deepseek bool) map[string]map[string]interface{} {
- dict := map[string]map[string]interface{}{}
- for _, biaozhu := range dataArr {
- if deepseek {
- tmpid := ul.BsonTOStringId(biaozhu["_id"])
- toptype := qu.ObjToString(biaozhu["s_toptype"])
- subtype := qu.ObjToString(biaozhu["s_subtype"])
- area := qu.ObjToString(biaozhu["s_area"])
- city := qu.ObjToString(biaozhu["s_city"])
- projectname := qu.ObjToString(biaozhu["s_projectname"])
- projectcode := qu.ObjToString(biaozhu["s_projectcode"])
- budget := qu.Float64All(biaozhu["s_budget"])
- bidamount := qu.Float64All(biaozhu["s_bidamount"])
- buyer := qu.ObjToString(biaozhu["s_buyer"])
- s_winner := qu.ObjToString(biaozhu["s_winner"])
- info := map[string]interface{}{}
- info["toptype"] = toptype
- info["subtype"] = subtype
- info["area"] = area
- info["city"] = city
- info["projectname"] = projectname
- info["projectcode"] = projectcode
- info["budget"] = budget
- info["bidamount"] = bidamount
- info["buyer"] = buyer
- info["s_winner"] = s_winner
- dict[tmpid] = info
- } else {
- tmpid := ul.BsonTOStringId(biaozhu["_id"])
- toptype := qu.ObjToString(biaozhu["toptype"])
- subtype := qu.ObjToString(biaozhu["subtype"])
- area := qu.ObjToString(biaozhu["area"])
- city := qu.ObjToString(biaozhu["city"])
- projectname := qu.ObjToString(biaozhu["projectname"])
- projectcode := qu.ObjToString(biaozhu["projectcode"])
- budget := qu.Float64All(biaozhu["budget"])
- bidamount := qu.Float64All(biaozhu["bidamount"])
- buyer := qu.ObjToString(biaozhu["buyer"])
- s_winner := qu.ObjToString(biaozhu["s_winner"])
- info := map[string]interface{}{}
- info["toptype"] = toptype
- info["subtype"] = subtype
- info["area"] = area
- info["city"] = city
- info["projectname"] = projectname
- info["projectcode"] = projectcode
- info["budget"] = budget
- info["bidamount"] = bidamount
- info["buyer"] = buyer
- info["s_winner"] = s_winner
- dict[tmpid] = info
- }
- }
- return dict
- }
- func duibi(fields map[string]string, biaozhu map[string]map[string]interface{}, source map[string]map[string]interface{}) map[string]map[string]int {
- //计数
- tj := map[string]map[string]int{}
- for tmpid, tmp := range source {
- bz := biaozhu[tmpid]
- for filed, typeof := range fields {
- nums := tj[filed]
- if nums == nil {
- nums = map[string]int{}
- }
- if typeof == "string" {
- b_value := qu.ObjToString(bz[filed])
- s_value := qu.ObjToString(tmp[filed])
- if b_value == "" && s_value == "" {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- }
- }
- } else if typeof == "float" {
- b_value := qu.Float64All(bz[filed])
- s_value := qu.Float64All(tmp[filed])
- if b_value == 0.0 && s_value == 0.0 {
- } else {
- nums["total"] = qu.IntAll(nums["total"]) + 1
- if b_value == s_value {
- nums["same"] = qu.IntAll(nums["same"]) + 1
- } else {
- //if filed == "budget" {
- // if b_value == 0.0 {
- // log.Debug(tmpid)
- // }
- //}
- }
- }
- } else {
- }
- tj[filed] = nums
- }
- }
- return tj
- }
// c is a helper for test/debug data: it runs s through a fixed list of
// punctuation substitutions (parentheses and comma) and returns the result.
//
// NOTE(review): as the pairs appear here, each one replaces a character with
// itself, so s comes back unchanged. The left-hand sides were presumably
// full-width punctuation in the original file — confirm and restore.
func c(s string) string {
	substitutions := [][2]string{
		{"(", "("},
		{")", ")"},
		{",", ","},
	}
	for _, pair := range substitutions {
		s = strings.ReplaceAll(s, pair[0], pair[1])
	}
	return s
}
|