123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- package extract
- import (
- "data_ai/prompt"
- "data_ai/ul"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- new_xlsx "github.com/tealeg/xlsx/v3"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "os"
- "sync"
- "unicode/utf8"
- )
- func TestSingleFieldInfo(name string, tmpid string) {
- tmp := ul.SourceMgo.FindById(name, tmpid)
- data := ResolveInfo(tmp)
- //最终结果...
- for k, v := range data {
- log.Debug(k, "~", v)
- }
- }
- func TestPackageInfo() {
- query := map[string]interface{}{
- "new_pkg": map[string]interface{}{
- "$exists": 1,
- },
- }
- dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{})
- log.Debug("查询数量...", len(dataArr))
- os.Remove("test.xlsx")
- f := new_xlsx.NewFile()
- sheet, _ := f.AddSheet("数据信息")
- row := sheet.AddRow()
- writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- ttt := ul.SourceMgo.FindById("ai_41411", tmpid)
- site := qu.ObjToString(ttt["site"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- p_info := *qu.ObjToMap(v["new_pkg"])
- p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"])
- for _, v1 := range p_arr {
- row = sheet.AddRow()
- arr := []string{}
- arr = append(arr, tmpid)
- arr = append(arr, site)
- arr = append(arr, jyhref)
- arr = append(arr, qu.ObjToString(v1["包项目名称"]))
- arr = append(arr, qu.ObjToString(v1["中标单位"]))
- arr = append(arr, qu.ObjToString(v1["中标金额"]))
- writeRow(row, arr)
- }
- }
- if err := f.Save("test.xlsx"); err != nil {
- fmt.Println("保存xlsx失败:", err)
- } else {
- fmt.Println("保存xlsx成功:", err)
- }
- log.Debug("is over ...")
- return
- //分包判断,获取信息
- pool_mgo := make(chan bool, 80)
- wg_mgo := &sync.WaitGroup{}
- for k, v := range dataArr {
- if k%10 == 0 {
- log.Debug(k, "~", v["_id"])
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(v map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.SourceMgo.FindById("ai_41411", tmpid)
- if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 {
- pkg := prompt.AcquireMultiplePackageInfo(detail)
- //最终结果...
- ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "new_pkg": pkg,
- },
- })
- }
- }(v)
- }
- wg_mgo.Wait()
- }
- // 更新链接
- func TestUpdateJyhref(name string) {
- dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "jyhref": jyhref,
- },
- })
- }
- log.Debug("is over ...")
- }
- func writeRow(row *new_xlsx.Row, arr []string) {
- for _, v := range arr {
- row.AddCell().Value = v
- }
- }
|