123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- package extract
- import (
- "data_ai/prompt"
- "data_ai/ul"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- new_xlsx "github.com/tealeg/xlsx/v3"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "os"
- "sync"
- "unicode/utf8"
- )
- func TestSingleFieldInfo(name string, tmpid string) {
- tmp := ul.SourceMgo.FindById(name, tmpid)
- if len(tmp) == 0 || tmp == nil {
- log.Debug("未查询到数据...", tmpid)
- return
- }
- data := ResolveInfo(tmp)
- //最终结果...
- for k, v := range data {
- log.Debug(k, "~", v)
- }
- }
- func TestIsPackage() {
- tmpArr := []string{
- "669e83fe66cf0db42a6520b3",
- "669e892066cf0db42a652c9b",
- "669e904966cf0db42a653b5d",
- "669f16f466cf0db42a669069",
- "669f186c66cf0db42a669bf0",
- "669efb6766cf0db42a65e0b4",
- "669f004266cf0db42a65f201",
- "669f02a666cf0db42a65fff3",
- "669f172766cf0db42a669193",
- "669ec89566cf0db42a659020",
- "669e86b266cf0db42a6526ac",
- "669e86e466cf0db42a6527b7",
- "669e87b766cf0db42a652a3e",
- "669f082d66cf0db42a662323",
- "669e95e966cf0db42a654dd1",
- "669ea39466cf0db42a656311",
- "669f140366cf0db42a66772f",
- "669ee59466cf0db42a65b8aa",
- "669f05a166cf0db42a66117b",
- "669e90d666cf0db42a653e0a",
- "669f08c466cf0db42a66273c",
- "669f155166cf0db42a6682c7",
- "669ef0ff66cf0db42a65c83a",
- "669efdc166cf0db42a65e8f3",
- "669f090066cf0db42a6629d0",
- "669f111366cf0db42a665ce7",
- "669f15fb66cf0db42a668901",
- "669f0baa66cf0db42a663a72",
- "669f039766cf0db42a66044e",
- "669eff3e66cf0db42a65ee73",
- "669f12c366cf0db42a666b9d",
- "669e913b66cf0db42a653ffc",
- "669e833466cf0db42a651e3a",
- "669f071e66cf0db42a661b03",
- "669f1a1266cf0db42a66a892",
- "669f0aec66cf0db42a6635e8",
- "669f169c66cf0db42a668e1d",
- "669ed6c966cf0db42a65a75d",
- "669f072866cf0db42a661b26",
- "669f185866cf0db42a669af0",
- "669f15d366cf0db42a6687aa",
- "669f182466cf0db42a669960",
- "669f0ed066cf0db42a664e5c",
- "669f076466cf0db42a661cd4",
- "669f172966cf0db42a6691c0",
- "669f198466cf0db42a66a385",
- "669f1ad366cf0db42a66afb9",
- "669f156666cf0db42a668403",
- "669f093c66cf0db42a662c08",
- "669f0d8266cf0db42a6646cb",
- "669f06e866cf0db42a661a1d",
- "669f1bd766cf0db42a66b86e",
- "669efcd066cf0db42a65e4f4",
- }
- pkgArr := []int{
- 1,
- 1,
- 1,
- 1,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 1,
- 0,
- 1,
- 0,
- 0,
- 1,
- 0,
- 1,
- 0,
- 1,
- 1,
- 0,
- 1,
- 0,
- 0,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 0,
- 1,
- 1,
- 0,
- 1,
- }
- ok := 0
- for k, v := range tmpArr {
- data := ul.SourceMgo.FindById("ai_41411", v)
- if len(data) == 0 {
- data = ul.SourceMgo.FindById("ai_294", v)
- }
- detail := qu.ObjToString(data["detail"])
- ispkg := prompt.AcquireIsPackageInfo(detail)
- if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
- ok++
- } else {
- log.Debug("错误~", v)
- }
- }
- log.Debug("is over ~ ", len(tmpArr)-ok)
- }
- func TestPackageInfo() {
- query := map[string]interface{}{
- "new_pkg": map[string]interface{}{
- "$exists": 1,
- },
- }
- dataArr, _ := ul.SourceMgo.Find("ai_41411_zhipu", query, nil, map[string]interface{}{})
- log.Debug("查询数量...", len(dataArr))
- os.Remove("test.xlsx")
- f := new_xlsx.NewFile()
- sheet, _ := f.AddSheet("数据信息")
- row := sheet.AddRow()
- writeRow(row, []string{"唯一标识", "站点", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- ttt := ul.SourceMgo.FindById("ai_41411", tmpid)
- site := qu.ObjToString(ttt["site"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- p_info := *qu.ObjToMap(v["new_pkg"])
- p_arr := ul.IsMarkInterfaceMap(p_info["分包信息"])
- for _, v1 := range p_arr {
- row = sheet.AddRow()
- arr := []string{}
- arr = append(arr, tmpid)
- arr = append(arr, site)
- arr = append(arr, jyhref)
- arr = append(arr, qu.ObjToString(v1["包项目名称"]))
- arr = append(arr, qu.ObjToString(v1["中标单位"]))
- arr = append(arr, qu.ObjToString(v1["中标金额"]))
- writeRow(row, arr)
- }
- }
- if err := f.Save("test.xlsx"); err != nil {
- fmt.Println("保存xlsx失败:", err)
- } else {
- fmt.Println("保存xlsx成功:", err)
- }
- log.Debug("is over ...")
- return
- //分包判断,获取信息
- pool_mgo := make(chan bool, 80)
- wg_mgo := &sync.WaitGroup{}
- for k, v := range dataArr {
- if k%10 == 0 {
- log.Debug(k, "~", v["_id"])
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(v map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(v["_id"])
- data := ul.SourceMgo.FindById("ai_41411", tmpid)
- if detail := qu.ObjToString(data["detail"]); utf8.RuneCountInString(detail) > 100 {
- pkg := prompt.AcquireMultiplePackageInfo(detail)
- //最终结果...
- ul.SourceMgo.UpdateById("ai_41411_zhipu", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "new_pkg": pkg,
- },
- })
- }
- }(v)
- }
- wg_mgo.Wait()
- }
- // 更新链接
- func TestUpdateJyhref(name string) {
- dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "jyhref": jyhref,
- },
- })
- }
- log.Debug("is over ...")
- }
- func writeRow(row *new_xlsx.Row, arr []string) {
- for _, v := range arr {
- row.AddCell().Value = v
- }
- }
|