12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764 |
- package extract
- import (
- "data_ai/clean"
- "data_ai/prompt"
- "data_ai/ul"
- "fmt"
- log "github.com/donnie4w/go-logger/logger"
- new_xlsx "github.com/tealeg/xlsx/v3"
- qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
- "os"
- "strings"
- "sync"
- "time"
- "unicode/utf8"
- )
- // 验证单条数据···
- func TestSingleFieldInfo(name string, tmpid string) {
- now := time.Now().Unix()
- tmp := ul.BidMgo.FindById(name, tmpid)
- if len(tmp) == 0 || tmp == nil {
- log.Debug("未查询到数据...", tmpid)
- return
- }
- data := ResolveInfo(tmp)
- //最终结果...
- for k, v := range data {
- log.Debug(k, "~", v)
- }
- log.Debug("耗时···", time.Now().Unix()-now)
- }
- // 调试错误数据···
- func TestErrInfo0923() {
- arr := []string{
- "66e39b7bb25c3e1debf2cb66",
- "66e39b7bb25c3e1debf2cb52",
- "66e39b76b25c3e1debf2cb1c",
- "66e39b71b25c3e1debf2ca58",
- "66e39b71b25c3e1debf2ca47",
- "66e39b71b25c3e1debf2ca3e",
- "66e39b71b25c3e1debf2ca1d",
- "66e39b62b25c3e1debf2c9a0",
- "66e39b0db25c3e1debf2c788",
- "66e39af3b25c3e1debf2c711",
- "66e39ae5b25c3e1debf2c6ac",
- "66e39ad5b25c3e1debf2c607",
- "66e39ad5b25c3e1debf2c5fc",
- "66e39ad1b25c3e1debf2c5e1",
- "66e39acbb25c3e1debf2c56c",
- "66e39ac6b25c3e1debf2c533",
- "66e39ac1b25c3e1debf2c526",
- "66e39a9eb25c3e1debf2c4e7",
- "66e39a80b25c3e1debf2c441",
- "66e39a5cb25c3e1debf2c357",
- "66e39a30b25c3e1debf2c18f",
- "66e39a12b25c3e1debf2c0cc",
- "66e39a08b25c3e1debf2c050",
- "66e39a08b25c3e1debf2bfce",
- "66e3993fb25c3e1debf2b875",
- "66e3990db25c3e1debf2b55b",
- "66e398f1b25c3e1debf2b4bc",
- "66e397ccb25c3e1debf2abed",
- "66e397b9b25c3e1debf2ab81",
- "66e3977db25c3e1debf2a7ae",
- "66e396b3b25c3e1debf2a049",
- "66e396b3b25c3e1debf29f97",
- "66e3969eb25c3e1debf29e62",
- "66e395feb25c3e1debf29abb",
- "66e395e0b25c3e1debf298d2",
- "66e395d6b25c3e1debf297e6",
- "66e395ccb25c3e1debf296d1",
- "66e39554b25c3e1debf29331",
- "66e39517b25c3e1debf29160",
- "66e394c7b25c3e1debf28f42",
- "66e394bdb25c3e1debf28ef6",
- "66e394b3b25c3e1debf28e48",
- "66e3944fb25c3e1debf28ab5",
- "66e393ccb25c3e1debf28729",
- "66e393c2b25c3e1debf286dd",
- "66e393c2b25c3e1debf286a5",
- "66e393aeb25c3e1debf28572",
- "66e3934ab25c3e1debf28423",
- "66e39322b25c3e1debf282c6",
- "66e392d1b25c3e1debf2809d",
- "66e39212b25c3e1debf279d1",
- "66e39209b25c3e1debf279c7",
- "66e391f5b25c3e1debf2779e",
- "66e391eab25c3e1debf2773f",
- "66e391e0b25c3e1debf276a8",
- "66e39168b25c3e1debf27347",
- "66e3912cb25c3e1debf2714a",
- "66e390e6b25c3e1debf26ee7",
- "66e390e4b25c3e1debf26e7c",
- "66e390b3b25c3e1debf26cce",
- "66e3906cb25c3e1debf26a8f",
- "66e3901bb25c3e1debf26822",
- "66e38ff5b25c3e1debf26714",
- "66e38fd5b25c3e1debf26694",
- "66e38fb8b25c3e1debf265a4",
- "66e38f90b25c3e1debf264c5",
- "66e38f7bb25c3e1debf263de",
- "66e38f68b25c3e1debf263b8",
- "66e38f5eb25c3e1debf2638c",
- "66e38f4ab25c3e1debf2633b",
- "66e38f40b25c3e1debf2631a",
- "66e38f18b25c3e1debf261c4",
- "66e38ef8b25c3e1debf260e7",
- "66e38ec8b25c3e1debf26063",
- "66e38eb3b25c3e1debf26017",
- "66e38e95b25c3e1debf25f78",
- "66e38e6db25c3e1debf25ef0",
- "66e38e61b25c3e1debf25eb4",
- "66e38a57b25c3e1debf24a45",
- "66e38a47b25c3e1debf24a09",
- "66e38a47b25c3e1debf249dd",
- "66e38a47b25c3e1debf249a1",
- "66e38a47b25c3e1debf24998",
- "66e38a47b25c3e1debf24995",
- "66e38a1fb25c3e1debf2494a",
- "66e389f3b25c3e1debf2482e",
- "66e389f3b25c3e1debf24824",
- "66e389f3b25c3e1debf2481d",
- "66e389f3b25c3e1debf24819",
- "66e389f3b25c3e1debf24816",
- "66e389f3b25c3e1debf24808",
- "66e389d5b25c3e1debf247ec",
- "66e389adb25c3e1debf24792",
- "66e389a6b25c3e1debf24733",
- "66e389a6b25c3e1debf24727",
- "66e389a3b25c3e1debf246f3",
- "66e3899cb25c3e1debf246ee",
- "66e3874db25c3e1debf23d91",
- "66e38739b25c3e1debf23d28",
- "66e38720b25c3e1debf23cf3",
- "66e386f3b25c3e1debf23cac",
- "66e386dfb25c3e1debf23c5b",
- "66e386dfb25c3e1debf23c5a",
- "66e386d5b25c3e1debf23c1b",
- "66e38699b25c3e1debf23b3b",
- "66e38694b25c3e1debf23b35",
- "66e3867bb25c3e1debf23af4",
- "66e38671b25c3e1debf23ad6",
- "66e38671b25c3e1debf23aa3",
- "66e38671b25c3e1debf23a97",
- "66e38661b25c3e1debf23a6a",
- "66e38657b25c3e1debf23a27",
- "66e3864db25c3e1debf23a10",
- "66e385f3b25c3e1debf23954",
- "66e385dfb25c3e1debf2393a",
- "66e385d0b25c3e1debf238de",
- "66e385c6b25c3e1debf23896",
- "66e385adb25c3e1debf2381b",
- "66e385a3b25c3e1debf237eb",
- "66e385a3b25c3e1debf237e0",
- "66e38570b25c3e1debf23778",
- "66e38566b25c3e1debf23765",
- "66e3855cb25c3e1debf23755",
- "66e38534b25c3e1debf2369e",
- "66e38526b25c3e1debf2367c",
- "66e3851cb25c3e1debf23646",
- "66e3851cb25c3e1debf2363d",
- "66e3851cb25c3e1debf2363a",
- "66e38512b25c3e1debf23621",
- "66e38512b25c3e1debf2360e",
- "66e384f8b25c3e1debf23590",
- "66e384eeb25c3e1debf23572",
- "66e384c2b25c3e1debf2350b",
- "66e3847fb25c3e1debf23474",
- }
- pool_mgo := make(chan bool, 200)
- wg_mgo := &sync.WaitGroup{}
- log.Debug("预计处理条数···", len(arr))
- for k, v := range arr {
- if k%10 == 0 {
- log.Debug("cur index ", k)
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(v string) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- data := ul.BidMgo.FindById("bidding", v)
- if len(data) >= 0 && data != nil {
- detail := qu.ObjToString(data["detail"])
- filetext := qu.ObjToString(data["filetext"]) //此处为附件信息···
- title := qu.ObjToString(data["title"])
- if strings.Contains(title, "开标记录") { //开标记录舍弃
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 0,
- "s_id": v,
- })
- log.Debug("开标记录舍弃···")
- return
- }
- if data["jyfb_data"] != nil { //剑鱼发布舍弃qi
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 0,
- "s_id": v,
- })
- log.Debug("剑鱼发布舍弃···")
- return
- }
- if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
- detail = filetext
- }
- if utf8.RuneCountInString(detail) < 100 {
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 0,
- "s_id": v,
- })
- log.Debug("长度不符舍弃···")
- return
- }
- //获取外围字段数据
- //分包判断-获取信息
- //ispkg, pkg := false, map[string]interface{}{}
- //if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
- // if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
- //
- // }
- //}
- pkg := prompt.AcquireMultiplePackageInfo(detail)
- if s_pkg, ok := pkg["s_pkg"].(map[string]map[string]interface{}); ok {
- if len(s_pkg) <= 0 {
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 0,
- "s_id": v,
- })
- } else if len(s_pkg) == 1 {
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 1,
- "pkg": pkg,
- "s_id": v,
- })
- } else {
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 2,
- "s_id": v,
- "pkg": pkg,
- })
- }
- } else {
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": 0,
- "s_id": v,
- })
- }
- } else {
- log.Debug("未查询到数据···")
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
- "ispkg": -1,
- })
- }
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("is over ···")
- }
- // 导出需要修复的
- func TestFullJinOrCodeInfo() {
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, 20)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total := 0
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total)
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(tmp["_id"])
- isPcode, update := false, map[string]interface{}{}
- ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
- o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
- if ext_ai_record != nil {
- o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
- o_budget = qu.Float64All((*ext_ai_record)["budget"])
- o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
- }
- if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
- if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
- update["budget"] = filterAmount(r_budget, o_budget)
- }
- }
- if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
- if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
- update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
- }
- }
- //对于编号
- if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
- if o_projectcode != projectcode {
- if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
- fns := getpnsinfo(data) //获取附件名字
- for _, v := range fns {
- if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
- if strings.Contains(v, projectcode) {
- isPcode = true
- break
- }
- }
- }
- if isPcode {
- update["projectcode"] = o_projectcode
- }
- }
- }
- }
- if len(update) > 0 {
- //更新抽取表
- ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
- "$set": update,
- })
- //保存待修复表
- update["_id"] = tmp["_id"]
- ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update)
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("repair ai is over ...")
- }
- // 修复金额和编号
- func TestRepairJinOrCodeInfo() {
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, 20)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total := 0
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total)
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(tmp["_id"])
- isPcode, update := false, map[string]interface{}{}
- ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
- o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
- if ext_ai_record != nil {
- o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
- o_budget = qu.Float64All((*ext_ai_record)["budget"])
- o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
- }
- if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
- if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
- update["budget"] = filterAmount(r_budget, o_budget)
- }
- }
- if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
- if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
- update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
- }
- }
- //对于编号
- if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
- if o_projectcode != projectcode {
- if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
- fns := getpnsinfo(data) //获取附件名字
- for _, v := range fns {
- if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
- if strings.Contains(v, projectcode) {
- isPcode = true
- break
- }
- }
- }
- if isPcode {
- update["projectcode"] = o_projectcode
- }
- }
- }
- }
- if len(update) > 0 {
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
- "$set": update,
- })
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("repair ai is over ...")
- }
// filterAmount picks the more plausible of two candidate amounts.
//
// The larger value wins unless it exceeds 100,000,000, in which case the
// smaller one is returned. When neither value compares greater than the other
// (equal values, or a NaN operand) f1 is returned unchanged.
func filterAmount(f1 float64, f2 float64) float64 {
	const ceiling = 100000000.0

	var larger, smaller float64
	switch {
	case f1 > f2:
		larger, smaller = f1, f2
	case f1 < f2:
		larger, smaller = f2, f1
	default:
		return f1
	}

	if larger > ceiling {
		return smaller
	}
	return larger
}
- func TestExportJinErInfo() {
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, 20)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total, isok := 0, 0
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total)
- }
- isok++
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(tmp["_id"])
- budget := qu.Float64All(tmp["budget"])
- bidamount := qu.Float64All(tmp["bidamount"])
- saveinfo := map[string]interface{}{}
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
- ext_budget := qu.Float64All((*ext_ai_record)["budget"])
- ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"])
- if budget > 0.0 && ext_budget > 0.0 {
- if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 {
- saveinfo["budget"] = budget
- saveinfo["ext_budget"] = ext_budget
- }
- }
- if bidamount > 0.0 && ext_bidamount > 0.0 {
- if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 {
- saveinfo["bidamount"] = bidamount
- saveinfo["ext_bidamount"] = ext_bidamount
- }
- }
- }
- if len(saveinfo) > 0 && tmpid != "" {
- saveinfo["toptype"] = tmp["toptype"]
- saveinfo["subtype"] = tmp["subtype"]
- saveinfo["href"] = tmp["href"]
- saveinfo["jyhref"] = tmp["jytest_href"]
- ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo)
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("repair ai is over ...", isok)
- }
- // 修正buyer等字段
- func TestRepairBuyerInfo(name string) {
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, 20)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total, isok := 0, 0
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total)
- }
- isok++
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(tmp["_id"])
- buyer := qu.ObjToString(tmp["buyer"])
- agency := qu.ObjToString(tmp["agency"])
- winner := qu.ObjToString(tmp["winner"])
- update := map[string]interface{}{}
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
- o_buyer := qu.ObjToString((*ext_ai_record)["buyer"])
- if buyer == agency && o_buyer != "" {
- update["buyer"] = o_buyer
- }
- o_winner := qu.ObjToString((*ext_ai_record)["winner"])
- if o_winner != "" && strings.Contains(winner, o_winner) && o_winner != o_winner {
- update["winner"] = o_winner
- }
- }
- if len(update) > 0 && tmpid != "" {
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
- "$set": update,
- })
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("repair ai is over ...", isok)
- }
- func TestDelUpBuyerAi() {
- dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil)
- pool_mgo := make(chan bool, 50)
- wg_mgo := &sync.WaitGroup{}
- for k, v := range dataArr {
- if k%1000 == 0 {
- log.Debug(k, "~", v["_id"])
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(v map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- buyer := qu.ObjToString(v["buyer"])
- tmpid := ul.BsonTOStringId(v["_id"])
- data1 := ul.SourceMgo.FindById("result_20220218", tmpid)
- if len(data1) > 0 {
- ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{"buyer": buyer},
- })
- }
- data2 := ul.SourceMgo.FindById("result_20220219", tmpid)
- if len(data2) > 0 {
- ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{"buyer": buyer},
- })
- }
- }(v)
- }
- wg_mgo.Wait()
- log.Debug("del ai is over ...")
- }
- func TestAiBuyerInfo() {
- //dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil)
- q := map[string]interface{}{}
- pool_mgo := make(chan bool, 50)
- wg_mgo := &sync.WaitGroup{}
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- total, isok := 0, 0
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%1000 == 0 {
- log.Debug("cur index ", total)
- }
- isok++
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- tmpid := ul.BsonTOStringId(tmp["_id"])
- if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" {
- if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil {
- if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
- "$set": map[string]interface{}{"buyer": ns_buyer},
- })
- }
- }
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("repair ai is over ...", isok)
- }
- func TestExportAiBuyer() {
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- pool_mgo := make(chan bool, 10)
- wg_mgo := &sync.WaitGroup{}
- q, total := map[string]interface{}{
- "_id": map[string]interface{}{
- "$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"),
- },
- }, 0
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{
- "ai_zhipu": 1,
- "ext_ai_record": 1,
- }).Iter()
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total, "~", tmp["_id"])
- }
- pool_mgo <- true
- wg_mgo.Add(1)
- go func(tmp map[string]interface{}) {
- defer func() {
- <-pool_mgo
- wg_mgo.Done()
- }()
- ai_buyer, ext_buyer := "", ""
- if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil {
- ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"])
- }
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
- ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"])
- }
- if ai_buyer != "" {
- ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{
- "_id": tmp["_id"],
- "ai_buyer": ai_buyer,
- "ext_buyer": ext_buyer,
- })
- }
- }(tmp)
- tmp = make(map[string]interface{})
- }
- wg_mgo.Wait()
- log.Debug("export is over ", total)
- }
- func TestIsPackage() {
- tmpArr := []string{}
- pkgArr := []int{}
- ok := 0
- for k, v := range tmpArr {
- data := ul.SourceMgo.FindById("ai_41411", v)
- if len(data) == 0 {
- data = ul.SourceMgo.FindById("ai_294", v)
- }
- detail := qu.ObjToString(data["detail"])
- ispkg := prompt.AcquireIsPackageInfo(detail)
- if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
- ok++
- } else {
- log.Debug("错误~", v)
- }
- }
- log.Debug("is over ~ ", len(tmpArr)-ok)
- }
- func TestPackageInfo() {
- sess := ul.SourceMgo.GetMgoConn()
- defer ul.SourceMgo.DestoryMongoConn(sess)
- q, total := map[string]interface{}{"ai_zhipu.ispkg": true}, 0
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("-_id").Iter()
- isok := 0
- os.Remove("test.xlsx")
- f := new_xlsx.NewFile()
- sheet, _ := f.AddSheet("数据信息")
- row := sheet.AddRow()
- writeRow(row, []string{"序号", "唯一标识", "站点", "项目名称", "一级分类", "二级分类", "原文链接", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
- if total%10000 == 0 {
- log.Debug("cur index ", total, "~", isok)
- }
- tmpid := ul.BsonTOStringId(tmp["_id"])
- site := qu.ObjToString(tmp["site"])
- projectname := qu.ObjToString(tmp["projectname"])
- toptype := qu.ObjToString(tmp["toptype"])
- subtype := qu.ObjToString(tmp["subtype"])
- href := qu.ObjToString(tmp["href"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
- if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
- if s_info := qu.ObjToMap((*s_pkg)["s_pkg"]); s_info != nil && len(*s_info) > 1 {
- isok++
- for _, v := range *s_info {
- if v1 := qu.ObjToMap(v); v1 != nil {
- row = sheet.AddRow()
- arr := []string{}
- arr = append(arr, fmt.Sprintf("%d", isok))
- arr = append(arr, tmpid)
- arr = append(arr, site)
- arr = append(arr, projectname)
- arr = append(arr, toptype)
- arr = append(arr, subtype)
- arr = append(arr, href)
- arr = append(arr, jyhref)
- arr = append(arr, qu.ObjToString((*v1)["name"]))
- arr = append(arr, qu.ObjToString((*v1)["winner"]))
- bidamount := qu.Float64All((*v1)["bidamount"])
- if bidamount > 0.0 {
- arr = append(arr, fmt.Sprintf("%.2f", bidamount))
- } else {
- arr = append(arr, "")
- }
- writeRow(row, arr)
- }
- }
- if isok > 1000 {
- break
- }
- }
- }
- tmp = make(map[string]interface{})
- }
- log.Debug("is over ", total, isok)
- if err := f.Save("test.xlsx"); err != nil {
- fmt.Println("保存xlsx失败:", err)
- } else {
- fmt.Println("保存xlsx成功:", err)
- }
- log.Debug("is over ...")
- }
- // 更新链接
- func TestUpdateJyhref(name string) {
- dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
- for _, v := range dataArr {
- tmpid := ul.BsonTOStringId(v["_id"])
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
- ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
- "$set": map[string]interface{}{
- "jyhref": jyhref,
- },
- })
- }
- log.Debug("is over ...")
- }
- func writeRow(row *new_xlsx.Row, arr []string) {
- for _, v := range arr {
- row.AddCell().Value = v
- }
- }
|