|
@@ -1,15 +1,12 @@
|
|
package extract
|
|
package extract
|
|
|
|
|
|
import (
|
|
import (
|
|
- "data_ai/clean"
|
|
|
|
"data_ai/prompt"
|
|
"data_ai/prompt"
|
|
"data_ai/ul"
|
|
"data_ai/ul"
|
|
"fmt"
|
|
"fmt"
|
|
log "github.com/donnie4w/go-logger/logger"
|
|
log "github.com/donnie4w/go-logger/logger"
|
|
new_xlsx "github.com/tealeg/xlsx/v3"
|
|
new_xlsx "github.com/tealeg/xlsx/v3"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
|
|
- "os"
|
|
|
|
- "strings"
|
|
|
|
"sync"
|
|
"sync"
|
|
"time"
|
|
"time"
|
|
"unicode/utf8"
|
|
"unicode/utf8"
|
|
@@ -31,515 +28,37 @@ func TestSingleFieldInfo(name string, tmpid string) {
|
|
log.Debug("耗时···", time.Now().Unix()-now)
|
|
log.Debug("耗时···", time.Now().Unix()-now)
|
|
}
|
|
}
|
|
|
|
|
|
-// 调试错误数据···
|
|
|
|
-func TestErrInfo0923() {
|
|
|
|
- arr := []string{
|
|
|
|
- "66e39b7bb25c3e1debf2cb66",
|
|
|
|
- "66e39b7bb25c3e1debf2cb52",
|
|
|
|
- "66e39b76b25c3e1debf2cb1c",
|
|
|
|
- "66e39b71b25c3e1debf2ca58",
|
|
|
|
- "66e39b71b25c3e1debf2ca47",
|
|
|
|
- "66e39b71b25c3e1debf2ca3e",
|
|
|
|
- "66e39b71b25c3e1debf2ca1d",
|
|
|
|
- "66e39b62b25c3e1debf2c9a0",
|
|
|
|
- "66e39b0db25c3e1debf2c788",
|
|
|
|
- "66e39af3b25c3e1debf2c711",
|
|
|
|
- "66e39ae5b25c3e1debf2c6ac",
|
|
|
|
- "66e39ad5b25c3e1debf2c607",
|
|
|
|
- "66e39ad5b25c3e1debf2c5fc",
|
|
|
|
- "66e39ad1b25c3e1debf2c5e1",
|
|
|
|
- "66e39acbb25c3e1debf2c56c",
|
|
|
|
- "66e39ac6b25c3e1debf2c533",
|
|
|
|
- "66e39ac1b25c3e1debf2c526",
|
|
|
|
- "66e39a9eb25c3e1debf2c4e7",
|
|
|
|
- "66e39a80b25c3e1debf2c441",
|
|
|
|
- "66e39a5cb25c3e1debf2c357",
|
|
|
|
- "66e39a30b25c3e1debf2c18f",
|
|
|
|
- "66e39a12b25c3e1debf2c0cc",
|
|
|
|
- "66e39a08b25c3e1debf2c050",
|
|
|
|
- "66e39a08b25c3e1debf2bfce",
|
|
|
|
- "66e3993fb25c3e1debf2b875",
|
|
|
|
- "66e3990db25c3e1debf2b55b",
|
|
|
|
- "66e398f1b25c3e1debf2b4bc",
|
|
|
|
- "66e397ccb25c3e1debf2abed",
|
|
|
|
- "66e397b9b25c3e1debf2ab81",
|
|
|
|
- "66e3977db25c3e1debf2a7ae",
|
|
|
|
- "66e396b3b25c3e1debf2a049",
|
|
|
|
- "66e396b3b25c3e1debf29f97",
|
|
|
|
- "66e3969eb25c3e1debf29e62",
|
|
|
|
- "66e395feb25c3e1debf29abb",
|
|
|
|
- "66e395e0b25c3e1debf298d2",
|
|
|
|
- "66e395d6b25c3e1debf297e6",
|
|
|
|
- "66e395ccb25c3e1debf296d1",
|
|
|
|
- "66e39554b25c3e1debf29331",
|
|
|
|
- "66e39517b25c3e1debf29160",
|
|
|
|
- "66e394c7b25c3e1debf28f42",
|
|
|
|
- "66e394bdb25c3e1debf28ef6",
|
|
|
|
- "66e394b3b25c3e1debf28e48",
|
|
|
|
- "66e3944fb25c3e1debf28ab5",
|
|
|
|
- "66e393ccb25c3e1debf28729",
|
|
|
|
- "66e393c2b25c3e1debf286dd",
|
|
|
|
- "66e393c2b25c3e1debf286a5",
|
|
|
|
- "66e393aeb25c3e1debf28572",
|
|
|
|
- "66e3934ab25c3e1debf28423",
|
|
|
|
- "66e39322b25c3e1debf282c6",
|
|
|
|
- "66e392d1b25c3e1debf2809d",
|
|
|
|
- "66e39212b25c3e1debf279d1",
|
|
|
|
- "66e39209b25c3e1debf279c7",
|
|
|
|
- "66e391f5b25c3e1debf2779e",
|
|
|
|
- "66e391eab25c3e1debf2773f",
|
|
|
|
- "66e391e0b25c3e1debf276a8",
|
|
|
|
- "66e39168b25c3e1debf27347",
|
|
|
|
- "66e3912cb25c3e1debf2714a",
|
|
|
|
- "66e390e6b25c3e1debf26ee7",
|
|
|
|
- "66e390e4b25c3e1debf26e7c",
|
|
|
|
- "66e390b3b25c3e1debf26cce",
|
|
|
|
- "66e3906cb25c3e1debf26a8f",
|
|
|
|
- "66e3901bb25c3e1debf26822",
|
|
|
|
- "66e38ff5b25c3e1debf26714",
|
|
|
|
- "66e38fd5b25c3e1debf26694",
|
|
|
|
- "66e38fb8b25c3e1debf265a4",
|
|
|
|
- "66e38f90b25c3e1debf264c5",
|
|
|
|
- "66e38f7bb25c3e1debf263de",
|
|
|
|
- "66e38f68b25c3e1debf263b8",
|
|
|
|
- "66e38f5eb25c3e1debf2638c",
|
|
|
|
- "66e38f4ab25c3e1debf2633b",
|
|
|
|
- "66e38f40b25c3e1debf2631a",
|
|
|
|
- "66e38f18b25c3e1debf261c4",
|
|
|
|
- "66e38ef8b25c3e1debf260e7",
|
|
|
|
- "66e38ec8b25c3e1debf26063",
|
|
|
|
- "66e38eb3b25c3e1debf26017",
|
|
|
|
- "66e38e95b25c3e1debf25f78",
|
|
|
|
- "66e38e6db25c3e1debf25ef0",
|
|
|
|
- "66e38e61b25c3e1debf25eb4",
|
|
|
|
- "66e38a57b25c3e1debf24a45",
|
|
|
|
- "66e38a47b25c3e1debf24a09",
|
|
|
|
- "66e38a47b25c3e1debf249dd",
|
|
|
|
- "66e38a47b25c3e1debf249a1",
|
|
|
|
- "66e38a47b25c3e1debf24998",
|
|
|
|
- "66e38a47b25c3e1debf24995",
|
|
|
|
- "66e38a1fb25c3e1debf2494a",
|
|
|
|
- "66e389f3b25c3e1debf2482e",
|
|
|
|
- "66e389f3b25c3e1debf24824",
|
|
|
|
- "66e389f3b25c3e1debf2481d",
|
|
|
|
- "66e389f3b25c3e1debf24819",
|
|
|
|
- "66e389f3b25c3e1debf24816",
|
|
|
|
- "66e389f3b25c3e1debf24808",
|
|
|
|
- "66e389d5b25c3e1debf247ec",
|
|
|
|
- "66e389adb25c3e1debf24792",
|
|
|
|
- "66e389a6b25c3e1debf24733",
|
|
|
|
- "66e389a6b25c3e1debf24727",
|
|
|
|
- "66e389a3b25c3e1debf246f3",
|
|
|
|
- "66e3899cb25c3e1debf246ee",
|
|
|
|
- "66e3874db25c3e1debf23d91",
|
|
|
|
- "66e38739b25c3e1debf23d28",
|
|
|
|
- "66e38720b25c3e1debf23cf3",
|
|
|
|
- "66e386f3b25c3e1debf23cac",
|
|
|
|
- "66e386dfb25c3e1debf23c5b",
|
|
|
|
- "66e386dfb25c3e1debf23c5a",
|
|
|
|
- "66e386d5b25c3e1debf23c1b",
|
|
|
|
- "66e38699b25c3e1debf23b3b",
|
|
|
|
- "66e38694b25c3e1debf23b35",
|
|
|
|
- "66e3867bb25c3e1debf23af4",
|
|
|
|
- "66e38671b25c3e1debf23ad6",
|
|
|
|
- "66e38671b25c3e1debf23aa3",
|
|
|
|
- "66e38671b25c3e1debf23a97",
|
|
|
|
- "66e38661b25c3e1debf23a6a",
|
|
|
|
- "66e38657b25c3e1debf23a27",
|
|
|
|
- "66e3864db25c3e1debf23a10",
|
|
|
|
- "66e385f3b25c3e1debf23954",
|
|
|
|
- "66e385dfb25c3e1debf2393a",
|
|
|
|
- "66e385d0b25c3e1debf238de",
|
|
|
|
- "66e385c6b25c3e1debf23896",
|
|
|
|
- "66e385adb25c3e1debf2381b",
|
|
|
|
- "66e385a3b25c3e1debf237eb",
|
|
|
|
- "66e385a3b25c3e1debf237e0",
|
|
|
|
- "66e38570b25c3e1debf23778",
|
|
|
|
- "66e38566b25c3e1debf23765",
|
|
|
|
- "66e3855cb25c3e1debf23755",
|
|
|
|
- "66e38534b25c3e1debf2369e",
|
|
|
|
- "66e38526b25c3e1debf2367c",
|
|
|
|
- "66e3851cb25c3e1debf23646",
|
|
|
|
- "66e3851cb25c3e1debf2363d",
|
|
|
|
- "66e3851cb25c3e1debf2363a",
|
|
|
|
- "66e38512b25c3e1debf23621",
|
|
|
|
- "66e38512b25c3e1debf2360e",
|
|
|
|
- "66e384f8b25c3e1debf23590",
|
|
|
|
- "66e384eeb25c3e1debf23572",
|
|
|
|
- "66e384c2b25c3e1debf2350b",
|
|
|
|
- "66e3847fb25c3e1debf23474",
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- pool_mgo := make(chan bool, 200)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- log.Debug("预计处理条数···", len(arr))
|
|
|
|
- for k, v := range arr {
|
|
|
|
- if k%10 == 0 {
|
|
|
|
- log.Debug("cur index ", k)
|
|
|
|
- }
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(v string) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- data := ul.BidMgo.FindById("bidding", v)
|
|
|
|
- if len(data) >= 0 && data != nil {
|
|
|
|
- detail := qu.ObjToString(data["detail"])
|
|
|
|
- filetext := qu.ObjToString(data["filetext"]) //此处为附件信息···
|
|
|
|
- title := qu.ObjToString(data["title"])
|
|
|
|
- if strings.Contains(title, "开标记录") { //开标记录舍弃
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 0,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- log.Debug("开标记录舍弃···")
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
- if data["jyfb_data"] != nil { //剑鱼发布舍弃qi
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 0,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- log.Debug("剑鱼发布舍弃···")
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
- if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
|
|
|
|
- detail = filetext
|
|
|
|
- }
|
|
|
|
- if utf8.RuneCountInString(detail) < 100 {
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 0,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- log.Debug("长度不符舍弃···")
|
|
|
|
- return
|
|
|
|
- }
|
|
|
|
- //获取外围字段数据
|
|
|
|
- //分包判断-获取信息
|
|
|
|
- //ispkg, pkg := false, map[string]interface{}{}
|
|
|
|
- //if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
|
|
|
|
- // if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
|
|
|
|
- //
|
|
|
|
- // }
|
|
|
|
- //}
|
|
|
|
- pkg := prompt.AcquireMultiplePackageInfo(detail)
|
|
|
|
- if s_pkg, ok := pkg["s_pkg"].(map[string]map[string]interface{}); ok {
|
|
|
|
- if len(s_pkg) <= 0 {
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 0,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- } else if len(s_pkg) == 1 {
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 1,
|
|
|
|
- "pkg": pkg,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- } else {
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 2,
|
|
|
|
- "s_id": v,
|
|
|
|
- "pkg": pkg,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": 0,
|
|
|
|
- "s_id": v,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- } else {
|
|
|
|
- log.Debug("未查询到数据···")
|
|
|
|
- ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
|
|
|
|
- "ispkg": -1,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- }(v)
|
|
|
|
- }
|
|
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("is over ···")
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 导出需要修复的
|
|
|
|
-func TestFullJinOrCodeInfo() {
|
|
|
|
- q := map[string]interface{}{}
|
|
|
|
- pool_mgo := make(chan bool, 20)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total := 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- isPcode, update := false, map[string]interface{}{}
|
|
|
|
- ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
|
|
|
|
- o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
|
|
|
|
- if ext_ai_record != nil {
|
|
|
|
- o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
|
|
|
|
- o_budget = qu.Float64All((*ext_ai_record)["budget"])
|
|
|
|
- o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
|
|
|
|
- if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
|
|
|
|
- update["budget"] = filterAmount(r_budget, o_budget)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
|
|
|
|
- if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
|
|
|
|
- update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- //对于编号
|
|
|
|
- if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
|
|
|
|
- if o_projectcode != projectcode {
|
|
|
|
- if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
|
|
|
|
- fns := getpnsinfo(data) //获取附件名字
|
|
|
|
- for _, v := range fns {
|
|
|
|
- if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
|
|
|
|
- if strings.Contains(v, projectcode) {
|
|
|
|
- isPcode = true
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if isPcode {
|
|
|
|
- update["projectcode"] = o_projectcode
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if len(update) > 0 {
|
|
|
|
- //更新抽取表
|
|
|
|
- ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
|
|
|
|
- "$set": update,
|
|
|
|
- })
|
|
|
|
- //保存待修复表
|
|
|
|
- update["_id"] = tmp["_id"]
|
|
|
|
- ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update)
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("repair ai is over ...")
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 修复金额和编号
|
|
|
|
-func TestRepairJinOrCodeInfo() {
|
|
|
|
- q := map[string]interface{}{}
|
|
|
|
- pool_mgo := make(chan bool, 20)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total := 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- isPcode, update := false, map[string]interface{}{}
|
|
|
|
- ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
|
|
|
|
- o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
|
|
|
|
- if ext_ai_record != nil {
|
|
|
|
- o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
|
|
|
|
- o_budget = qu.Float64All((*ext_ai_record)["budget"])
|
|
|
|
- o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
|
|
|
|
- if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
|
|
|
|
- update["budget"] = filterAmount(r_budget, o_budget)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
|
|
|
|
- if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
|
|
|
|
- update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- //对于编号
|
|
|
|
- if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
|
|
|
|
- if o_projectcode != projectcode {
|
|
|
|
- if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
|
|
|
|
- fns := getpnsinfo(data) //获取附件名字
|
|
|
|
- for _, v := range fns {
|
|
|
|
- if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
|
|
|
|
- if strings.Contains(v, projectcode) {
|
|
|
|
- isPcode = true
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if isPcode {
|
|
|
|
- update["projectcode"] = o_projectcode
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if len(update) > 0 {
|
|
|
|
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
|
|
|
|
- "$set": update,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("repair ai is over ...")
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 筛选金额
|
|
|
|
-func filterAmount(f1 float64, f2 float64) float64 {
|
|
|
|
- //选取一个合适的金额 ...
|
|
|
|
- if f1 > f2 {
|
|
|
|
- if f1 > 100000000.0 {
|
|
|
|
- return f2
|
|
|
|
- } else {
|
|
|
|
- return f1
|
|
|
|
- }
|
|
|
|
- } else if f1 < f2 {
|
|
|
|
- if f2 > 100000000.0 {
|
|
|
|
- return f1
|
|
|
|
- } else {
|
|
|
|
- return f2
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- return f1
|
|
|
|
|
|
+// 验证单条数据···
|
|
|
|
+func TestSinglePackageInfo(name string, tmpid string) {
|
|
|
|
+ now := time.Now().Unix()
|
|
|
|
+ tmp := ul.BidMgo.FindById(name, tmpid)
|
|
|
|
+ if len(tmp) == 0 || tmp == nil {
|
|
|
|
+ log.Debug("未查询到数据...", tmpid)
|
|
|
|
+ return
|
|
}
|
|
}
|
|
-}
|
|
|
|
-
|
|
|
|
-func TestExportJinErInfo() {
|
|
|
|
- q := map[string]interface{}{}
|
|
|
|
- pool_mgo := make(chan bool, 20)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total, isok := 0, 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- isok++
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- budget := qu.Float64All(tmp["budget"])
|
|
|
|
- bidamount := qu.Float64All(tmp["bidamount"])
|
|
|
|
- saveinfo := map[string]interface{}{}
|
|
|
|
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
|
|
|
|
- ext_budget := qu.Float64All((*ext_ai_record)["budget"])
|
|
|
|
- ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"])
|
|
|
|
- if budget > 0.0 && ext_budget > 0.0 {
|
|
|
|
- if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 {
|
|
|
|
- saveinfo["budget"] = budget
|
|
|
|
- saveinfo["ext_budget"] = ext_budget
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if bidamount > 0.0 && ext_bidamount > 0.0 {
|
|
|
|
- if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 {
|
|
|
|
- saveinfo["bidamount"] = bidamount
|
|
|
|
- saveinfo["ext_bidamount"] = ext_bidamount
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if len(saveinfo) > 0 && tmpid != "" {
|
|
|
|
- saveinfo["toptype"] = tmp["toptype"]
|
|
|
|
- saveinfo["subtype"] = tmp["subtype"]
|
|
|
|
- saveinfo["href"] = tmp["href"]
|
|
|
|
- saveinfo["jyhref"] = tmp["jytest_href"]
|
|
|
|
- ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo)
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
|
|
+ detail := qu.ObjToString(tmp["detail"])
|
|
|
|
+ filetext := qu.ObjToString(tmp["filetext"]) //此处为附件信息···
|
|
|
|
+ if utf8.RuneCountInString(detail) < 100 {
|
|
|
|
+ detail = filetext
|
|
}
|
|
}
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("repair ai is over ...", isok)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-// 修正buyer等字段
|
|
|
|
-func TestRepairBuyerInfo(name string) {
|
|
|
|
- q := map[string]interface{}{}
|
|
|
|
- pool_mgo := make(chan bool, 20)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total, isok := 0, 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- isok++
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- buyer := qu.ObjToString(tmp["buyer"])
|
|
|
|
- agency := qu.ObjToString(tmp["agency"])
|
|
|
|
- winner := qu.ObjToString(tmp["winner"])
|
|
|
|
- update := map[string]interface{}{}
|
|
|
|
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
|
|
|
|
- o_buyer := qu.ObjToString((*ext_ai_record)["buyer"])
|
|
|
|
- if buyer == agency && o_buyer != "" {
|
|
|
|
- update["buyer"] = o_buyer
|
|
|
|
- }
|
|
|
|
- o_winner := qu.ObjToString((*ext_ai_record)["winner"])
|
|
|
|
- if o_winner != "" && strings.Contains(winner, o_winner) && o_winner != o_winner {
|
|
|
|
- update["winner"] = o_winner
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if len(update) > 0 && tmpid != "" {
|
|
|
|
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
|
|
|
|
- "$set": update,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
|
|
+ detail = ul.ConvertToMarkdown(detail)
|
|
|
|
+ pkg := prompt.AcquireNewMultiplePackageInfo(detail)
|
|
|
|
+ //最终结果...
|
|
|
|
+ for k, v := range pkg {
|
|
|
|
+ log.Debug(k, "~", v)
|
|
}
|
|
}
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("repair ai is over ...", isok)
|
|
|
|
-
|
|
|
|
|
|
+ log.Debug("耗时···", time.Now().Unix()-now)
|
|
}
|
|
}
|
|
|
|
|
|
-func TestDelUpBuyerAi() {
|
|
|
|
- dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil)
|
|
|
|
|
|
+// 新分包数据···
|
|
|
|
+func TestNewPackageInfo1010() {
|
|
|
|
+ dataArr, _ := ul.SourceMgo.Find("zktest_info_0930", map[string]interface{}{}, nil, nil)
|
|
|
|
+ log.Debug("数量···", len(dataArr))
|
|
pool_mgo := make(chan bool, 50)
|
|
pool_mgo := make(chan bool, 50)
|
|
wg_mgo := &sync.WaitGroup{}
|
|
wg_mgo := &sync.WaitGroup{}
|
|
for k, v := range dataArr {
|
|
for k, v := range dataArr {
|
|
- if k%1000 == 0 {
|
|
|
|
- log.Debug(k, "~", v["_id"])
|
|
|
|
|
|
+ if k%50 == 0 {
|
|
|
|
+ log.Debug("cur index ", k)
|
|
}
|
|
}
|
|
pool_mgo <- true
|
|
pool_mgo <- true
|
|
wg_mgo.Add(1)
|
|
wg_mgo.Add(1)
|
|
@@ -548,197 +67,15 @@ func TestDelUpBuyerAi() {
|
|
<-pool_mgo
|
|
<-pool_mgo
|
|
wg_mgo.Done()
|
|
wg_mgo.Done()
|
|
}()
|
|
}()
|
|
-
|
|
|
|
- buyer := qu.ObjToString(v["buyer"])
|
|
|
|
- tmpid := ul.BsonTOStringId(v["_id"])
|
|
|
|
- data1 := ul.SourceMgo.FindById("result_20220218", tmpid)
|
|
|
|
- if len(data1) > 0 {
|
|
|
|
- ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
|
|
|
|
- "$set": map[string]interface{}{"buyer": buyer},
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- data2 := ul.SourceMgo.FindById("result_20220219", tmpid)
|
|
|
|
- if len(data2) > 0 {
|
|
|
|
- ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{
|
|
|
|
- "$set": map[string]interface{}{"buyer": buyer},
|
|
|
|
- })
|
|
|
|
|
|
+ new_v := v
|
|
|
|
+ data := ResolveInfo(v)
|
|
|
|
+ if len(data) > 0 {
|
|
|
|
+ new_v["ai_zhipu"] = data
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ ul.SourceMgo.Save("zktest_info_0930_new", new_v)
|
|
}(v)
|
|
}(v)
|
|
}
|
|
}
|
|
-
|
|
|
|
wg_mgo.Wait()
|
|
wg_mgo.Wait()
|
|
- log.Debug("del ai is over ...")
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func TestAiBuyerInfo() {
|
|
|
|
- //dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil)
|
|
|
|
-
|
|
|
|
- q := map[string]interface{}{}
|
|
|
|
- pool_mgo := make(chan bool, 50)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- total, isok := 0, 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%1000 == 0 {
|
|
|
|
- log.Debug("cur index ", total)
|
|
|
|
- }
|
|
|
|
- isok++
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" {
|
|
|
|
- if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil {
|
|
|
|
- if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
|
|
|
|
- ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
|
|
|
|
- "$set": map[string]interface{}{"buyer": ns_buyer},
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("repair ai is over ...", isok)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func TestExportAiBuyer() {
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- pool_mgo := make(chan bool, 10)
|
|
|
|
- wg_mgo := &sync.WaitGroup{}
|
|
|
|
- q, total := map[string]interface{}{
|
|
|
|
- "_id": map[string]interface{}{
|
|
|
|
- "$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"),
|
|
|
|
- },
|
|
|
|
- }, 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{
|
|
|
|
- "ai_zhipu": 1,
|
|
|
|
- "ext_ai_record": 1,
|
|
|
|
- }).Iter()
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total, "~", tmp["_id"])
|
|
|
|
- }
|
|
|
|
- pool_mgo <- true
|
|
|
|
- wg_mgo.Add(1)
|
|
|
|
- go func(tmp map[string]interface{}) {
|
|
|
|
- defer func() {
|
|
|
|
- <-pool_mgo
|
|
|
|
- wg_mgo.Done()
|
|
|
|
- }()
|
|
|
|
- ai_buyer, ext_buyer := "", ""
|
|
|
|
- if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil {
|
|
|
|
- ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"])
|
|
|
|
- }
|
|
|
|
- if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
|
|
|
|
- ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"])
|
|
|
|
- }
|
|
|
|
- if ai_buyer != "" {
|
|
|
|
- ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{
|
|
|
|
- "_id": tmp["_id"],
|
|
|
|
- "ai_buyer": ai_buyer,
|
|
|
|
- "ext_buyer": ext_buyer,
|
|
|
|
- })
|
|
|
|
- }
|
|
|
|
- }(tmp)
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- wg_mgo.Wait()
|
|
|
|
- log.Debug("export is over ", total)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func TestIsPackage() {
|
|
|
|
- tmpArr := []string{}
|
|
|
|
- pkgArr := []int{}
|
|
|
|
- ok := 0
|
|
|
|
- for k, v := range tmpArr {
|
|
|
|
- data := ul.SourceMgo.FindById("ai_41411", v)
|
|
|
|
- if len(data) == 0 {
|
|
|
|
- data = ul.SourceMgo.FindById("ai_294", v)
|
|
|
|
- }
|
|
|
|
- detail := qu.ObjToString(data["detail"])
|
|
|
|
- ispkg := prompt.AcquireIsPackageInfo(detail)
|
|
|
|
- if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
|
|
|
|
- ok++
|
|
|
|
- } else {
|
|
|
|
- log.Debug("错误~", v)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- log.Debug("is over ~ ", len(tmpArr)-ok)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-func TestPackageInfo() {
|
|
|
|
- sess := ul.SourceMgo.GetMgoConn()
|
|
|
|
- defer ul.SourceMgo.DestoryMongoConn(sess)
|
|
|
|
- q, total := map[string]interface{}{"ai_zhipu.ispkg": true}, 0
|
|
|
|
- it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("-_id").Iter()
|
|
|
|
- isok := 0
|
|
|
|
- os.Remove("test.xlsx")
|
|
|
|
- f := new_xlsx.NewFile()
|
|
|
|
- sheet, _ := f.AddSheet("数据信息")
|
|
|
|
- row := sheet.AddRow()
|
|
|
|
- writeRow(row, []string{"序号", "唯一标识", "站点", "项目名称", "一级分类", "二级分类", "原文链接", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
|
|
|
|
- for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
|
|
|
|
- if total%10000 == 0 {
|
|
|
|
- log.Debug("cur index ", total, "~", isok)
|
|
|
|
- }
|
|
|
|
- tmpid := ul.BsonTOStringId(tmp["_id"])
|
|
|
|
- site := qu.ObjToString(tmp["site"])
|
|
|
|
- projectname := qu.ObjToString(tmp["projectname"])
|
|
|
|
- toptype := qu.ObjToString(tmp["toptype"])
|
|
|
|
- subtype := qu.ObjToString(tmp["subtype"])
|
|
|
|
- href := qu.ObjToString(tmp["href"])
|
|
|
|
- jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
|
|
|
|
- ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
|
|
|
|
- if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
|
|
|
|
- if s_info := qu.ObjToMap((*s_pkg)["s_pkg"]); s_info != nil && len(*s_info) > 1 {
|
|
|
|
- isok++
|
|
|
|
- for _, v := range *s_info {
|
|
|
|
- if v1 := qu.ObjToMap(v); v1 != nil {
|
|
|
|
- row = sheet.AddRow()
|
|
|
|
- arr := []string{}
|
|
|
|
- arr = append(arr, fmt.Sprintf("%d", isok))
|
|
|
|
- arr = append(arr, tmpid)
|
|
|
|
- arr = append(arr, site)
|
|
|
|
- arr = append(arr, projectname)
|
|
|
|
- arr = append(arr, toptype)
|
|
|
|
- arr = append(arr, subtype)
|
|
|
|
- arr = append(arr, href)
|
|
|
|
- arr = append(arr, jyhref)
|
|
|
|
- arr = append(arr, qu.ObjToString((*v1)["name"]))
|
|
|
|
- arr = append(arr, qu.ObjToString((*v1)["winner"]))
|
|
|
|
- bidamount := qu.Float64All((*v1)["bidamount"])
|
|
|
|
- if bidamount > 0.0 {
|
|
|
|
- arr = append(arr, fmt.Sprintf("%.2f", bidamount))
|
|
|
|
- } else {
|
|
|
|
- arr = append(arr, "")
|
|
|
|
- }
|
|
|
|
- writeRow(row, arr)
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if isok > 1000 {
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- tmp = make(map[string]interface{})
|
|
|
|
- }
|
|
|
|
- log.Debug("is over ", total, isok)
|
|
|
|
-
|
|
|
|
- if err := f.Save("test.xlsx"); err != nil {
|
|
|
|
- fmt.Println("保存xlsx失败:", err)
|
|
|
|
- } else {
|
|
|
|
- fmt.Println("保存xlsx成功:", err)
|
|
|
|
- }
|
|
|
|
log.Debug("is over ...")
|
|
log.Debug("is over ...")
|
|
}
|
|
}
|
|
|
|
|