123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564 |
- package main
- import (
- "crypto/sha256"
- "encoding/hex"
- "fmt"
- "log"
- "math/big"
- "regexp"
- "strings"
- util "app.yhyue.com/moapp/jybase/common"
- "app.yhyue.com/moapp/jybase/encrypt"
- mg "app.yhyue.com/moapp/jybase/mongodb"
- "github.com/gogf/gf/v2/util/gconv"
- "github.com/tealeg/xlsx"
- )
type (
	// config is the root of the application configuration that init fills
	// in through util.ReadConfig.
	config struct {
		// Mongodb groups the MongoDB connection profiles.
		Mongodb struct {
			// Main is the profile init uses to set up the shared MQFW handle.
			Main *mgoConf
		}
	}

	// mgoConf describes a single MongoDB connection profile.
	mgoConf struct {
		// Address is copied into MongodbSim.MongodbAddr by init.
		Address string
		// Size is copied into MongodbSim.Size by init.
		Size int
		// DbName, UserName and Password are copied into the MongodbSim
		// fields of the same names by init.
		DbName, UserName, Password string
		// Collection and Collection_back are not read by any code visible
		// in this file.
		Collection, Collection_back string
	}
)
- var (
- Sysconfig *config
- MQFW mg.MongodbSim
- )
- func init() {
- util.ReadConfig(&Sysconfig)
- MQFW = mg.MongodbSim{
- MongodbAddr: Sysconfig.Mongodb.Main.Address,
- Size: Sysconfig.Mongodb.Main.Size,
- DbName: Sysconfig.Mongodb.Main.DbName,
- UserName: Sysconfig.Mongodb.Main.UserName,
- Password: Sysconfig.Mongodb.Main.Password,
- }
- MQFW.InitPool()
- }
- func xiufu() {
- //
- sess := MQFW.GetMgoConn()
- defer MQFW.DestoryMongoConn(sess)
- query := map[string]interface{}{
- "district": "开发区",
- "city": "呼和浩特市",
- }
- i := 0
- it := sess.DB("yusuan").C("yusuan_fileitem").Find(query).Iter()
- for tp := make(map[string]interface{}); it.Next(&tp); {
- _id := mg.BsonIdToSId(tp["_id"])
- file_path := gconv.String(tp["file_path"])
- file_path = strings.ReplaceAll(file_path, `/开发区/`, `/呼和浩特经济技术开发区/`)
- MQFW.UpdateById("yusuan_fileitem", _id, map[string]interface{}{
- "$set": map[string]interface{}{
- "file_path": file_path,
- "district": "呼和浩特经济技术开发区",
- },
- })
- i++
- if i%100 == 0 {
- log.Println("i:", i)
- }
- tp = make(map[string]interface{})
- }
- log.Println(i)
- }
- //是否正文匹配表格不匹配 : table:0不带表格 1带表格
- //正文 全取 表格 匹配
- func projectHash(year, projectname, procure_content, kpi, institution string) string {
- projectname = RemoveNewlines(projectname)
- procure_content = RemoveNewlines(procure_content)
- kpi = RemoveNewlines(kpi)
- institution = RemoveNewlines(institution)
- return fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
- }
- func purchasingHash(year, projectname, pro_code, pro_item, institution, number, unitprice string) string {
- projectname = RemoveNewlines(projectname)
- pro_item = RemoveNewlines(pro_item)
- pro_code = RemoveNewlines(pro_code)
- institution = RemoveNewlines(institution)
- unitprice = gconv.String(gconv.Float64(unitprice))
- s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, unitprice, institution)
- return s
- }
- func initZXZ() map[string]bool {
- m := map[string]bool{}
- for _, v := range []string{"1.xlsx", "2.xlsx", "3.xlsx", "4.xlsx", "5.xlsx", "6.xlsx", "7.xlsx"} {
- // 打开 Excel 文件
- xlFile, err := xlsx.OpenFile(v)
- if err != nil {
- fmt.Println("Error:", err)
- }
- // 获取第一个工作表
- sheet := xlFile.Sheets[0]
- // 遍历每一行
- for i, row := range sheet.Rows {
- // 获取 FGHI 列的值
- if i >= 3 {
- projectname := row.Cells[7].String()
- procure_content := row.Cells[8].String()
- kpi := row.Cells[9].String()
- institution := row.Cells[10].String()
- projectname = RemoveNewlines(projectname)
- procure_content = RemoveNewlines(procure_content)
- kpi = RemoveNewlines(kpi)
- institution = RemoveNewlines(institution)
- s := fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
- m[s] = true
- }
- }
- }
- for _, v := range []string{"20230523_1.xlsx", "20240510_1.xlsx", "20240510_2.xlsx"} {
- // for _, v := range []string{"20230523_1.xlsx"} {
- // 打开 Excel 文件
- xlFile, err := xlsx.OpenFile(v)
- if err != nil {
- fmt.Println("Error:", err)
- }
- // 获取第一个工作表
- sheet := xlFile.Sheets[0]
- // 遍历每一行
- for i, row := range sheet.Rows {
- // 获取 FGHI 列的值
- if i >= 2 {
- projectname := row.Cells[7].String()
- pro_item := row.Cells[8].String()
- number := row.Cells[9].String()
- totalprice := row.Cells[10].String()
- institution := row.Cells[11].String()
- projectname = RemoveNewlines(projectname)
- pro_item = RemoveNewlines(pro_item)
- number = RemoveNewlines(number)
- totalprice = RemoveNewlines(totalprice)
- institution = RemoveNewlines(institution)
- s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, totalprice, institution)
- m[s] = true
- }
- }
- }
- log.Println(len(m))
- return m
- }
- func initJCL() map[string]bool {
- m := map[string]bool{}
- for _, v := range []string{"j1.xlsx", "j2.xlsx", "j3.xlsx", "j4.xlsx", "j5.xlsx", "j6.xlsx", "j7.xlsx", "j8.xlsx"} {
- // 打开 Excel 文件
- xlFile, err := xlsx.OpenFile(v)
- if err != nil {
- fmt.Println("Error:", err)
- }
- // 获取第一个工作表
- sheet := xlFile.Sheets[0]
- // 遍历每一行
- for i, row := range sheet.Rows {
- // 获取 FGHI 列的值
- if i >= 3 {
- projectname := row.Cells[7].String()
- procure_content := row.Cells[8].String()
- kpi := row.Cells[9].String()
- institution := row.Cells[10].String()
- projectname = RemoveNewlines(projectname)
- procure_content = RemoveNewlines(procure_content)
- kpi = RemoveNewlines(kpi)
- institution = RemoveNewlines(institution)
- s := fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
- m[s] = true
- }
- }
- }
- // for _, v := range []string{"mx_j1.xlsx", "mx_j2.xlsx", "mx_j3.xlsx", "mx_j4.xlsx"} {
- for _, v := range []string{"mx_j3.xlsx"} {
- // 打开 Excel 文件
- xlFile, err := xlsx.OpenFile(v)
- if err != nil {
- fmt.Println("Error:", err)
- }
- // 获取第一个工作表
- sheet := xlFile.Sheets[0]
- // 遍历每一行
- for i, row := range sheet.Rows {
- // 获取 FGHI 列的值
- if i >= 2 {
- projectname := row.Cells[7].String()
- pro_item := row.Cells[8].String()
- number := row.Cells[9].String()
- totalprice := row.Cells[10].String()
- institution := row.Cells[11].String()
- projectname = RemoveNewlines(projectname)
- pro_item = RemoveNewlines(pro_item)
- number = RemoveNewlines(number)
- totalprice = RemoveNewlines(totalprice)
- institution = RemoveNewlines(institution)
- if institution == "石家庄市城市更新促进中心" {
- log.Println(row.Cells[10])
- log.Println(row.Cells[10].String())
- }
- s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, totalprice, institution)
- m[s] = true
- if institution == "石家庄市城市更新促进中心" {
- log.Println("---", s)
- }
- }
- }
- }
- log.Println(len(m))
- return m
- }
// SafeDivide divides a by b while avoiding the binary rounding noise of a
// plain float64 division.
//
// Both operands are scaled by 1e9 and truncated to integers (so they are
// treated as decimals with at most nine fractional digits), divided as exact
// rationals, and the quotient is converted back to float64. For example
// SafeDivide(0.1, 0.3) yields the float64 nearest to 1/3, whereas 0.1/0.3
// does not.
//
// When an operand cannot be scaled into an int64 (NaN, ±Inf, magnitude of
// about 9.2e9 or more) or the scaled divisor is zero (b == 0 or |b| < 1e-9),
// the function returns the plain float64 quotient a / b instead of
// panicking.
func SafeDivide(a, b float64) float64 {
	const scale = 1e9
	// 2^63 is the first magnitude that no longer fits in an int64.
	const limit = float64(1 << 63)

	scaledA, scaledB := a*scale, b*scale
	// The negated range tests are also true for NaN.
	if !(scaledA > -limit && scaledA < limit) || !(scaledB > -limit && scaledB < limit) {
		return a / b
	}
	numB := int64(scaledB)
	if numB == 0 {
		// Rat.Quo panics on a zero divisor.
		return a / b
	}

	aRat := big.NewRat(int64(scaledA), scale)
	bRat := big.NewRat(numB, scale)
	// The exactness flag is not needed: the nearest float64 is what callers get.
	quotient, _ := new(big.Rat).Quo(aRat, bRat).Float64()
	return quotient
}
- func main() {
- dbname := "hp_zxl_2"
- JCL_hashMap := initJCL()
- // for k, _ := range JCL_hashMap {
- // if strings.Contains(k, "石家庄市人民代表大会常务委员会") {
- // log.Println(k)
- // }
- // }
- // purchasing, _ := MQFW.Find("purchasing_huipu", map[string]interface{}{
- // "file_path": "2024年/河北/石家庄市/政府/2024_石家庄市人大常委会_1.pdf",
- // "unitprice": "0.40",
- // }, nil, nil, false, -1, -1)
- // log.Println("len:", len(*purchasing))
- // for _, pv := range *purchasing {
- // pv["type"] = "purchasing"
- // year := gconv.String(pv["year"])
- // projectname := gconv.String(pv["projectname"])
- // pro_code := gconv.String(pv["pro_code"])
- // pro_item := gconv.String(pv["pro_item"])
- // institution := gconv.String(pv["institution"])
- // number := gconv.String(pv["number"])
- // totalprice := gconv.String(pv["totalprice"])
- // projectname = RemoveNewlines(projectname)
- // pro_item = RemoveNewlines(pro_item)
- // institution = RemoveNewlines(institution)
- // pv["projectname"] = projectname
- // pv["pro_item"] = pro_item
- // pv["institution"] = institution
- // hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
- // log.Println("hs", hs)
- // log.Println(JCL_hashMap[hs])
- // }
- // return
- hashMap := initZXZ()
- saveMap := map[string]bool{}
- //
- sess := MQFW.GetMgoConn()
- defer MQFW.DestoryMongoConn(sess)
- query := map[string]interface{}{
- "exists_key": 1,
- }
- it := sess.DB("yusuan").C("yusuan_txt").Find(query).Select(map[string]interface{}{
- "detail": 1,
- "file_path": 1,
- }).Iter()
- notfind := 0
- ccc := 0
- for tp := make(map[string]interface{}); it.Next(&tp); {
- file_path := gconv.String(tp["file_path"])
- detail := gconv.Maps(tp["detail"])
- ccc++
- if ccc%100 == 0 {
- log.Println("txt 解析:", ccc)
- }
- project, _ := MQFW.Find("yusuan_project", map[string]interface{}{
- "file_path": file_path,
- }, nil, nil, false, -1, -1)
- purchasing, _ := MQFW.Find("yusuan_purchasing", map[string]interface{}{
- "file_path": file_path,
- }, nil, nil, false, -1, -1)
- if (project == nil || len(*project) == 0) && (purchasing == nil || len(*purchasing) == 0) {
- fmt.Println(file_path)
- notfind++
- continue
- }
- for _, v := range detail {
- key_words := gconv.String(v["key_words"])
- paragraph := gconv.String(v["paragraph"])
- table := gconv.Int(v["table"])
- //是否正文匹配表格不匹配 : table:0不带表格 1带表格
- //正文 全取 表格 匹配
- if table == 0 {
- if project != nil && len(*project) > 0 {
- for _, pv := range *project {
- pv["matchkey"] = key_words
- pv["paragraph"] = paragraph
- pv["table"] = table
- pv["type"] = "project"
- year := gconv.String((pv)["year"])
- projectname := gconv.String((pv)["projectname"])
- procure_content := gconv.String((pv)["procure_content"])
- kpi := gconv.String((pv)["kpi"])
- institution := gconv.String((pv)["institution"])
- projectname = RemoveNewlines(projectname)
- institution = RemoveNewlines(institution)
- pv["projectname"] = projectname
- pv["institution"] = institution
- hs := projectHash(year, projectname, procure_content, kpi, institution)
- if saveMap[hs] {
- continue
- }
- saveMap[hs] = true
- pv["zxz"] = util.If(hashMap[hs], "是", "否")
- pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
- if !JCL_hashMap[hs] {
- hash_code := gconv.String(pv["hash_code"])
- if MQFW.Count("project_huipu", map[string]interface{}{
- "hash_code": hash_code,
- }) > 0 {
- pv["jcl"] = "是"
- }
- }
- id := MQFW.Save(dbname, pv)
- if id != "" {
- MQFW.UpdateById(dbname, id, map[string]interface{}{
- "$set": map[string]interface{}{
- "eid": encrypt.EncodeArticleId2ByCheck(id),
- },
- })
- }
- }
- }
- if purchasing != nil && len(*purchasing) > 0 {
- for _, pv := range *purchasing {
- pv["matchkey"] = key_words
- pv["paragraph"] = paragraph
- pv["table"] = table
- pv["type"] = "purchasing"
- year := gconv.String(pv["year"])
- projectname := gconv.String(pv["projectname"])
- pro_code := gconv.String(pv["pro_code"])
- pro_item := gconv.String(pv["pro_item"])
- institution := gconv.String(pv["institution"])
- number := gconv.String(pv["number"])
- totalprice := gconv.String(pv["totalprice"])
- projectname = RemoveNewlines(projectname)
- pro_item = RemoveNewlines(pro_item)
- institution = RemoveNewlines(institution)
- pv["projectname"] = projectname
- pv["pro_item"] = pro_item
- pv["institution"] = institution
- hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
- if saveMap[hs] {
- continue
- }
- saveMap[hs] = true
- pv["zxz"] = util.If(hashMap[hs], "是", "否")
- pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
- if !JCL_hashMap[hs] {
- hash_code := gconv.String(pv["hash_code"])
- if MQFW.Count("purchasing_huipu", map[string]interface{}{
- "hash_code": hash_code,
- }) > 0 {
- pv["jcl"] = "是"
- }
- }
- id := MQFW.Save(dbname, pv)
- if id != "" {
- MQFW.UpdateById(dbname, id, map[string]interface{}{
- "$set": map[string]interface{}{
- "eid": encrypt.EncodeArticleId2ByCheck(id),
- },
- })
- }
- }
- }
- } else {
- //表格 匹配
- for _, vkey := range strings.Split(key_words, ",") {
- if project != nil && len(*project) > 0 {
- for _, pv := range *project {
- pv["matchkey"] = key_words
- pv["paragraph"] = paragraph
- pv["table"] = table
- pv["type"] = "project"
- year := gconv.String((pv)["year"])
- projectname := gconv.String((pv)["projectname"])
- procure_content := gconv.String((pv)["procure_content"])
- kpi := gconv.String((pv)["kpi"])
- institution := gconv.String((pv)["institution"])
- projectname = RemoveNewlines(projectname)
- institution = RemoveNewlines(institution)
- pv["projectname"] = projectname
- pv["institution"] = institution
- if !strings.Contains(projectname, vkey) && !strings.Contains(procure_content, vkey) && !strings.Contains(kpi, vkey) {
- continue
- pv["table_jiexi"] = 1 //漏解析
- }
- hs := projectHash(year, projectname, procure_content, kpi, institution)
- if saveMap[hs] {
- continue
- }
- if gconv.String(pv["fileitem_id"]) == "660491d7138c4f04f70f5838" {
- log.Println(hs)
- }
- saveMap[hs] = true
- pv["zxz"] = util.If(hashMap[hs], "是", "否")
- pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
- if !JCL_hashMap[hs] {
- hash_code := gconv.String(pv["hash_code"])
- if MQFW.Count("project_huipu", map[string]interface{}{
- "hash_code": hash_code,
- }) > 0 {
- pv["jcl"] = "是"
- }
- }
- id := MQFW.Save(dbname, pv)
- if id != "" {
- MQFW.UpdateById(dbname, id, map[string]interface{}{
- "$set": map[string]interface{}{
- "eid": encrypt.EncodeArticleId2ByCheck(id),
- },
- })
- }
- }
- }
- if purchasing != nil && len(*purchasing) > 0 {
- for _, pv := range *purchasing {
- pv["matchkey"] = key_words
- pv["paragraph"] = paragraph
- pv["table"] = table
- pv["type"] = "purchasing"
- year := gconv.String(pv["year"])
- projectname := gconv.String(pv["projectname"])
- pro_code := gconv.String(pv["pro_code"])
- pro_item := gconv.String(pv["pro_item"])
- institution := gconv.String(pv["institution"])
- number := gconv.String(pv["number"])
- totalprice := gconv.String(pv["totalprice"])
- projectname = RemoveNewlines(projectname)
- institution = RemoveNewlines(institution)
- pro_item = RemoveNewlines(pro_item)
- pv["projectname"] = projectname
- pv["institution"] = institution
- pv["pro_item"] = pro_item
- if !strings.Contains(projectname, vkey) && !strings.Contains(pro_item, vkey) {
- continue
- pv["table_jiexi"] = 1 //漏解析
- }
- hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
- if saveMap[hs] {
- continue
- }
- saveMap[hs] = true
- pv["zxz"] = util.If(hashMap[hs], "是", "否")
- pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
- if !JCL_hashMap[hs] {
- hash_code := gconv.String(pv["hash_code"])
- if MQFW.Count("purchasing_huipu", map[string]interface{}{
- "hash_code": hash_code,
- }) > 0 {
- pv["jcl"] = "是"
- }
- }
- id := MQFW.Save(dbname, pv)
- if id != "" {
- MQFW.UpdateById(dbname, id, map[string]interface{}{
- "$set": map[string]interface{}{
- "eid": encrypt.EncodeArticleId2ByCheck(id),
- },
- })
- }
- }
- }
- }
- }
- }
- tp = make(map[string]interface{})
- }
- log.Println("未解析到数据数量:", notfind)
- }
// HashCode returns the SHA-256 digest of input as a lowercase hexadecimal
// string (64 characters).
func HashCode(input string) string {
	digest := sha256.New()
	digest.Write([]byte(input)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(digest.Sum(nil))
}
// newlinePattern matches a single line break, either "\n" or "\r\n". It is
// compiled once at package scope because RemoveNewlines is called for every
// spreadsheet cell and every database record.
var newlinePattern = regexp.MustCompile(`\r?\n`)

// RemoveNewlines returns str with every "\n" and "\r\n" removed. A carriage
// return that is not followed by a line feed is left in place.
func RemoveNewlines(str string) string {
	return newlinePattern.ReplaceAllString(str, "")
}
|