main.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. package main
  2. import (
  3. "crypto/sha256"
  4. "encoding/hex"
  5. "fmt"
  6. "log"
  7. "math/big"
  8. "regexp"
  9. "strings"
  10. util "app.yhyue.com/moapp/jybase/common"
  11. "app.yhyue.com/moapp/jybase/encrypt"
  12. mg "app.yhyue.com/moapp/jybase/mongodb"
  13. "github.com/gogf/gf/v2/util/gconv"
  14. "github.com/tealeg/xlsx"
  15. )
  16. type config struct {
  17. Mongodb struct {
  18. Main *mgoConf
  19. }
  20. }
  21. type mgoConf struct {
  22. Address string
  23. Size int
  24. DbName string
  25. UserName string
  26. Password string
  27. Collection string
  28. Collection_back string
  29. }
  30. var (
  31. Sysconfig *config
  32. MQFW mg.MongodbSim
  33. )
  34. func init() {
  35. util.ReadConfig(&Sysconfig)
  36. MQFW = mg.MongodbSim{
  37. MongodbAddr: Sysconfig.Mongodb.Main.Address,
  38. Size: Sysconfig.Mongodb.Main.Size,
  39. DbName: Sysconfig.Mongodb.Main.DbName,
  40. UserName: Sysconfig.Mongodb.Main.UserName,
  41. Password: Sysconfig.Mongodb.Main.Password,
  42. }
  43. MQFW.InitPool()
  44. }
  45. func xiufu() {
  46. //
  47. sess := MQFW.GetMgoConn()
  48. defer MQFW.DestoryMongoConn(sess)
  49. query := map[string]interface{}{
  50. "district": "开发区",
  51. "city": "呼和浩特市",
  52. }
  53. i := 0
  54. it := sess.DB("yusuan").C("yusuan_fileitem").Find(query).Iter()
  55. for tp := make(map[string]interface{}); it.Next(&tp); {
  56. _id := mg.BsonIdToSId(tp["_id"])
  57. file_path := gconv.String(tp["file_path"])
  58. file_path = strings.ReplaceAll(file_path, `/开发区/`, `/呼和浩特经济技术开发区/`)
  59. MQFW.UpdateById("yusuan_fileitem", _id, map[string]interface{}{
  60. "$set": map[string]interface{}{
  61. "file_path": file_path,
  62. "district": "呼和浩特经济技术开发区",
  63. },
  64. })
  65. i++
  66. if i%100 == 0 {
  67. log.Println("i:", i)
  68. }
  69. tp = make(map[string]interface{})
  70. }
  71. log.Println(i)
  72. }
  73. //是否正文匹配表格不匹配 : table:0不带表格 1带表格
  74. //正文 全取 表格 匹配
  75. func projectHash(year, projectname, procure_content, kpi, institution string) string {
  76. projectname = RemoveNewlines(projectname)
  77. procure_content = RemoveNewlines(procure_content)
  78. kpi = RemoveNewlines(kpi)
  79. institution = RemoveNewlines(institution)
  80. return fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
  81. }
  82. func purchasingHash(year, projectname, pro_code, pro_item, institution, number, unitprice string) string {
  83. projectname = RemoveNewlines(projectname)
  84. pro_item = RemoveNewlines(pro_item)
  85. pro_code = RemoveNewlines(pro_code)
  86. institution = RemoveNewlines(institution)
  87. unitprice = gconv.String(gconv.Float64(unitprice))
  88. s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, unitprice, institution)
  89. return s
  90. }
  91. func initZXZ() map[string]bool {
  92. m := map[string]bool{}
  93. for _, v := range []string{"1.xlsx", "2.xlsx", "3.xlsx", "4.xlsx", "5.xlsx", "6.xlsx", "7.xlsx"} {
  94. // 打开 Excel 文件
  95. xlFile, err := xlsx.OpenFile(v)
  96. if err != nil {
  97. fmt.Println("Error:", err)
  98. }
  99. // 获取第一个工作表
  100. sheet := xlFile.Sheets[0]
  101. // 遍历每一行
  102. for i, row := range sheet.Rows {
  103. // 获取 FGHI 列的值
  104. if i >= 3 {
  105. projectname := row.Cells[7].String()
  106. procure_content := row.Cells[8].String()
  107. kpi := row.Cells[9].String()
  108. institution := row.Cells[10].String()
  109. projectname = RemoveNewlines(projectname)
  110. procure_content = RemoveNewlines(procure_content)
  111. kpi = RemoveNewlines(kpi)
  112. institution = RemoveNewlines(institution)
  113. s := fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
  114. m[s] = true
  115. }
  116. }
  117. }
  118. for _, v := range []string{"20230523_1.xlsx", "20240510_1.xlsx", "20240510_2.xlsx"} {
  119. // for _, v := range []string{"20230523_1.xlsx"} {
  120. // 打开 Excel 文件
  121. xlFile, err := xlsx.OpenFile(v)
  122. if err != nil {
  123. fmt.Println("Error:", err)
  124. }
  125. // 获取第一个工作表
  126. sheet := xlFile.Sheets[0]
  127. // 遍历每一行
  128. for i, row := range sheet.Rows {
  129. // 获取 FGHI 列的值
  130. if i >= 2 {
  131. projectname := row.Cells[7].String()
  132. pro_item := row.Cells[8].String()
  133. number := row.Cells[9].String()
  134. totalprice := row.Cells[10].String()
  135. institution := row.Cells[11].String()
  136. projectname = RemoveNewlines(projectname)
  137. pro_item = RemoveNewlines(pro_item)
  138. number = RemoveNewlines(number)
  139. totalprice = RemoveNewlines(totalprice)
  140. institution = RemoveNewlines(institution)
  141. s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, totalprice, institution)
  142. m[s] = true
  143. }
  144. }
  145. }
  146. log.Println(len(m))
  147. return m
  148. }
  149. func initJCL() map[string]bool {
  150. m := map[string]bool{}
  151. for _, v := range []string{"j1.xlsx", "j2.xlsx", "j3.xlsx", "j4.xlsx", "j5.xlsx", "j6.xlsx", "j7.xlsx", "j8.xlsx"} {
  152. // 打开 Excel 文件
  153. xlFile, err := xlsx.OpenFile(v)
  154. if err != nil {
  155. fmt.Println("Error:", err)
  156. }
  157. // 获取第一个工作表
  158. sheet := xlFile.Sheets[0]
  159. // 遍历每一行
  160. for i, row := range sheet.Rows {
  161. // 获取 FGHI 列的值
  162. if i >= 3 {
  163. projectname := row.Cells[7].String()
  164. procure_content := row.Cells[8].String()
  165. kpi := row.Cells[9].String()
  166. institution := row.Cells[10].String()
  167. projectname = RemoveNewlines(projectname)
  168. procure_content = RemoveNewlines(procure_content)
  169. kpi = RemoveNewlines(kpi)
  170. institution = RemoveNewlines(institution)
  171. s := fmt.Sprintf("%s@@%s@@%s@@%s", projectname, procure_content, kpi, institution)
  172. m[s] = true
  173. }
  174. }
  175. }
  176. // for _, v := range []string{"mx_j1.xlsx", "mx_j2.xlsx", "mx_j3.xlsx", "mx_j4.xlsx"} {
  177. for _, v := range []string{"mx_j3.xlsx"} {
  178. // 打开 Excel 文件
  179. xlFile, err := xlsx.OpenFile(v)
  180. if err != nil {
  181. fmt.Println("Error:", err)
  182. }
  183. // 获取第一个工作表
  184. sheet := xlFile.Sheets[0]
  185. // 遍历每一行
  186. for i, row := range sheet.Rows {
  187. // 获取 FGHI 列的值
  188. if i >= 2 {
  189. projectname := row.Cells[7].String()
  190. pro_item := row.Cells[8].String()
  191. number := row.Cells[9].String()
  192. totalprice := row.Cells[10].String()
  193. institution := row.Cells[11].String()
  194. projectname = RemoveNewlines(projectname)
  195. pro_item = RemoveNewlines(pro_item)
  196. number = RemoveNewlines(number)
  197. totalprice = RemoveNewlines(totalprice)
  198. institution = RemoveNewlines(institution)
  199. if institution == "石家庄市城市更新促进中心" {
  200. log.Println(row.Cells[10])
  201. log.Println(row.Cells[10].String())
  202. }
  203. s := fmt.Sprintf("%s@@%s@@%s@@%s@@%s", projectname, pro_item, number, totalprice, institution)
  204. m[s] = true
  205. if institution == "石家庄市城市更新促进中心" {
  206. log.Println("---", s)
  207. }
  208. }
  209. }
  210. }
  211. log.Println(len(m))
  212. return m
  213. }
  214. // SafeDivide 安全的除法操作,解决浮点数精度丢失问题
  215. func SafeDivide(a, b float64) float64 {
  216. // 将 float64 转换为 big.Rat
  217. aRat := big.NewRat(int64(a*1e9), 1e9)
  218. bRat := big.NewRat(int64(b*1e9), 1e9)
  219. // 执行除法操作
  220. result := new(big.Rat).Quo(aRat, bRat)
  221. // 将结果转换回 float64
  222. resultFloat, _ := result.Float64()
  223. return resultFloat
  224. }
  225. func main() {
  226. dbname := "hp_zxl_2"
  227. JCL_hashMap := initJCL()
  228. // for k, _ := range JCL_hashMap {
  229. // if strings.Contains(k, "石家庄市人民代表大会常务委员会") {
  230. // log.Println(k)
  231. // }
  232. // }
  233. // purchasing, _ := MQFW.Find("purchasing_huipu", map[string]interface{}{
  234. // "file_path": "2024年/河北/石家庄市/政府/2024_石家庄市人大常委会_1.pdf",
  235. // "unitprice": "0.40",
  236. // }, nil, nil, false, -1, -1)
  237. // log.Println("len:", len(*purchasing))
  238. // for _, pv := range *purchasing {
  239. // pv["type"] = "purchasing"
  240. // year := gconv.String(pv["year"])
  241. // projectname := gconv.String(pv["projectname"])
  242. // pro_code := gconv.String(pv["pro_code"])
  243. // pro_item := gconv.String(pv["pro_item"])
  244. // institution := gconv.String(pv["institution"])
  245. // number := gconv.String(pv["number"])
  246. // totalprice := gconv.String(pv["totalprice"])
  247. // projectname = RemoveNewlines(projectname)
  248. // pro_item = RemoveNewlines(pro_item)
  249. // institution = RemoveNewlines(institution)
  250. // pv["projectname"] = projectname
  251. // pv["pro_item"] = pro_item
  252. // pv["institution"] = institution
  253. // hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
  254. // log.Println("hs", hs)
  255. // log.Println(JCL_hashMap[hs])
  256. // }
  257. // return
  258. hashMap := initZXZ()
  259. saveMap := map[string]bool{}
  260. //
  261. sess := MQFW.GetMgoConn()
  262. defer MQFW.DestoryMongoConn(sess)
  263. query := map[string]interface{}{
  264. "exists_key": 1,
  265. }
  266. it := sess.DB("yusuan").C("yusuan_txt").Find(query).Select(map[string]interface{}{
  267. "detail": 1,
  268. "file_path": 1,
  269. }).Iter()
  270. notfind := 0
  271. ccc := 0
  272. for tp := make(map[string]interface{}); it.Next(&tp); {
  273. file_path := gconv.String(tp["file_path"])
  274. detail := gconv.Maps(tp["detail"])
  275. ccc++
  276. if ccc%100 == 0 {
  277. log.Println("txt 解析:", ccc)
  278. }
  279. project, _ := MQFW.Find("yusuan_project", map[string]interface{}{
  280. "file_path": file_path,
  281. }, nil, nil, false, -1, -1)
  282. purchasing, _ := MQFW.Find("yusuan_purchasing", map[string]interface{}{
  283. "file_path": file_path,
  284. }, nil, nil, false, -1, -1)
  285. if (project == nil || len(*project) == 0) && (purchasing == nil || len(*purchasing) == 0) {
  286. fmt.Println(file_path)
  287. notfind++
  288. continue
  289. }
  290. for _, v := range detail {
  291. key_words := gconv.String(v["key_words"])
  292. paragraph := gconv.String(v["paragraph"])
  293. table := gconv.Int(v["table"])
  294. //是否正文匹配表格不匹配 : table:0不带表格 1带表格
  295. //正文 全取 表格 匹配
  296. if table == 0 {
  297. if project != nil && len(*project) > 0 {
  298. for _, pv := range *project {
  299. pv["matchkey"] = key_words
  300. pv["paragraph"] = paragraph
  301. pv["table"] = table
  302. pv["type"] = "project"
  303. year := gconv.String((pv)["year"])
  304. projectname := gconv.String((pv)["projectname"])
  305. procure_content := gconv.String((pv)["procure_content"])
  306. kpi := gconv.String((pv)["kpi"])
  307. institution := gconv.String((pv)["institution"])
  308. projectname = RemoveNewlines(projectname)
  309. institution = RemoveNewlines(institution)
  310. pv["projectname"] = projectname
  311. pv["institution"] = institution
  312. hs := projectHash(year, projectname, procure_content, kpi, institution)
  313. if saveMap[hs] {
  314. continue
  315. }
  316. saveMap[hs] = true
  317. pv["zxz"] = util.If(hashMap[hs], "是", "否")
  318. pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
  319. if !JCL_hashMap[hs] {
  320. hash_code := gconv.String(pv["hash_code"])
  321. if MQFW.Count("project_huipu", map[string]interface{}{
  322. "hash_code": hash_code,
  323. }) > 0 {
  324. pv["jcl"] = "是"
  325. }
  326. }
  327. id := MQFW.Save(dbname, pv)
  328. if id != "" {
  329. MQFW.UpdateById(dbname, id, map[string]interface{}{
  330. "$set": map[string]interface{}{
  331. "eid": encrypt.EncodeArticleId2ByCheck(id),
  332. },
  333. })
  334. }
  335. }
  336. }
  337. if purchasing != nil && len(*purchasing) > 0 {
  338. for _, pv := range *purchasing {
  339. pv["matchkey"] = key_words
  340. pv["paragraph"] = paragraph
  341. pv["table"] = table
  342. pv["type"] = "purchasing"
  343. year := gconv.String(pv["year"])
  344. projectname := gconv.String(pv["projectname"])
  345. pro_code := gconv.String(pv["pro_code"])
  346. pro_item := gconv.String(pv["pro_item"])
  347. institution := gconv.String(pv["institution"])
  348. number := gconv.String(pv["number"])
  349. totalprice := gconv.String(pv["totalprice"])
  350. projectname = RemoveNewlines(projectname)
  351. pro_item = RemoveNewlines(pro_item)
  352. institution = RemoveNewlines(institution)
  353. pv["projectname"] = projectname
  354. pv["pro_item"] = pro_item
  355. pv["institution"] = institution
  356. hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
  357. if saveMap[hs] {
  358. continue
  359. }
  360. saveMap[hs] = true
  361. pv["zxz"] = util.If(hashMap[hs], "是", "否")
  362. pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
  363. if !JCL_hashMap[hs] {
  364. hash_code := gconv.String(pv["hash_code"])
  365. if MQFW.Count("purchasing_huipu", map[string]interface{}{
  366. "hash_code": hash_code,
  367. }) > 0 {
  368. pv["jcl"] = "是"
  369. }
  370. }
  371. id := MQFW.Save(dbname, pv)
  372. if id != "" {
  373. MQFW.UpdateById(dbname, id, map[string]interface{}{
  374. "$set": map[string]interface{}{
  375. "eid": encrypt.EncodeArticleId2ByCheck(id),
  376. },
  377. })
  378. }
  379. }
  380. }
  381. } else {
  382. //表格 匹配
  383. for _, vkey := range strings.Split(key_words, ",") {
  384. if project != nil && len(*project) > 0 {
  385. for _, pv := range *project {
  386. pv["matchkey"] = key_words
  387. pv["paragraph"] = paragraph
  388. pv["table"] = table
  389. pv["type"] = "project"
  390. year := gconv.String((pv)["year"])
  391. projectname := gconv.String((pv)["projectname"])
  392. procure_content := gconv.String((pv)["procure_content"])
  393. kpi := gconv.String((pv)["kpi"])
  394. institution := gconv.String((pv)["institution"])
  395. projectname = RemoveNewlines(projectname)
  396. institution = RemoveNewlines(institution)
  397. pv["projectname"] = projectname
  398. pv["institution"] = institution
  399. if !strings.Contains(projectname, vkey) && !strings.Contains(procure_content, vkey) && !strings.Contains(kpi, vkey) {
  400. continue
  401. pv["table_jiexi"] = 1 //漏解析
  402. }
  403. hs := projectHash(year, projectname, procure_content, kpi, institution)
  404. if saveMap[hs] {
  405. continue
  406. }
  407. if gconv.String(pv["fileitem_id"]) == "660491d7138c4f04f70f5838" {
  408. log.Println(hs)
  409. }
  410. saveMap[hs] = true
  411. pv["zxz"] = util.If(hashMap[hs], "是", "否")
  412. pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
  413. if !JCL_hashMap[hs] {
  414. hash_code := gconv.String(pv["hash_code"])
  415. if MQFW.Count("project_huipu", map[string]interface{}{
  416. "hash_code": hash_code,
  417. }) > 0 {
  418. pv["jcl"] = "是"
  419. }
  420. }
  421. id := MQFW.Save(dbname, pv)
  422. if id != "" {
  423. MQFW.UpdateById(dbname, id, map[string]interface{}{
  424. "$set": map[string]interface{}{
  425. "eid": encrypt.EncodeArticleId2ByCheck(id),
  426. },
  427. })
  428. }
  429. }
  430. }
  431. if purchasing != nil && len(*purchasing) > 0 {
  432. for _, pv := range *purchasing {
  433. pv["matchkey"] = key_words
  434. pv["paragraph"] = paragraph
  435. pv["table"] = table
  436. pv["type"] = "purchasing"
  437. year := gconv.String(pv["year"])
  438. projectname := gconv.String(pv["projectname"])
  439. pro_code := gconv.String(pv["pro_code"])
  440. pro_item := gconv.String(pv["pro_item"])
  441. institution := gconv.String(pv["institution"])
  442. number := gconv.String(pv["number"])
  443. totalprice := gconv.String(pv["totalprice"])
  444. projectname = RemoveNewlines(projectname)
  445. institution = RemoveNewlines(institution)
  446. pro_item = RemoveNewlines(pro_item)
  447. pv["projectname"] = projectname
  448. pv["institution"] = institution
  449. pv["pro_item"] = pro_item
  450. if !strings.Contains(projectname, vkey) && !strings.Contains(pro_item, vkey) {
  451. continue
  452. pv["table_jiexi"] = 1 //漏解析
  453. }
  454. hs := purchasingHash(year, projectname, pro_code, pro_item, institution, number, totalprice)
  455. if saveMap[hs] {
  456. continue
  457. }
  458. saveMap[hs] = true
  459. pv["zxz"] = util.If(hashMap[hs], "是", "否")
  460. pv["jcl"] = util.If(JCL_hashMap[hs], "是", "否")
  461. if !JCL_hashMap[hs] {
  462. hash_code := gconv.String(pv["hash_code"])
  463. if MQFW.Count("purchasing_huipu", map[string]interface{}{
  464. "hash_code": hash_code,
  465. }) > 0 {
  466. pv["jcl"] = "是"
  467. }
  468. }
  469. id := MQFW.Save(dbname, pv)
  470. if id != "" {
  471. MQFW.UpdateById(dbname, id, map[string]interface{}{
  472. "$set": map[string]interface{}{
  473. "eid": encrypt.EncodeArticleId2ByCheck(id),
  474. },
  475. })
  476. }
  477. }
  478. }
  479. }
  480. }
  481. }
  482. tp = make(map[string]interface{})
  483. }
  484. log.Println("未解析到数据数量:", notfind)
  485. }
  486. //生成hashCode
  487. func HashCode(input string) string {
  488. hash := sha256.Sum256([]byte(input))
  489. hashString := hex.EncodeToString(hash[:])
  490. return hashString
  491. }
  492. func RemoveNewlines(str string) string {
  493. regex := regexp.MustCompile(`\r?\n`)
  494. return regex.ReplaceAllString(str, "")
  495. }