
Fix scripts failing to start via sh when the log directory is missing

dzr 3 days ago
parent
commit
64d0493ac4
100 changed files with 2609 additions and 974 deletions
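The root cause: each start.sh launches its spiders with nohup python3 <spider>.py > log/<spider>.out 2>&1 &, and when the log/ directory is absent the shell's output redirection fails, so the process never starts. A minimal sketch of the failure and the two usual fixes, assuming bash (this commit takes the second):

    # Failing form: the redirection target's directory does not exist
    nohup python3 some_spider.py > log/some_spider.out 2>&1 &
    # bash: log/some_spider.out: No such file or directory

    # Fix A: create the directory first (keeps per-spider logs)
    mkdir -p log
    nohup python3 some_spider.py > log/some_spider.out 2>&1 &

    # Fix B (chosen here): discard output so no log/ directory is needed
    nohup python3 some_spider.py > /dev/null 2>&1 &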
  1. + 2 - 2      lzz_theme/bqgyjtgscgdzswpt/start.sh
  2. + 1 - 1      lzz_theme/clgjzbcgjtyxgs/clgj_cookies.txt
  3. + 1 - 1      lzz_theme/clgjzbcgjtyxgs/list_start.sh
  4. + 2 - 2      lzz_theme/clgjzbcgjtyxgs/start.sh
  5. + 5 - 5      lzz_theme/cqszfcgyptfwcs/start.sh
  6. + 37 - 16    lzz_theme/crontab.txt
  7. + 2 - 2      lzz_theme/gdgczbzxyxgs/start.sh
  8. + 10 - 10    lzz_theme/gdszfcgw/gdszfcgw_daliy_crawl.py
  9. + 277 - 104  lzz_theme/gdszfcgw/gdszfcgw_details_spider.py
  10. + 272 - 41  lzz_theme/gdszfcgw/list_spider.py
  11. + 2 - 2     lzz_theme/gdszfcgw/start.sh
  12. + 5 - 5     lzz_theme/gnzggzyjyzx/start.sh
  13. + 1 - 1     lzz_theme/hnszfcgdzmc/dt_start.sh
  14. + 1 - 1     lzz_theme/hnszfcgdzmc/hn_collector.py
  15. + 3 - 3     lzz_theme/hnszfcgdzmc/start.sh
  16. + 58 - 0    lzz_theme/hnszfcgdzmc_new/hn_new_collector.py
  17. + 260 - 0   lzz_theme/hnszfcgdzmc_new/spider.py
  18. + 4 - 0     lzz_theme/hnszfcgdzmc_new/start.sh
  19. + 21 - 0    lzz_theme/hnszfcgdzmc_new/异常公告.py
  20. + 21 - 0    lzz_theme/hnszfcgdzmc_new/成交公告.py
  21. + 21 - 0    lzz_theme/hnszfcgdzmc_new/终止公告.py
  22. + 21 - 0    lzz_theme/hnszfcgdzmc_new/邀请公告.py
  23. + 22 - 0    lzz_theme/hnszfcgdzmc_new/验收公告.py
  24. + 1 - 1     lzz_theme/htdzcgpt/htdz_login.py
  25. + 12 - 12   lzz_theme/htdzcgpt/start.sh
  26. + 1 - 1     lzz_theme/jsxmhjyxdjbbaxt/det_start.sh
  27. + 1 - 1     lzz_theme/jsxmhjyxdjbbaxt/rstart.sh
  28. + 1 - 1     lzz_theme/jsxmhjyxdjbbaxt/start.sh
  29. + 1 - 1     lzz_theme/lcdzcgpt/lcdz_login.py
  30. + 1 - 1     lzz_theme/lcdzcgpt/start.sh
  31. + 7 - 7     lzz_theme/qgzbgggsssyq/start.sh
  32. + 5 - 3     lzz_theme/qgzbgggsssyq/start_spider.sh
  33. + 1 - 4     lzz_theme/qjwqzbcgxxw/start.sh
  34. + 12 - 88   lzz_theme/requirements.txt
  35. + 1 - 1     lzz_theme/sfc/sfc_cookies.txt
  36. + 1 - 1     lzz_theme/sfc/sfc_uuid.txt
  37. BIN         lzz_theme/sfc/slice.png
  38. + 9 - 9     lzz_theme/sfc/start.sh
  39. + 1 - 1     lzz_theme/sgycw/sgycw_login.py
  40. + 2 - 2     lzz_theme/sgycw/start.sh
  41. + 2 - 2     lzz_theme/szycycgpt/start.sh
  42. + 3 - 3     lzz_theme/tjszfcgw/start.sh
  43. + 32 - 33   lzz_theme/utils/tools.py
  44. + 0 - 6     lzz_theme/xgyyglj/start.sh
  45. + 0 - 1     lzz_theme/xgyyglj/translate/baidufanyi_ck.json
  46. + 0 - 68    lzz_theme/xgyyglj/translate/baidutrans.js
  47. + 0 - 115   lzz_theme/xgyyglj/translate/bd_translate.py
  48. + 0 - 177   lzz_theme/xgyyglj/xgyy_spider_details.py
  49. + 0 - 150   lzz_theme/xgyyglj/xgyy_spider_list.py
  50. + 1 - 1     lzz_theme/ynszfcgw/det_start.sh
  51. + 1 - 1     lzz_theme/ynszfcgw/start.sh
  52. + 1 - 1     lzz_theme/ynszfcgw/start1.sh
  53. + 1 - 1     lzz_theme/ynszfcgw/start2.sh
  54. + 2 - 2     lzz_theme/ynszfcgw/start3.sh
  55. + 1 - 1     lzz_theme/yyc/Yyc_dtcookie.py
  56. + 4 - 4     lzz_theme/yyc/start.sh
  57. + 2 - 2     lzz_theme/yzcbjkjfzyxgs/start.sh
  58. + 1 - 1     lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs_ck.py
  59. + 93 - 0    lzz_theme/yzw/login.py
  60. + 43 - 0    lzz_theme/yzw/reset_count.py
  61. + 0 - 0     lzz_theme/yzw/runhekeji_ck.json
  62. + 11 - 0    lzz_theme/yzw/start.sh
  63. + 362 - 0   lzz_theme/yzw/yzw_xj_details.py
  64. + 164 - 0   lzz_theme/yzw/yzw_xj_list.py
  65. + 437 - 0   lzz_theme/yzw/yzw_zm_details.py
  66. + 168 - 0   lzz_theme/yzw/yzw_zm_list.py
  67. + 2 - 2     lzz_theme/zgdtjtgsdzswpt/start.sh
  68. + 2 - 2     lzz_theme/zgdtjtgsdzswpt_m30/start_m30.sh
  69. + 1 - 5     lzz_theme/zgdzkjjtyxgsdzcgpt/limit_details.py
  70. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/list_start.sh
  71. + 2 - 2     lzz_theme/zgdzkjjtyxgsdzcgpt/login_account.py
  72. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/ret_start.sh
  73. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/start.sh
  74. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/zdk_reset.py
  75. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_cookies.json
  76. + 1 - 1     lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_list_spider.py
  77. + 2 - 2     lzz_theme/zgsyzbtbw/start.sh
  78. + 3 - 3     lzz_theme/zgwkjtyxgs/detail_start.sh
  79. + 1 - 1     lzz_theme/zgwkjtyxgs/his_start.sh
  80. + 1 - 1     lzz_theme/zgwkjtyxgs/list_start.sh
  81. + 1 - 1     lzz_theme/zgwkjtyxgs/retry_start.sh
  82. + 3 - 3     lzz_theme/zgzbtbggfwpt/detail_start.sh
  83. + 1 - 1     lzz_theme/zgzbtbggfwpt/retry_start.sh
  84. + 48 - 0    lzz_theme/zgzbtbggfwpt/spider_detail_bu.py
  85. + 27 - 0    lzz_theme/zgzbtbggfwpt_wagf/spider_list_b.py
  86. + 28 - 0    lzz_theme/zgzbtbggfwpt_wagf/spider_list_f.py
  87. + 33 - 33   lzz_theme/zmdszfcgdzsc/start.sh
  88. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_byx_ck.json
  89. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_gxq_ck.json
  90. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_kfq_ck.json
  91. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_login.py
  92. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_pyx_ck.json
  93. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_qsx_ck.json
  94. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_qyzq_ck.json
  95. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_rnx_ck.json
  96. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_sbj_ck.json
  97. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_scx_ck.json
  98. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_sfq_ck.json
  99. + 1 - 1     lzz_theme/zmdszfcgdzsc/zmd_spx_ck.json
  100. + 1 - 1    lzz_theme/zmdszfcgdzsc/zmd_xcx_ck.json

+ 2 - 2
lzz_theme/bqgyjtgscgdzswpt/start.sh

@@ -2,6 +2,6 @@
 
 ps -ef |grep "list_bqgyjt.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "details_bqgyjt.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 list_bqgyjt.py > log/list_bqgyjt.out 2>&1 &
-nohup python3 details_bqgyjt.py > log/details_bqgyjt.out 2>&1 &
+nohup python3 list_bqgyjt.py > /dev/null 2>&1 &
+nohup python3 details_bqgyjt.py > /dev/null 2>&1 &
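An aside on the shared preamble: the ps -ef | grep | grep -v grep | awk | xargs kill -9 pipeline repeated across these start.sh scripts is equivalent to a single procps pkill, a sketch:

    # Equivalent to the kill pipeline above; -f matches against the full command line
    pkill -9 -f "list_bqgyjt.py"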
 

+ 1 - 1
lzz_theme/clgjzbcgjtyxgs/clgj_cookies.txt

@@ -1 +1 @@
-eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyOTYyOTIiLCJpc3MiOiJwbGF0Zm9ybUNlbnRlciIsImlhdCI6MTc0NTk3NDkxNiwiZXhwIjoxNzQ2MDYxMzE2LCJuYmYiOjE3NDU5NzQ5MTYsImp0aSI6ImYzOWQwMGYzNWViNjQ4NmM4ZGM5Yjc5ZThkNGFkYjYzIiwiYXVkIjpbImJVc2VyIl0sInVzZXJJZCI6Mjk2MjkyLCJ1c2VyTmFtZSI6IjEzMjIzMDc0MDAzIiwidGVuYW50SWQiOjIyMDEsImVtYWlsQWRkcmVzcyI6IjEzMjIzMDc0MDAzQGludmFsaWQuY24iLCJ0eXBlIjoiYWNjZXNzVG9rZW4ifQ.Y9v0HbZYxa05iAnzRt3o5CVVgIDVmAIG4QRuWbony-6Fu85uYr2Qsrv8zsHfvcyi3S_EFVcalQi60xqXyt8xJRgV9mA1lKTYV7TW7C18AMMozTVvh-BPTdv4bc3oH6F4Xtzc9aSdZx_2_AiNSSXtny-M1ybARqatnPdmO54Mu7dEIibtUHNn_j1Y3Dh2QidD7xuNgDHen1AbysZMGNF9osDDjbxi99UBoyJomVvhFYs0FiS1e4eXz_zVDuQSIVw_9aQgE99nqLQ2sILes62EJV_4dk5qOUNs5JHW2BnsAZoLcwzT8SWhvsVtGqFBx4VIlV1jGdDEyVvgjlVpuwg0HQ
+eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyOTYyOTIiLCJpc3MiOiJwbGF0Zm9ybUNlbnRlciIsImlhdCI6MTc1NTY1MTcxOCwiZXhwIjoxNzU1NzM4MTE4LCJuYmYiOjE3NTU2NTE3MTgsImp0aSI6ImVlMDZmNTMwM2VkNDRkN2Q5MDIwNDU5Y2EwZmFlODI1IiwiYXVkIjpbImJVc2VyIl0sInVzZXJJZCI6Mjk2MjkyLCJ1c2VyTmFtZSI6IjEzMjIzMDc0MDAzIiwidGVuYW50SWQiOjIyMDEsImVtYWlsQWRkcmVzcyI6IjEzMjIzMDc0MDAzQGludmFsaWQuY24iLCJ0eXBlIjoiYWNjZXNzVG9rZW4ifQ.PldCFy_0NoY28xhocRIRjyZHNCj6FetU0xp7TK3CJMQXvIq8WIUGIfTo7GBWpjB4PH7X4qc8fg37Il76ARy9TPhYCw1aMYMwTVvs2fljuuRHW27hDrYvdHjgJgp_QE0JH8wG2SmAtRxBy1trCLDpYJk_tZHVUOQmq6GY_k1uF2HsrCanbuJZrsv17-MYUEbpaSFltH3-fGMLjotFD1Makg2pkttHqJzMDUfTqG3Xen1ZHFoaJ9GVeBzs-sxoOOYYKuQh5HpCCtiBm9SxYSNJI-thyFy8ttWPa26sbTxdfaRVw4Ph_eTbVfy74VQCoIAOExMdeAv4tBGzwW0hQfgA-A

+ 1 - 1
lzz_theme/clgjzbcgjtyxgs/list_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "clgjzb_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 clgjzb_list.py > log/clgjzb_list.out 2>&1 &
+nohup python3 clgjzb_list.py > /dev/null 2>&1 &

+ 2 - 2
lzz_theme/clgjzbcgjtyxgs/start.sh

@@ -2,5 +2,5 @@
 
 # ps -ef |grep "clgjzb_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "clgjzb_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-# nohup python3 clgjzb_list.py > log/clgjzb_list.out 2>&1 &
-nohup python3 clgjzb_details.py > log/clgjzb_details.out 2>&1 &
+# nohup python3 clgjzb_list.py > /dev/null 2>&1 &
+nohup python3 clgjzb_details.py > /dev/null 2>&1 &

+ 5 - 5
lzz_theme/cqszfcgyptfwcs/start.sh

@@ -6,10 +6,10 @@ ps -ef |grep "cqszfcgy_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "cqszfcgy_cgxq_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "cqszfcgy_cgxq_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 cqszfcgy_qx_details.py > log/cqszfcgy_qx_details.out 2>&1 &
-nohup python3 cqszfcgy_details.py > log/cqszfcgy_details.out 2>&1 &
-nohup python3 cqszfcgy_list.py > log/cqszfcgy_list.out 2>&1 &
-nohup python3 cqszfcgy_cgxq_list.py > log/cqszfcgy_cgxq_list.out 2>&1 &
-nohup python3 cqszfcgy_cgxq_details.py > log/cqszfcgy_cgxq_details.out 2>&1 &
+nohup python3 cqszfcgy_qx_details.py > /dev/null 2>&1 &
+nohup python3 cqszfcgy_details.py > /dev/null 2>&1 &
+nohup python3 cqszfcgy_list.py > /dev/null 2>&1 &
+nohup python3 cqszfcgy_cgxq_list.py > /dev/null 2>&1 &
+nohup python3 cqszfcgy_cgxq_details.py > /dev/null 2>&1 &
 
 

+ 37 - 16
lzz_theme/crontab.txt

@@ -1,3 +1,4 @@
+
 PYTHONPATH=/mnt/lzz_theme/utils:/mnt/lzz_theme
 NODE_PATH=/usr/lib/node_modules
 
@@ -10,11 +11,13 @@ NODE_PATH=/usr/lib/node_modules
 10 6-23/3 * * * cd /mnt/lzz_theme/hnszfcgdzmc && ./start.sh
 20 6-23/1 * * * cd /mnt/lzz_theme/hnszfcgdzmc && ./dt_start.sh
 
+30 6-23/3 * * * cd /mnt/lzz_theme/hnszfcgdzmc_new && ./start.sh
+
 # 天津市政府采购网
 */10 * * * * cd /mnt/lzz_theme/tjszfcgw && ./start.sh
 
 # 香港医院管理局
-0 */3 * * * cd /mnt/lzz_theme/xgyyglj && ./start.sh
+# 0 */3 * * * cd /mnt/lzz_theme/xgyyglj && ./start.sh
 
 # 云南省政府采购网-采购意向公开
 0 */1 * * * cd /mnt/lzz_theme/ynszfcgw && ./det_start.sh
@@ -25,8 +28,8 @@ NODE_PATH=/usr/lib/node_modules
 
 # 优质采(北京)科技发展有限公司
 */30 7-20 * * * cd /mnt/lzz_theme/yzcbjkjfzyxgs && ./start.sh
-*/30 * * * * cd /mnt/lzz_theme/yzcbjkjfzyxgs && python3 spider_list.py > spider_list.out 2>&1
-*/20 * * * * cd /mnt/lzz_theme/yzcbjkjfzyxgs && python3 sp_list.py > sp_list.out 2>&1
+# */30 * * * * cd /mnt/lzz_theme/yzcbjkjfzyxgs && python3 spider_list.py > spider_list.out 2>&1
+# */20 * * * * cd /mnt/lzz_theme/yzcbjkjfzyxgs && python3 sp_list.py > sp_list.out 2>&1
 
 # 中国电子科技集团有限公司电子采购平台
 0 */1 * * * cd /mnt/lzz_theme/zgdzkjjtyxgsdzcgpt && ./list_start.sh
@@ -39,19 +42,25 @@ NODE_PATH=/usr/lib/node_modules
 0 8 */3 * * cd /mnt/lzz_theme/zgwkjtyxgs && ./his_start.sh
 0 7 * * * cd /mnt/lzz_theme/zgwkjtyxgs && ./retry_start.sh
 
-# 中国招标投标公共服务平台
-*/2 7-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_f.py > /dev/null &
-*/20 * * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_details.py > /dev/null &
+# 中国招标投标公共服务平台[未按规范]
+*/2 5-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt_wagf && python3 zgzbtbggfwpt_wagf_list_f.py > /dev/null &
+05 3 * * * cd /mnt/lzz_theme/zgzbtbggfwpt_wagf && python3 zgzbtbggfwpt_wagf_list_b.py > /dev/null &
+*/10 * * * * cd /mnt/lzz_theme/zgzbtbggfwpt_wagf && python3 zgzbtbggfwpt_wagf_details.py > /dev/null &
+05 1 * * * cd /mnt/lzz_theme/zgzbtbggfwpt_wagf && python3 zgzbtbggfwpt_wagf_details_retry.py > /dev/null &
+
+# 中国招标投标公共服务平台[已按规范]
+# */2 7-23 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_f.py > /dev/null &
+# */18 * * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_details.py > /dev/null &
 # 05 22 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_b.py > /dev/null &
-20 0 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_date.py > /dev/null &
-15 4 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 spider_detail_retry.py > /dev/null &
+# 20 0 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 zgzbtbggfwpt_list_date.py > /dev/null &
+# 15 4,13 * * * cd /mnt/lzz_theme/zgzbtbggfwpt && python3 spider_detail_retry.py > /dev/null &
 
 # 全国招标公告公示搜索引擎
-*/2 9-23,0 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_f.py > /dev/null &
-*/20 * * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail.py > /dev/null &
+# */2 9-23,0 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_f.py > /dev/null &
+# */20 * * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail.py > /dev/null &
 # */20 * * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list.py > /dev/null &
-30 1 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail_retry.py > /dev/null &
-10 1-8/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_area.py > /dev/null &
+# 30 1,14 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_detail_retry.py > /dev/null &
+# 10 1-8/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_area.py > /dev/null &
 # 10 1-8/2 * * * cd /mnt/lzz_theme/qgzbgggsssyq && python3 spider_list_b.py > /dev/null &
 
 # 建设项目环境影响登记表备案系统 
@@ -72,8 +81,11 @@ NODE_PATH=/usr/lib/node_modules
 20 7-20/1 * * * cd /mnt/lzz_theme/zgdtjtgsdzswpt && ./start.sh
 30 9 * * * cd /mnt/lzz_theme/zgdtjtgsdzswpt && python3 retry_dtpy.py > log/retry_dtpy.out 2>&1
 
+# 中国大唐集团公司电子商务平台 30分钟一轮(中国移动要求改采集频率)
+30 * * * * cd /mnt/lzz_theme/zgdtjtgsdzswpt_m30 && ./start_m30.sh
+
 # 友云采
-30 7-20/1 * * * cd /mnt/lzz_theme/yyc && ./start.sh
+# 30 7-20/1 * * * cd /mnt/lzz_theme/yyc && ./start.sh
 
 # 施工云采网
 15 7-20/1 * * * cd /mnt/lzz_theme/sgycw && ./start.sh
@@ -88,13 +100,13 @@ NODE_PATH=/usr/lib/node_modules
 10 6-22/1 * * * cd /mnt/lzz_theme/szycycgpt && ./start.sh
 
 # 兵器工业集团公司采购电子商务平台
-15 7-21/1 * * * cd /mnt/lzz_theme/bqgyjtgscgdzswpt && ./start.sh
+# 15 7-21/1 * * * cd /mnt/lzz_theme/bqgyjtgscgdzswpt && ./start.sh
 
 # 航天电子采购平台
 # 10 7-21/1 * * * cd /mnt/lzz_theme/htdzcgpt && ./start.sh
 
 # 驻马店市政府采购电子商城
-20 7-19/2 * * * cd /mnt/lzz_theme/zmdszfcgdzsc && ./start.sh
+20 6-22/1 * * * cd /mnt/lzz_theme/zmdszfcgdzsc && ./start.sh
 
 # 重庆市政府采购云平台服务超市
 5 6-22/1 * * * cd /mnt/lzz_theme/cqszfcgyptfwcs && ./start.sh
@@ -104,9 +116,18 @@ NODE_PATH=/usr/lib/node_modules
 
 # 全军武器装备采购信息网
 # 15 6-21/1 * * * cd /mnt/lzz_theme/qjwqzbcgxxw && ./start.sh
+# 30 7-20/2 * * * cd /mnt/lzz_theme/qjwqzbcgxxw && ./start_details.sh
 
 # 浪潮电子采购平台
 10 6-21/1 * * * cd /mnt/lzz_theme/lcdzcgpt && ./start.sh
 
 # 中国石油招标投标网 
-6 6-21/1 * * * cd /mnt/lzz_theme/zgsyzbtbw && ./start.sh
+*/40 6-23/1 * * * cd /mnt/lzz_theme/zgsyzbtbw && ./start.sh
+
+# 招天下
+10 */3 * * * cd /mnt/lzz_theme/ztx && ./start.sh
+
+# 云筑网 
+20 7-19/2 * * * cd /mnt/lzz_theme/yzw && ./start.sh
+30 6 * * * cd /mnt/lzz_theme/yzw && python3 reset_count.py > log/reset_count.out 2>&1
+
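A note on the cron step syntax used throughout this file: a range with a step in the hour field (6-23/3) selects every third hour within the range, while a step in the minute field (*/40) selects every minute divisible by the step. Two annotated examples mirroring entries above:

    # m    h      dom mon dow
    30   6-23/3  *  *  *  ...   # 06:30, 09:30, 12:30, 15:30, 18:30, 21:30
    */40 6-23    *  *  *  ...   # :00 and :40 of every hour from 06 to 23
                                # (*/40 over minutes 0-59 yields only 0 and 40)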

+ 2 - 2
lzz_theme/gdgczbzxyxgs/start.sh

@@ -2,6 +2,6 @@
 
 ps -ef |grep "gdgczb_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "gdgczb_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 gdgczb_list.py > log/gdgczb_list.out 2>&1 &
-nohup python3 gdgczb_details.py > log/gdgczb_details.out 2>&1 &
+nohup python3 gdgczb_list.py > /dev/null 2>&1 &
+nohup python3 gdgczb_details.py > /dev/null 2>&1 &
 

+ 10 - 10
lzz_theme/gdszfcgw/gdszfcgw_daliy_crawl.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-06-25 
+Created on 2025-07-04
 ---------
 @summary: 广东省政府采购网 - 增量采集
 ---------
@@ -16,15 +16,15 @@ def get_today_of_day(day_offset=0):
 
 if __name__ == '__main__':
     Menu = namedtuple('Menu',
-                      ['channel', 'spidercode', 'chan_key', 'noticeType', 'start_day', 'end_day', 'selectTimeName',
-                       'crawl_page'])
-    st = get_today_of_day()
-    ed = get_today_of_day(1)
+                      ['channel', 'spidercode', 'chan_key', 'noticeType', 'reqtime',  'crawl_page'])
+
+    reqtime = get_today_of_day()
     menus = [
-        Menu('首页搜索-采购公告', 'gd_gdszfcgw_syss_cggg', 'fca71be5-fc0c-45db-96af-f513e9abda9d', '', st, ed, 'noticeTime', 50),
-        Menu('首页搜索-电子卖场', 'gd_gdszfcgw_syss_dzmc', '3b49b9ba-48b6-4220-9e8b-eb89f41e9d66', '', st, ed, 'noticeTime', 30),
-        Menu('首页搜索-批量采购', 'gd_gdszfcgw_syss_plcg', 'b893ce4b-616d-4f39-a531-57e958e1475e,751c7726-20a2-47f2-b190-206ae6e9cd89,456ab317-2144-4d8d-b3a7-f26dcecfc096', '', st, ed, 'noticeTime', 1),   # 暂无数据
-        Menu('首页搜索-监管信息', 'gd_gdszfcgw_syss_jgxx', '418093fb-aac8-4562-9be2-97082c101ef7', '', st, ed, 'noticeTime', 1),
-        Menu('首页搜索-采购计划', 'gd_gdszfcgw_syss_cgjh', '95ff31f3-a1af-4bc4-b1a2-54c894476193', '001101', st, ed, 'noticeTime', 50),
+        Menu('首页搜索-采购公告', 'gd_gdszfcgw_syss_cggg', 'fca71be5-fc0c-45db-96af-f513e9abda9d', '59', reqtime, 3),
+        Menu('首页搜索-采购公告', 'gd_gdszfcgw_syss_cggg', 'fca71be5-fc0c-45db-96af-f513e9abda9d', '59,001051,001101,001059,001052,001053,00101,00102,00103,001004,001006,001054,001009,00105A', reqtime, 30),
+        Menu('首页搜索-电子卖场', 'gd_gdszfcgw_syss_dzmc', '3b49b9ba-48b6-4220-9e8b-eb89f41e9d66', '001101,201022,201023,201111,00107D,202022,202023,202111,00107E,001076,204022,204023,204111,204112,001054,001009,00105A', reqtime, 20),
+        Menu('首页搜索-批量采购', 'gd_gdszfcgw_syss_plcg', 'b893ce4b-616d-4f39-a531-57e958e1475e,751c7726-20a2-47f2-b190-206ae6e9cd89,456ab317-2144-4d8d-b3a7-f26dcecfc096', '', reqtime, 1),
+        Menu('首页搜索-监管信息', 'gd_gdszfcgw_syss_jgxx', '418093fb-aac8-4562-9be2-97082c101ef7', '', reqtime, 1),
+        Menu('首页搜索-采购计划', 'gd_gdszfcgw_syss_cgjh', '95ff31f3-a1af-4bc4-b1a2-54c894476193', '001101', reqtime, 2),
     ]
     Crawl_Gds().start_list(menus)

+ 277 - 104
lzz_theme/gdszfcgw/gdszfcgw_details_spider.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-04-26
+Created on 2025-07-04
 ---------
 @summary: 广东省政府采购网 - 详情页
 ---------
@@ -13,7 +13,6 @@ sys.path.append(os.path.dirname(os.getcwd()))
 from utils.tools import *
 from utils.attachment import AttachmentDownloader
 from threading import Timer
-from utils.clean_html import cleaner
 import requests
 import re
 import time
@@ -22,11 +21,181 @@ import execjs
 from parsel import Selector
 from collections import namedtuple
 
+def ctx():
+    ex_js = '''
+    window = global
+
+    JSEncrypt = require('jsencrypt')
+    CryptoJs = require('crypto-js')
+
+    function K(value) {
+        var encrypt = new JSEncrypt;
+        var RSAPublicKey = 'MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCS2TZDs5+orLYCL5SsJ54+bPCVs1ZQQwP2RoPkFQF2jcT0HnNNT8ZoQgJTrGwNi5QNTBDoHC4oJesAVYe6DoxXS9Nls8WbGE8ZNgOC5tVv1WVjyBw7k2x72C/qjPoyo/kO7TYl6Qnu4jqW/ImLoup/nsJppUznF0YgbyU/dFFNBQIDAQAB';
+        encrypt.setPublicKey('-----BEGIN PUBLIC KEY-----' + RSAPublicKey + '-----END PUBLIC KEY-----')
+        return encrypt.encrypt(value)
+    }
+
+    function mm(e, t) {
+        return e += `_${t}_bosssoft_platform_095285`,
+            t = CryptoJs.SHA1(e).toString(),
+            CryptoJs.MD5(t).toString()
+    }
+
+    function get_njs(e){
+        var t = (new Date).getTime();
+        return K(String(e).split("?")[0] + "$$" + t)
+    }
+
+    function decode_str(e) {
+        var n, t, a, c, i, r;
+        n = "",
+        a = e.length,
+        t = 0;
+        while (t < a)
+            switch (c = e.charCodeAt(t++),
+            c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                n += e.charAt(t - 1);
+                break;
+            case 12:
+            case 13:
+                i = e.charCodeAt(t++),
+                n += String.fromCharCode((31 & c) << 6 | 63 & i);
+                break;
+            case 14:
+                i = e.charCodeAt(t++),
+                r = e.charCodeAt(t++),
+                n += String.fromCharCode((15 & c) << 12 | (63 & i) << 6 | (63 & r) << 0);
+                break
+            }
+        return n
+    }
+
+    function de_str(e) {
+        var n, t, a, c, i, r, o, u = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1];
+        r = e.length,
+        i = 0,
+        o = "";
+        while (i < r) {
+            do {
+                n = u[255 & e.charCodeAt(i++)]
+            } while (i < r && -1 == n);
+            if (-1 == n)
+                break;
+            do {
+                t = u[255 & e.charCodeAt(i++)]
+            } while (i < r && -1 == t);
+            if (-1 == t)
+                break;
+            o += String.fromCharCode(n << 2 | (48 & t) >> 4);
+            do {
+                if (a = 255 & e.charCodeAt(i++),
+                61 == a)
+                    return o;
+                a = u[a]
+            } while (i < r && -1 == a);
+            if (-1 == a)
+                break;
+            o += String.fromCharCode((15 & t) << 4 | (60 & a) >> 2);
+            do {
+                if (c = 255 & e.charCodeAt(i++),
+                61 == c)
+                    return o;
+                c = u[c]
+            } while (i < r && -1 == c);
+            if (-1 == c)
+                break;
+            o += String.fromCharCode((3 & a) << 6 | c)
+        }
+        return o
+    }
+
+    function get_data(data){
+        return decode_str(de_str(data))
+    }
+
+    var i = 0
+        , r = 8;
+
+    function a(e, t) {
+        e[t >> 5] |= 128 << 24 - t % 32,
+            e[15 + (t + 64 >> 9 << 4)] = t;
+        for (var n, i, r, o = Array(80), a = 1732584193, u = -271733879, c = -1732584194, h = 271733878, d = -1009589776, f = 0; f < e.length; f += 16) {
+            for (var p = a, m = u, g = c, v = h, C = d, y = 0; y < 80; y++) {
+                o[y] = y < 16 ? e[f + y] : l(o[y - 3] ^ o[y - 8] ^ o[y - 14] ^ o[y - 16], 1);
+                var A = s(s(l(a, 5), (A = u,
+                    i = c,
+                    r = h,
+                    (n = y) < 20 ? A & i | ~A & r : !(n < 40) && n < 60 ? A & i | A & r | i & r : A ^ i ^ r)), s(s(d, o[y]), (n = y) < 20 ? 1518500249 : n < 40 ? 1859775393 : n < 60 ? -1894007588 : -899497514));
+                d = h,
+                    h = c,
+                    c = l(u, 30),
+                    u = a,
+                    a = A
+            }
+            a = s(a, p),
+                u = s(u, m),
+                c = s(c, g),
+                h = s(h, v),
+                d = s(d, C)
+        }
+        return Array(a, u, c, h, d)
+    }
+
+    function s(e, t) {
+        var n = (65535 & e) + (65535 & t);
+        return (e >> 16) + (t >> 16) + (n >> 16) << 16 | 65535 & n
+    }
+
+    function l(e, t) {
+        return e << t | e >>> 32 - t
+    }
+
+    function u(e) {
+        for (var t = Array(), n = (1 << r) - 1, i = 0; i < e.length * r; i += r)
+            t[i >> 5] |= (e.charCodeAt(i / r) & n) << 24 - i % 32;
+        return t
+    }
+
+    function c(e) {
+        for (var t = i ? "0123456789ABCDEF" : "0123456789abcdef", n = "", r = 0; r < 4 * e.length; r++)
+            n += t.charAt(e[r >> 2] >> 8 * (3 - r % 4) + 4 & 15) + t.charAt(e[r >> 2] >> 8 * (3 - r % 4) & 15);
+        return n
+    }
+
+    function h(e) {
+        return c(a(u(e), e.length * 8))
+    }
+
+    function pp(e) {
+        return CryptoJs.MD5(e).toString()
+    }
+
+    function mme(e, t) {
+        var n = (new Date).getTime()
+            , i = pp(h(`${n}_${String(e).split("?")[0]}_bosssoft_platform_095285`))
+            , r = {};
+        return r.time = n,
+            r.url = String(e).split("?")[0],
+            r.sign = i,
+        null != t && "" !== t && (i = p(h("" + t + String(e).split("?")[0] + n)),
+            r.tokensign = i),
+            r
+    }
+    '''
+    return execjs.compile(ex_js)
 
 class Details:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = get_QGIP()
         self.db_table = Mongo_client().py_spider
         self.db_name = self.db_table.theme_list
         self.zt_details = self.db_table.data_bak
@@ -34,44 +203,6 @@ class Details:
         self.redis_key = "ztpc_gdszfcgw_msg"
         self.delete_key = ""
         self.end_state = False
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
-        }
-
-    def get_html(self, html_source):
-        html_js = "".join(re.findall("var siteIdOriginal='';(.*?)\$\('#info_download'\).hide\(\)", html_source, re.S))
-        if html_js:
-            try:
-                trans = "".join(re.findall(r"openTenderCode.replace(.*?);", html_js))
-                trans_html = html_js.replace(trans, "('\\\\',\"\")")
-                data = "function get_html(){" + trans_html + " return demandAnnouncement }"
-                ctx = execjs.compile(data)
-                html = ctx.call('get_html')
-                return html
-            except:
-                return None
-        else:
-            return None
-
-    def get_file_list(self, html, proxies=False):
-        currPage = "".join(re.findall('var currPage = (.*?);', html))
-        pageSize = "".join(re.findall('var pageSize = (.*?);', html))
-        Id = "".join(re.findall('var currInfoId = "(.*?)"', html))
-        if currPage and pageSize and Id:
-            try:
-                url = f"https://gdgpo.czt.gd.gov.cn/freecms/rest/v1/notice/selectNoticeDocInfo.do?currPage={currPage}&pageSize={pageSize}0&id={Id}"
-                file_res = requests.get(url, headers=self.headers, proxies=proxies, timeout=60, verify=False)
-                return file_res.json().get('data')
-            except:
-                return []
-        else:
-            return []
 
     def text_search(self, content: str):
         SearchText = namedtuple('SearchText', ['total'])
@@ -85,71 +216,99 @@ class Details:
 
     def detail_get(self, response, item):
         response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-        detail_html = root.xpath('/html/body').extract_first()
-
-        html = ''
-        dxpath_list = ['//div[@id="content"]', '//div[@class="infoCommon"]', '//div[@class="noticeArea"]']
-        for xpath in dxpath_list:
-            html = root.xpath(xpath).extract_first()
-            if html:
-                break
-
-        publishTime = "".join(re.findall('var publishTime = "(.*?)"', response.text))
-        if publishTime:
-            year = root.xpath('//input[@id="year"]').extract_first()
-            month = root.xpath('//input[@id="month"]').extract_first()
-            date = root.xpath('//input[@id="date"]').extract_first()
-            y = publishTime.split(' ')[0].split('-')[0]
-            m = publishTime.split(' ')[0].split('-')[1]
-            d = publishTime.split(' ')[0].split('-')[2]
-            html = html.replace(year, y)
-            html = html.replace(month, m)
-            html = html.replace(date, d)
-
-        js_html = self.get_html(detail_html)
-        if js_html and self.text_search(html).total < 20:
-            html = js_html
+        dt = response.json().get('data')
+        if item.get('noticeType','') == "001101":
+            html = f'''
+                <table style="margin-left: 270px;">
+                    <tbody>
+                    <tr>
+                        <td> 一、采购人: {dt.get('purchaser')}</td>
+                    </tr>
+                    <tr>
+                        <td> 二、采购计划编号:{dt.get('planCodes')}</td>
+                    </tr>
+                    <tr>
+                        <td> 三、采购计划名称:{dt.get('title')}</td>
+                    </tr>
+                    <tr>
+                        <td> 四、采购品目名称: {dt.get('catalogueNameList')}</td>
+                    </tr>
+                    <tr>
+                        <td> 五、采购预算金额(元):{dt.get('budget')}</td>
+                    </tr>
+                    <tr>
+                        <td> 六、需求时间: {dt.get('demandTime')}</td>
+                    </tr>
+                    <tr>
+                        <td> 七、采购方式: {dt.get('purchaseManner')}</td>
+                    </tr>
+                    <tr>
+                        <td> 八、备案时间: {dt.get('recordTime')}</td>
+                    </tr>
+                    </tbody>
+                </table>
+                '''.replace('None', '')
+        elif item.get('noticeType','') == "001059":
+            html = f'''
+                <div>
+                    <div> 一、采购项目名称:<span>{dt.get('title')}</span></div>
+                    <div> 二、采购品目名称:<span>{dt.get('catalogueNameList')}</span></div>
+                    <div> 三、本公告期限(不得少于5个工作日)自:
+                        <span>{dt.get('noticeTime').split(' ')[0]} 至
+                        <span>{dt.get('expireTime').split(' ')[0]}
+                    </div>
+                    <div>
+                        四、任何供应商、单位或者个人对本项目采购需求(征求意见稿)公告有异议的,可以自公告开始之日起至公告期满后5个工作日内将书面意见反馈给采购人、采购代理机构。
+                    </div>
+                    <div> 五、联系事项
+                        <div>
+                            <div> (一)采购人:<span>{dt.get('purchaser')}</span></div>
+                            <div> 地址:<span>{dt.get('purchaserAddr')}</span></div>
+                            <div style="padding-left: 47px;"> 联系人:<span>{dt.get('purchaserLinkMan')}</span></div>
+                            <div> 联系电话:<span>{dt.get('purchaserLinkPhone')}</span></div>
+                            <div> (二)采购代理机构:<span>{dt.get('agency')}</span></div>
+                            <div> 地址:<span>{dt.get('agentAddress')}</span></div>
+                            <div style="padding-left: 47px;"> 联系人:<span>{dt.get('agentLinkMan')}</span></div>
+                            <div> 联系电话:<span>{dt.get('agentLinkPhone')}</span></div>
+                        </div>
+                    </div>
+                    <div>
+                        <div> 发布人:<span>{dt.get('purchaser')}</span></div>
+                        <div> 发布时间:<span>{dt.get('noticeTime').split(' ')[0]}</div>
+                    </div>
+                </div>
+                '''.replace('None', '')
+        else:
+            html = dt.get('content')
 
         file_name_list = []
-
-        file_list = root.xpath('//a[@href]')
+        file_list = dt.get('attchList') or []
         attachments = {}
-        file_types = ['zip', 'docx', 'ftp', 'pdf', 'doc', 'rar', 'gzzb', 'jpg',
-                      'png', 'zbid', 'xls', 'xlsx', 'swp', 'dwg', 'wps']
-        if file_list:
-            for index, info in enumerate(file_list):
-                file_url = info.xpath('./@href').extract_first()
-                file_name = info.xpath('./text()').extract_first("").strip()
-                file_type = extract_file_type(file_name, file_url)
-                if file_type and 'http' in file_url:
-                    file_name_list.append(file_name)
-                    attachment = AttachmentDownloader().fetch_attachment(
-                        file_name=file_name, file_type=file_type, download_url=file_url, proxies=self.proxy)
-                    attachments[str(len(attachments) + 1)] = attachment
-
-        js_file_list = self.get_file_list(detail_html, self.proxy)
-        if js_file_list:
-            for infoo in js_file_list:
-                file_name = infoo.get('fileName').strip()
-                file_url = infoo.get('fileUrl').strip()
-                file_type = infoo.get('fileExt').strip()
-
-                if file_type not in file_types:
-                    file_type = file_name.split(".")[-1].lower()
-
-                if file_type in file_types and 'http' in file_url:
-                    file_name_list.append(file_name)
+        for info in file_list:
+            file_url = info.get('fileUrl')
+            file_name = info.get('fileName')
+            file_type = extract_file_type(file_name, file_url)
+            if file_type and file_name not in file_name_list:
+                file_name_list.append(file_name)
+                attachment = AttachmentDownloader().fetch_attachment(
+                    file_name=file_name, file_type=file_type, download_url=file_url)
+                attachments[str(len(attachments) + 1)] = attachment
+
+        f_list = Selector(text=html).xpath('//a[contains(@href,"upload")]')
+        if f_list:
+            for foo in f_list:
+                f_url = foo.xpath('./@href').extract_first("").strip()
+                f_name = foo.xpath('./text()').extract_first("").strip()
+                f_type = extract_file_type(f_name, f_url)
+                if f_type and f_name not in file_name_list:
+                    file_name_list.append(f_name)
                     attachment = AttachmentDownloader().fetch_attachment(
-                        file_name=file_name, file_type=file_type, download_url=file_url, proxies=self.proxy)
+                        file_name=f_name, file_type=f_type, download_url=f_url)
                     attachments[str(len(attachments) + 1)] = attachment
 
         if attachments:
             item['projectinfo'] = {"attachments": attachments}
 
-        rm_list = ['//p[contains(@class,"info-title")]','//div[contains(@class,"info-source")]']
-        html = remove_htmldata(rm_list,html,root)
-
         new_html = html
         for fn in file_name_list:
             new_html = new_html.replace(fn, '')
@@ -165,9 +324,23 @@ class Details:
 
         return True
 
-    def fetch_request(self, item, proxies=False):
-        response = requests.get(url=item.get("parse_url"), headers=self.headers,
-                                proxies=proxies, timeout=60, verify=False)
+    def fetch_request(self, item):
+        nsssjss = ctx().call('get_njs')
+        pms = ctx().call('mme', '/gpcms/rest/web/v2/info/selectInfoForIndex')
+
+        headers = {
+            "Accept": "*/*",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Connection": "keep-alive",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+            "nsssjss": f"{nsssjss}",
+            "sign": f"{pms.get('sign')}",
+            "time": f"{pms.get('time')}",
+            "url": f"{pms.get('url')}"
+        }
+        params =item.get('request_params') or {}
+        response = requests.get(url=item.get("parse_url"), headers=headers,params=params,
+                                proxies=self.proxy, timeout=60, verify=False)
         return response
 
     def deal_request(self, item):
@@ -177,7 +350,7 @@ class Details:
         org_item = item.copy()
         while retry_times < 5:
             try:
-                response = self.fetch_request(item, self.proxy)
+                response = self.fetch_request(item)
                 state = response.status_code
                 if response is not None and state == 200:
                     self.detail_get(response, item)
@@ -186,7 +359,7 @@ class Details:
                 item = org_item
                 logger.error(f"{item['href']} 异常:{e}")
                 time.sleep(3)
-                self.proxy = get_proxy()
+                self.proxy = get_QGIP()
                 retry_times += 1
         return False
 
@@ -214,7 +387,7 @@ class Details:
                 # logger.debug(item)
                 if self.end_state:
                     break
-                if count >= 200:
+                if count >= limit:
                     break
                 unicode_key = md5value(item.get('href') + item.get('title'))
                 if not self.rds.hexists(self.redis_key, unicode_key):  # 除 动态字段 外所有字段去重
@@ -235,4 +408,4 @@ class Details:
 
 
 if __name__ == "__main__":
-    Details().start(limit=1)
+    Details().start(limit=200)
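On the signing JS above: h() appears to be the classic JavaScript SHA-1 hex implementation and pp() is MD5, so the sign/time/url headers built by mme() reduce to two hashlib calls. A hedged Python equivalent (assuming h() really is standard SHA-1; the nsssjss header additionally RSA-encrypts the path and timestamp via JSEncrypt and is not reproduced here):

    # Sketch of the sign/time/url headers from mme(); assumes the bundled
    # JS h() is a standard SHA-1 hex digest.
    import hashlib
    import time

    def sign_headers(path: str) -> dict:
        ts = int(time.time() * 1000)  # JS (new Date).getTime()
        url = path.split("?")[0]
        sha1_hex = hashlib.sha1(f"{ts}_{url}_bosssoft_platform_095285".encode()).hexdigest()
        return {
            "time": str(ts),
            "url": url,
            "sign": hashlib.md5(sha1_hex.encode()).hexdigest(),
        }

    # e.g. merge sign_headers('/gpcms/rest/web/v2/info/getInfoById') into the request headers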

+ 272 - 41
lzz_theme/gdszfcgw/list_spider.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-06-25
+Created on 2025-07-04
 ---------
 @summary: 广东省政府采购网 - 列表页
 ---------
@@ -12,11 +12,11 @@ import os
 sys.path.append(os.path.dirname(os.getcwd()))
 from utils.tools import *
 import time, random
-from urllib.parse import urljoin
 from utils.clean_html import cleaner
 from parsel import Selector
 from utils.attachment import AttachmentDownloader
 from utils.RedisDB import RedisFilter
+from utils.get_imgcode import get_code
 from collections import namedtuple
 
 requests.packages.urllib3.disable_warnings()
@@ -27,11 +27,221 @@ except AttributeError:
     # no pyopenssl support used / needed / available
     pass
 
+def download_code():
+    headers = {
+        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
+        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+        "Connection": "keep-alive",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+    }
+    url = "https://gdgpo.czt.gd.gov.cn/gpcms/rest/web/v2/index/getVerify"
+    for _ in range(10):
+        res = requests.get(url, headers=headers, proxies=get_QGIP(), timeout=60)
+        # with open('./aaa.jpg', 'wb') as f:
+        #     f.write(res.content)
+        code = get_code(res.content)
+        if len(code) == 4:
+            return code
+    return None
+
+def ctx():
+    ex_js = '''
+    window = global
+
+    JSEncrypt = require('jsencrypt')
+    CryptoJs = require('crypto-js')
+
+    function K(value) {
+        var encrypt = new JSEncrypt;
+        var RSAPublicKey = 'MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCS2TZDs5+orLYCL5SsJ54+bPCVs1ZQQwP2RoPkFQF2jcT0HnNNT8ZoQgJTrGwNi5QNTBDoHC4oJesAVYe6DoxXS9Nls8WbGE8ZNgOC5tVv1WVjyBw7k2x72C/qjPoyo/kO7TYl6Qnu4jqW/ImLoup/nsJppUznF0YgbyU/dFFNBQIDAQAB';
+        encrypt.setPublicKey('-----BEGIN PUBLIC KEY-----' + RSAPublicKey + '-----END PUBLIC KEY-----')
+        return encrypt.encrypt(value)
+    }
+
+    function mm(e, t) {
+        return e += `_${t}_bosssoft_platform_095285`,
+            t = CryptoJs.SHA1(e).toString(),
+            CryptoJs.MD5(t).toString()
+    }
+
+    function get_njs(e){
+        var t = (new Date).getTime();
+        return K(String(e).split("?")[0] + "$$" + t)
+    }
+
+    function decode_str(e) {
+        var n, t, a, c, i, r;
+        n = "",
+        a = e.length,
+        t = 0;
+        while (t < a)
+            switch (c = e.charCodeAt(t++),
+            c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                n += e.charAt(t - 1);
+                break;
+            case 12:
+            case 13:
+                i = e.charCodeAt(t++),
+                n += String.fromCharCode((31 & c) << 6 | 63 & i);
+                break;
+            case 14:
+                i = e.charCodeAt(t++),
+                r = e.charCodeAt(t++),
+                n += String.fromCharCode((15 & c) << 12 | (63 & i) << 6 | (63 & r) << 0);
+                break
+            }
+        return n
+    }
+
+    function de_str(e) {
+        var n, t, a, c, i, r, o, u = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1];
+        r = e.length,
+        i = 0,
+        o = "";
+        while (i < r) {
+            do {
+                n = u[255 & e.charCodeAt(i++)]
+            } while (i < r && -1 == n);
+            if (-1 == n)
+                break;
+            do {
+                t = u[255 & e.charCodeAt(i++)]
+            } while (i < r && -1 == t);
+            if (-1 == t)
+                break;
+            o += String.fromCharCode(n << 2 | (48 & t) >> 4);
+            do {
+                if (a = 255 & e.charCodeAt(i++),
+                61 == a)
+                    return o;
+                a = u[a]
+            } while (i < r && -1 == a);
+            if (-1 == a)
+                break;
+            o += String.fromCharCode((15 & t) << 4 | (60 & a) >> 2);
+            do {
+                if (c = 255 & e.charCodeAt(i++),
+                61 == c)
+                    return o;
+                c = u[c]
+            } while (i < r && -1 == c);
+            if (-1 == c)
+                break;
+            o += String.fromCharCode((3 & a) << 6 | c)
+        }
+        return o
+    }
+
+    function get_data(data){
+        return decode_str(de_str(data))
+    }
+
+    var i = 0
+        , r = 8;
+
+    function a(e, t) {
+        e[t >> 5] |= 128 << 24 - t % 32,
+            e[15 + (t + 64 >> 9 << 4)] = t;
+        for (var n, i, r, o = Array(80), a = 1732584193, u = -271733879, c = -1732584194, h = 271733878, d = -1009589776, f = 0; f < e.length; f += 16) {
+            for (var p = a, m = u, g = c, v = h, C = d, y = 0; y < 80; y++) {
+                o[y] = y < 16 ? e[f + y] : l(o[y - 3] ^ o[y - 8] ^ o[y - 14] ^ o[y - 16], 1);
+                var A = s(s(l(a, 5), (A = u,
+                    i = c,
+                    r = h,
+                    (n = y) < 20 ? A & i | ~A & r : !(n < 40) && n < 60 ? A & i | A & r | i & r : A ^ i ^ r)), s(s(d, o[y]), (n = y) < 20 ? 1518500249 : n < 40 ? 1859775393 : n < 60 ? -1894007588 : -899497514));
+                d = h,
+                    h = c,
+                    c = l(u, 30),
+                    u = a,
+                    a = A
+            }
+            a = s(a, p),
+                u = s(u, m),
+                c = s(c, g),
+                h = s(h, v),
+                d = s(d, C)
+        }
+        return Array(a, u, c, h, d)
+    }
+
+    function s(e, t) {
+        var n = (65535 & e) + (65535 & t);
+        return (e >> 16) + (t >> 16) + (n >> 16) << 16 | 65535 & n
+    }
+
+    function l(e, t) {
+        return e << t | e >>> 32 - t
+    }
+
+    function u(e) {
+        for (var t = Array(), n = (1 << r) - 1, i = 0; i < e.length * r; i += r)
+            t[i >> 5] |= (e.charCodeAt(i / r) & n) << 24 - i % 32;
+        return t
+    }
+
+    function c(e) {
+        for (var t = i ? "0123456789ABCDEF" : "0123456789abcdef", n = "", r = 0; r < 4 * e.length; r++)
+            n += t.charAt(e[r >> 2] >> 8 * (3 - r % 4) + 4 & 15) + t.charAt(e[r >> 2] >> 8 * (3 - r % 4) & 15);
+        return n
+    }
+
+    function h(e) {
+        return c(a(u(e), e.length * 8))
+    }
+
+    function pp(e) {
+        return CryptoJs.MD5(e).toString()
+    }
+
+    function mme(e, t) {
+        var n = (new Date).getTime()
+            , i = pp(h(`${n}_${String(e).split("?")[0]}_bosssoft_platform_095285`))
+            , r = {};
+        return r.time = n,
+            r.url = String(e).split("?")[0],
+            r.sign = i,
+        null != t && "" !== t && (i = p(h("" + t + String(e).split("?")[0] + n)),
+            r.tokensign = i),
+            r
+    }
+    '''
+    return execjs.compile(ex_js)
+
+
+def get_href(hid, channel, noticeType, channelName, openTenderCode, kcProjectCode=""):
+    if channel in ["fca71be5-fc0c-45db-96af-f513e9abda9d", "958b68d2-d97f-4f98-a0f4-3a5802ec94a9"]:
+        t = ["59", "001051", "001009", "00105A"]
+        a = noticeType.split(",")
+        if a in t:
+            href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/articleGd?type=article&id={hid}&channelName={channelName}"
+        else:
+            if "001101" == noticeType:
+                href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/articleRedHeadGd?id={hid}&channelName={channelName}"
+            else:
+                href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/noticeGd?type=notice&id={hid}&channel=fca71be5-fc0c-45db-96af-f513e9abda9d&noticeType={noticeType}&openTenderCode={openTenderCode}&channelName={channelName}"
+    else:
+        if "95ff31f3-a1af-4bc4-b1a2-54c894476193" == channel:
+            href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/articleRedHeadGd?id={hid}&channelName={channelName}"
+        else:
+            if "82fad126-7447-43a2-94aa-d42647349ae9" == channel:
+                href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/noticeKjxyGd?id={hid}&channel={channel}&kcProjectCode={kcProjectCode}"
+            else:
+                href = f"https://gdgpo.czt.gd.gov.cn/maincms-web/articleGd?type=article&id={hid}&channelName={channelName}"
+    return href
+
 
 class Crawl_Gds:
 
     def __init__(self):
-        self.proxy = get_proxy()
+        self.proxy = get_QGIP()
         self.py_spider = Mongo_client().py_spider
         self.zb_list = self.py_spider.theme_list
         self.zb_details = self.py_spider.data_bak
@@ -40,6 +250,8 @@ class Crawl_Gds:
         self.host = 'https://gdgpo.czt.gd.gov.cn'
         self.real_cont = 0
         self.con_page = 0
+        self.code = None
+        self.noticeType_list = ['001051', '59', '001009', '001101', '00105A']
 
     def get_file(self, html_source, proxies=False):
         file_list = Selector(text=html_source).xpath('//a[@href]')
@@ -62,33 +274,39 @@ class Crawl_Gds:
     def fetch_list_page(self, page, menu):
         logger.debug(f' *** 开始采集第{page}页 ***')
 
+        nsssjss = ctx().call('get_njs')
+        pms = ctx().call('mme', '/gpcms/rest/web/v2/info/selectInfoForIndex')
         headers = {
             "Accept": "*/*",
-            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
             "Connection": "keep-alive",
-            "Content-Type": "application/json;charset=utf-8",
-            "Referer": "https://gdgpo.czt.gd.gov.cn/cms-gd/site/guangdong/qwjsy/index.html?",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
-            "X-Requested-With": "XMLHttpRequest",
-            "nsssjss": nsssjss(),
-            "url": "/freecms/rest/v1/notice/selectInfoMoreChannel.do"
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+            "nsssjss": nsssjss,
+            "sign": f"{pms.get('sign')}",
+            "time": f"{pms.get('time')}",
+            "url": f"{pms.get('url')}"
         }
-        url = "https://gdgpo.czt.gd.gov.cn/freecms/rest/v1/notice/selectInfoMoreChannel.do"
+        if not self.code:
+            self.code = download_code()
+
+        url = "https://gdgpo.czt.gd.gov.cn/gpcms/rest/web/v2/info/selectInfoForIndex"
         params = {
+            "currPage": f"{page}",
+            "pageSize": "40",
             "siteId": "cd64e06a-21a7-4620-aebc-0576bab7e07a",
             "channel": menu.chan_key,
-            "title": "",
-            "content": "",
-            "regionCode": "",
             "noticeType": menu.noticeType,
-            "operationStartTime": f"{menu.start_day} 00:00:00",
-            "operationEndTime": f"{menu.end_day} 00:00:00",
-            "selectTimeName": menu.selectTimeName,
             "purchaser": "",
             "agency": "",
-            "currPage": f"{page}",
-            "pageSize": "15",
-            "verifyCode": down_load_image(self.proxy)
+            "operationStartTime": f"{menu.reqtime} 00:00:00",
+            "operationEndTime": f"{menu.reqtime} 23:59:59",
+            "searchKey": "",
+            "regionCode": "",
+            "selectTimeName": "noticeTime",
+            "cityOrAreal": "",
+            "requestSource": "qwjs",
+            "verifyCode": f"{self.code}",
+            "purchaseManner": "",
         }
 
         request_params = {
@@ -103,18 +321,37 @@ class Crawl_Gds:
         return resp
 
     def parser_list_page(self, response, page, menu):
+        if "验证码错误" in response.text or "验签比对失败" in response.text:
+            self.code = None
+            raise ValueError("验证码错误!")
         results_list = []
         results_detail = []
-        info_list = response.json().get("data")
+        info_list = response.json().get("data").get('rows')
 
         if type(info_list) == list:
             for info in info_list:
-                href = info.get("pageurl")
-                title = info.get("shorttitle")
+                hid = info.get("id")
+                openTenderCode = info.get("openTenderCode")
+                channelName = info.get("channelName")
+                noticeType = info.get("noticeType")
+                channel = info.get("channel")
+                kcProjectCode = info.get("kcProjectCode")
+                if noticeType in self.noticeType_list:
+                    params = {
+                        "id": hid,
+                    }
+                else:
+                    params = {
+                        "channel": "fca71be5-fc0c-45db-96af-f513e9abda9d",
+                        "site": "cd64e06a-21a7-4620-aebc-0576bab7e07a",
+                        "id": hid,
+                    }
+                href = get_href(hid, channel, noticeType, channelName, openTenderCode, kcProjectCode)
+                title = info.get("title")
                 create_time = info.get("noticeTime")
-                href = urljoin(self.host, href)
+
                 content = info.get("content")
-                dedup = [title,href,create_time]
+                dedup = [title, href, create_time]
                 if not self.RDS.data_filter(dedup):
                     if not content or len(content.strip()) < 5:
                         item = {
@@ -126,22 +363,16 @@ class Crawl_Gds:
                             "area": "广东",
                             "city": "",
                             "href": href,
-                            "publishdept": "",
-                            "iscompete": True,
-                            "type": "",
-                            "T": "bidding",
-                            "infoformat": 1,
-                            "l_np_publishtime": "",
                             "is_mixed": False,
                             "is_theme": True,
                             "is_crawl": False,
                             "failed": False,
                             "retry": 0,
-                            "comeintime": int2long(time.time()),
-                            "sendflag": "false",
-                            "_d": "comeintime",
+                            "comeintime": int2long(int(time.time())),
                             "parser_name": "gdszfcgw_zt_details",
-                            "parse_url": href,
+                            "parse_url": "https://gdgpo.czt.gd.gov.cn/gpcms/rest/web/v2/info/getInfoById",
+                            "request_params": params,
+                            "noticeType": noticeType,
                         }
 
                         results_list.append(item)
@@ -163,7 +394,7 @@ class Crawl_Gds:
                             "type": "",
                             "T": "bidding",
                             "infoformat": 1,
-                            "comeintime": int2long(time.time()),
+                            "comeintime": int2long(int(time.time())),
                             "l_np_publishtime": pub_time[1],
                             "is_mixed": True,
                             "is_theme": True,
@@ -183,14 +414,15 @@ class Crawl_Gds:
                 else:
                     logger.warning(f"[重复数据] {href}")
 
-            logger.info(f' *** 第{page}页采集完毕 - {len(info_list)} - 入库 {len(results_list) + len(results_detail)} 条***')
+            logger.info(
+                f' *** 第{page}页采集完毕 - {len(info_list)} - 入库 {len(results_list) + len(results_detail)} 条***')
         else:
             raise ValueError("请求失败!")
         return results_list, results_detail
 
     def crawl_list_spider(self, page, menu):
         if page % 100 == 0:
-            self.proxy = get_proxy()
+            self.proxy = get_QGIP()
         response = None
         retry_times = 0
         while retry_times < 3:
@@ -204,13 +436,13 @@ class Crawl_Gds:
                     if crawl_num == 0:
                         self.con_page += 1
                     else:
-                        self.con_page == 0
+                        self.con_page = 0
                     time.sleep(random.random())
                 break
             except Exception as e:
                 logger.error(f"第{page}页 异常:{e}")
                 time.sleep(3)
-                self.proxy = get_proxy()
+                self.proxy = get_QGIP()
                 retry_times += 1
 
     def start_list(self, menus):
@@ -228,4 +460,3 @@ class Crawl_Gds:
             logger.debug(f"------ {menu.channel} 采集结束 ------")
 
         logger.info("********** 列表页结束 **********")
-
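Note the captcha flow added in this file: list requests now carry an OCR-solved verifyCode, cached in self.code and invalidated whenever the response contains "验证码错误" ("captcha incorrect") or "验签比对失败" ("signature check failed"), so the next attempt solves a fresh image. A condensed sketch of that cache-and-invalidate pattern, with solve_captcha() standing in for download_code()/get_code():

    # Condensed cache-and-invalidate pattern; solve_captcha is a stand-in.
    code_cache = None

    def fetch_with_captcha(session, solve_captcha, url, params, retries=3):
        global code_cache
        for _ in range(retries):
            if code_cache is None:
                code_cache = solve_captcha()  # OCR a fresh verify image
            resp = session.get(url, params={**params, "verifyCode": code_cache})
            if "验证码错误" in resp.text or "验签比对失败" in resp.text:
                code_cache = None  # stale code: drop it and re-solve on retry
                continue
            return resp
        raise RuntimeError("captcha retries exhausted")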

+ 2 - 2
lzz_theme/gdszfcgw/start.sh

@@ -2,5 +2,5 @@
 
 ps -ef |grep "gdszfcgw_daliy_crawl.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "gdszfcgw_details_spider.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 gdszfcgw_daliy_crawl.py > log/gdszfcgw_daliy_crawl.out 2>&1 &
-nohup python3 gdszfcgw_details_spider.py > log/gdszfcgw_details_spider.out 2>&1 &
+nohup python3 gdszfcgw_daliy_crawl.py > /dev/null 2>&1 &
+nohup python3 gdszfcgw_details_spider.py > /dev/null 2>&1 &

+ 5 - 5
lzz_theme/gnzggzyjyzx/start.sh

@@ -6,9 +6,9 @@ ps -ef |grep "gnz_sx_zb.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "gnz_sx_zgys.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "gnz_ygcg.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 gnz_gn_zbjh.py > log/gnz_gn_zbjh.out 2>&1 &
-nohup python3 gnz_gn_zgys.py > log/gnz_gn_zgys.out 2>&1 &
-nohup python3 gnz_sx_zb.py > log/gnz_sx_zb.out 2>&1 &
-nohup python3 gnz_sx_zgys.py > log/gnz_sx_zgys.out 2>&1 &
-nohup python3 gnz_ygcg.py > log/gnz_ygcg.out 2>&1 &
+nohup python3 gnz_gn_zbjh.py > /dev/null 2>&1 &
+nohup python3 gnz_gn_zgys.py > /dev/null 2>&1 &
+nohup python3 gnz_sx_zb.py > /dev/null 2>&1 &
+nohup python3 gnz_sx_zgys.py > /dev/null 2>&1 &
+nohup python3 gnz_ygcg.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/hnszfcgdzmc/dt_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "zxjj_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 zxjj_details.py > log/zxjj_details.out 2>&1 &
+nohup python3 zxjj_details.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/hnszfcgdzmc/hn_collector.py

@@ -11,7 +11,7 @@ import importlib
 import re
 import time
 from concurrent.futures import ThreadPoolExecutor, wait
-from loguru import logger
+from utils.log import logger
 from spider import Crawl_Hndzmc
 
 

+ 3 - 3
lzz_theme/hnszfcgdzmc/start.sh

@@ -3,6 +3,6 @@
 ps -ef |grep "hn_collector.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "zxjj_spider.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "jjjg_spider.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 hn_collector.py > log/hn_collector.out 2>&1 &
-nohup python3 zxjj_spider.py > log/zxjj_spider.out 2>&1 &
-nohup python3 jjjg_spider.py > log/jjjg_spider.out 2>&1 &
+nohup python3 hn_collector.py > /dev/null 2>&1 &
+nohup python3 zxjj_spider.py > /dev/null 2>&1 &
+nohup python3 jjjg_spider.py > /dev/null 2>&1 &

+ 58 - 0
lzz_theme/hnszfcgdzmc_new/hn_new_collector.py

@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 启动器
+---------
+@author: Lzz
+"""
+import glob
+import importlib
+import re
+import time
+from concurrent.futures import ThreadPoolExecutor, wait
+from utils.log import logger
+from spider import Crawl_Hndzmc
+
+
+# 获取指定路径下的所有.py文件
+file_list = glob.glob('./*.py')
+
+menus_list = []
+# 遍历并导入 52 个文件  共计 808 页
+# files = os.listdir(os.getcwd())
+# file_list = [file for file in files if re.findall('[\u4e00-\u9fa5]', file)]
+
+for file in file_list:
+    # module_name = file[:-3].replace(".\\", "")  # 去除后缀名".py" windows
+    module_name = file[:-3].replace("./", "")  # 去除后缀名".py"  centos mac
+    if re.findall('^[\u4e00-\u9fa5]', module_name):
+        spider_param = importlib.import_module(module_name)
+        menus_list.append(spider_param.menus)
+
+
+def myfine(menus):
+    Crawl_Hndzmc().start(menus)
+
+
+def main(work=1):
+
+    logger.info(" 启动器 开启 >>> >>> ")
+    start_time = int(time.time())
+    with ThreadPoolExecutor(max_workers=work) as thp:
+
+        future_list = []
+        for menus in menus_list:
+            future = thp.submit(myfine, menus)
+            future_list.append(future)
+
+        wait(future_list)
+
+    end_time = int(time.time())
+    logger.info(f"本轮运行时间:{end_time - start_time} s")
+    logger.info(" <<< <<< 启动器 关闭 ")
+
+
+if __name__ == '__main__':
+    main(work=1)
+

+ 260 - 0
lzz_theme/hnszfcgdzmc_new/spider.py

@@ -0,0 +1,260 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 湖南省政府采购电子卖场
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+from collections import namedtuple
+from utils.RedisDB import RedisFilter
+from utils.attachment import AttachmentDownloader
+from utils.tools import *
+from parsel import Selector
+import json
+import warnings
+
+
+
+warnings.filterwarnings('ignore')
+
+
+
+class Crawl_Hndzmc:
+
+    def __init__(self):
+        self.py_spider = Mongo_client().py_spider
+        self.zb_details = self.py_spider.data_bak
+        self.RDS = RedisFilter()
+        self.real_cont = 0
+        self.proxy = get_proxy()
+        self.params = {}
+        self.cookies = {}
+        self.is_stop = False
+
+    def get_acw_sc_v2(self, html):
+        try:
+            arg1 = "".join(re.findall("arg1='(.*?)'", html))
+            if arg1:
+                js_script = '''
+                    function getAcw_sc__v2(obt_arg1) {
+                        String["prototype"]["hexXor"] = function (_0x4e08d8) {
+                            var _0x5a5d3b = '';
+                            for (var _0xe89588 = 0x0; _0xe89588 < this["length"] && _0xe89588 < _0x4e08d8["length"]; _0xe89588 += 2) {
+                                var _0x401af1 = parseInt(this["slice"](_0xe89588, _0xe89588 + 2), 16);
+                                var _0x105f59 = parseInt(_0x4e08d8["slice"](_0xe89588, _0xe89588 + 2), 16);
+                                var _0x189e2c = (_0x401af1 ^ _0x105f59)["toString"](16);
+                                if (_0x189e2c["length"] == 1) {
+                                    _0x189e2c = '0' + _0x189e2c;
+                                }
+                                _0x5a5d3b += _0x189e2c;
+                            }
+                            return _0x5a5d3b;
+                        };
+                        String["prototype"]["unsbox"] = function () {
+                            var _0x4b082b = [15, 35,29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36];
+                            var _0x4da0dc = [];
+                            var _0x12605e = '';
+                            for (var _0x20a7bf = 0x0; _0x20a7bf < this["length"]; _0x20a7bf++) {
+                                var _0x385ee3 = this[_0x20a7bf];
+                                for (var _0x217721 = 0; _0x217721 < _0x4b082b["length"]; _0x217721++) {
+                                    if (_0x4b082b[_0x217721] == _0x20a7bf + 1) {
+                                        _0x4da0dc[_0x217721] = _0x385ee3;
+                                    }
+                                }
+                            }
+                            _0x12605e = _0x4da0dc["join"]('');
+                            return _0x12605e;
+                        };
+
+                        var _0x5e8b26 = "3000176000856006061501533003690027800375";
+                        // var arg1 = "0A5F01F50F9BC66FB28038F18B99B7B10CFF4667"
+                        var arg1 = obt_arg1
+                        var _0x23a392 = arg1["unsbox"]();
+                        arg2 = _0x23a392["hexXor"](_0x5e8b26);
+                        return arg2
+                    }
+                '''
+                ctx = execjs.compile(js_script)
+                arg2 = ctx.call('getAcw_sc__v2', arg1)
+                return {"acw_sc__v2": arg2}
+            else:
+                return {}
+        except:
+            return {}
+
+    def fetch_list_page(self, page, menu):
+        logger.debug(f' *** {menu.channel} 开始采集第{page}页 ***')
+
+        session = requests.Session()
+
+        session.proxies = get_QGIP()
+        session.verify = False
+
+        headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9",
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Content-Type": "application/json;charset=UTF-8",
+            "Origin": "https://hunan.zcygov.cn",
+            "Pragma": "no-cache",
+            "Referer": "https://hunan.zcygov.cn/bidding/result",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
+            "X-Requested-With": "XMLHttpRequest",
+        }
+
+        data = {
+            "pageSize": 100,
+            "pageNo": page,
+            "announcementTypes": menu.anTypes,
+            "district": menu.district
+        }
+        data = json.dumps(data)
+
+        retry = 0
+        while (retry := retry + 1) < 3:
+            url = "https://hunan.zcygov.cn/announcement/lobby/queryPage"
+            resp = session.post(url, headers=headers, cookies=self.cookies, data=data, timeout=20)
+            self.cookies.update(session.cookies.get_dict())
+
+            arg1_ck = self.get_acw_sc_v2(resp.text)
+
+            if "滑动验证页面" in resp.text:
+                logger.warning("滑动验证页面")
+                return None
+
+            elif arg1_ck:
+                logger.warning("arg1_ck")
+                self.cookies.update(arg1_ck)
+            else:
+                return resp
+
+        return None
+
+    def parser_list_page(self, response, page, menu):
+        results_list = []
+        info_list = response.json().get('result').get('data') or []
+        for info in info_list:
+            title = info.get('title').strip()
+            encryptId = info.get('encryptId')
+            href = f'https://hunan.zcygov.cn/luban/announcement/detail?encryptId={encryptId}&district={menu.district}'
+            create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(str(info.get("releasedAt"))[:-3])))
+
+            chl = menu.channel
+
+            area = "湖南"  # 省份
+            city = ""
+            if "自治州" in chl:
+                city = chl.split('自治州')[0] + "自治州"
+            elif "市" in chl:
+                city = chl.split('市')[0] + "市"
+
+            html = f'''<span>发布人: {info.get('checkerName')}</span>
+                        <span>来源: {info.get('orgName')}</span>
+                        <span>发布时间: {create_time}</span>{info.get('content')}'''
+
+            contenthtml = html.replace('None', '')
+            detail = cleaner(contenthtml)
+
+            pub_time = handle_publish_time(create_time)
+            publishtime = pub_time[0]
+            l_np_publishtime = pub_time[1]
+
+            dedup = [href]
+            if not self.RDS.data_filter(dedup):
+                attachments = {}
+                root = Selector(info.get('content'))
+                file_list = root.xpath('//a[@href]')
+                if file_list:
+                    ad = AttachmentDownloader()
+                    for ff in file_list:
+                        file_url = ff.xpath('./@href').extract_first()
+                        file_name = ff.xpath('./text()').extract_first("").strip()
+                        file_type = extract_file_type(file_name, file_url)
+                        if not file_type:
+                            continue
+
+                        for _ in range(3):
+                            attachment = ad.fetch_attachment(
+                                file_name=file_name,
+                                file_type=file_type,
+                                download_url=file_url,
+                                proxies=self.proxy
+                            )
+                            if attachment.__contains__("fid"):
+                                attachments[str(len(attachments) + 1)] = attachment
+                                break
+                            self.proxy = get_proxy()
+
+
+                item = {
+                    "site": "湖南省政府采购电子卖场",
+                    "channel": menu.channel,
+                    "spidercode": menu.spidercode,
+                    "area": area,
+                    "city": city,
+                    "district": "",
+                    "href": href,
+                    "title": title,
+                    "s_title": title,
+                    "publishtime": publishtime,
+                    "l_np_publishtime": l_np_publishtime,
+                    "is_mixed": True,
+                    "comeintime": int2long(int(time.time())),
+                    "contenthtml": contenthtml,
+                    "detail": detail,
+                    "iscompete": True,
+                    "sendflag": "false",
+                    "T": "bidding",
+                    "infoformat": 1,
+                    "type": "",
+                    "publishdept": "",
+                    "_d": "comeintime",
+                }
+
+                if attachments:
+                    item['projectinfo'] = {"attachments": attachments}
+
+                self.zb_details.insert_one(item)
+                self.RDS.data_save_redis(dedup)
+                results_list.append(item)
+
+        logger.info(f' *** {menu.channel}_{menu.anTypes}_第{page}页 采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
+        if len(results_list) < 10 or len(info_list) < 100:
+            self.is_stop = True
+        return results_list
+
+    def crawl_list_spider(self, page, menu):
+        retry = 0
+        while (retry := retry + 1) < 10:
+            try:
+                logger.debug(f"{menu.channel}_第{page}页 start")
+                response = self.fetch_list_page(page=page, menu=menu)
+                if response is not None and response.status_code == 200:
+                    informations = self.parser_list_page(response=response, page=page, menu=menu)
+                    crawl_num = len(informations)
+                    self.real_cont += crawl_num
+                    logger.info(f"{menu.channel}_第 {page} 页 end, 当前已采集 {self.real_cont} 条数据")
+                    time.sleep(random.randint(1,4))
+                    return
+                else:
+                    time.sleep(2)
+
+            except Exception as e:
+                logger.error(e)
+                time.sleep(2)
+
+
+    def start(self, menus):
+        for menu in menus:
+            crawl_page = menu.crawl_page
+            for page in range(1, crawl_page + 1):
+                self.crawl_list_spider(page, menu)
+                if self.is_stop:
+                    break
+            self.is_stop = False
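Note: the obfuscated script embedded above is just a fixed character permutation ("unsbox") followed by a byte-wise XOR of two hex strings ("hexXor") against a constant mask, so the acw_sc__v2 cookie could also be computed without the execjs dependency. A minimal pure-Python sketch (permutation table and mask copied verbatim from the script; this helper is illustrative and not part of the commit):

    def acw_sc_v2(arg1: str) -> str:
        # pos[slot] is the 1-based index in arg1 whose character lands in that slot
        pos = [15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13,
               6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2, 30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36]
        mask = "3000176000856006061501533003690027800375"
        shuffled = [""] * len(pos)
        for i, ch in enumerate(arg1):  # the "unsbox" permutation
            for slot, src in enumerate(pos):
                if src == i + 1:
                    shuffled[slot] = ch
        s = "".join(shuffled)
        # "hexXor": XOR one byte (two hex characters) at a time
        return "".join(f"{int(s[i:i + 2], 16) ^ int(mask[i:i + 2], 16):02x}"
                       for i in range(0, min(len(s), len(mask)), 2))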

+ 4 - 0
lzz_theme/hnszfcgdzmc_new/start.sh

@@ -0,0 +1,4 @@
+#!/bin/bash
+
+ps -ef |grep "hn_new_collector.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+nohup python3 hn_new_collector.py > /dev/null 2>&1 &

+ 21 - 0
lzz_theme/hnszfcgdzmc_new/异常公告.py

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 异常公告
+---------
+@author: Lzz
+"""
+from collections import namedtuple
+from spider import Crawl_Hndzmc
+
+Menu = namedtuple('Menu', ['channel', 'spidercode', 'district', 'anTypes', 'crawl_page'])
+
+announcementTypes = [8015]
+
+menus = [
+    Menu('长沙高新技术产业开发区隆平高科技园管理委员会-公告大厅', 'hb_hnszfcgdzmc_csgxjscykfqlpgkjyglwyh_ggdt', '430183', announcementTypes, 1),
+    Menu('湘潭岳塘经济技术开发区-公告大厅', 'hb_hnszfcgdzmc_xtytjjjskfq_ggdt', '430313', announcementTypes, 1),
+]
+
+# Crawl_Hndzmc().start(menus)

+ 21 - 0
lzz_theme/hnszfcgdzmc_new/成交公告.py

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 成交公告
+---------
+@author: Lzz
+"""
+from collections import namedtuple
+from spider import Crawl_Hndzmc
+
+Menu = namedtuple('Menu', ['channel', 'spidercode', 'district', 'anTypes', 'crawl_page'])
+
+announcementTypes = [8020, 8025, 8026, 8013]
+
+menus = [
+    Menu('长沙高新技术产业开发区隆平高科技园管理委员会-公告大厅', 'hb_hnszfcgdzmc_csgxjscykfqlpgkjyglwyh_ggdt', '430183', announcementTypes, 1),
+    Menu('湘潭岳塘经济技术开发区-公告大厅', 'hb_hnszfcgdzmc_xtytjjjskfq_ggdt', '430313', announcementTypes, 1),
+]
+
+# Crawl_Hndzmc().start(menus)

+ 21 - 0
lzz_theme/hnszfcgdzmc_new/终止公告.py

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 终止公告
+---------
+@author: Lzz
+"""
+from collections import namedtuple
+from spider import Crawl_Hndzmc
+
+Menu = namedtuple('Menu', ['channel', 'spidercode', 'district', 'anTypes', 'crawl_page'])
+
+announcementTypes = [7002, 8022]
+
+menus = [
+    Menu('长沙高新技术产业开发区隆平高科技园管理委员会-公告大厅', 'hb_hnszfcgdzmc_csgxjscykfqlpgkjyglwyh_ggdt', '430183', announcementTypes, 1),
+    Menu('湘潭岳塘经济技术开发区-公告大厅', 'hb_hnszfcgdzmc_xtytjjjskfq_ggdt', '430313', announcementTypes, 1),
+]
+
+# Crawl_Hndzmc().start(menus)

+ 21 - 0
lzz_theme/hnszfcgdzmc_new/邀请公告.py

@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 邀请公告
+---------
+@author: Lzz
+"""
+from collections import namedtuple
+from spider import Crawl_Hndzmc
+
+Menu = namedtuple('Menu', ['channel', 'spidercode', 'district', 'anTypes', 'crawl_page'])
+
+announcementTypes = [10017, 8018, 8023, 8024]
+
+menus = [
+    Menu('长沙高新技术产业开发区隆平高科技园管理委员会-公告大厅', 'hb_hnszfcgdzmc_csgxjscykfqlpgkjyglwyh_ggdt', '430183', announcementTypes, 1),
+    Menu('湘潭岳塘经济技术开发区-公告大厅', 'hb_hnszfcgdzmc_xtytjjjskfq_ggdt', '430313', announcementTypes, 1),
+]
+
+# Crawl_Hndzmc().start(menus)

+ 22 - 0
lzz_theme/hnszfcgdzmc_new/验收公告.py

@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-07-24
+---------
+@summary: 验收公告 - 湘潭市
+---------
+@author: Lzz
+"""
+from collections import namedtuple
+from spider import Crawl_Hndzmc
+
+Menu = namedtuple('Menu', ['channel', 'spidercode', 'district', 'anTypes', 'crawl_page'])
+
+announcementTypes = [8016, 8021]
+
+menus = [
+    Menu('长沙高新技术产业开发区隆平高科技园管理委员会-公告大厅', 'hb_hnszfcgdzmc_csgxjscykfqlpgkjyglwyh_ggdt', '430183', announcementTypes, 1),
+    Menu('湘潭岳塘经济技术开发区-公告大厅', 'hb_hnszfcgdzmc_xtytjjjskfq_ggdt', '430313', announcementTypes, 1),
+
+]
+
+# Crawl_Hndzmc().start(menus)
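Each of these menu modules only defines a `menus` list; hn_new_collector.py discovers them by their CJK file names and hands each list to the spider. Roughly equivalent to:

    import importlib
    from spider import Crawl_Hndzmc

    mod = importlib.import_module("验收公告")  # module name == file name without ".py"
    Crawl_Hndzmc().start(mod.menus)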

+ 1 - 1
lzz_theme/htdzcgpt/htdz_login.py

@@ -4,7 +4,7 @@ import time
 import requests
 from hashlib import md5
 from utils.get_imgcode import chaojiying_platform
-from loguru import logger
+from utils.log import logger
 import json
 
 

+ 12 - 12
lzz_theme/htdzcgpt/start.sh

@@ -13,16 +13,16 @@ ps -ef |grep "htdz_zbgg_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "htdz_zbhxrgs_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "htdz_zbhxrgs_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 htdz_bggg_list.py > log/htdz_bggg_list.out 2>&1 &
-nohup python3 htdz_cjgg_details.py > log/htdz_cjgg_details.out 2>&1 &
-nohup python3 htdz_cjgg_list.py > log/htdz_cjgg_list.out 2>&1 &
-nohup python3 htdz_jzxtp_details.py > log/htdz_jzxtp_details.out 2>&1 &
-nohup python3 htdz_jzxtp_list.py > log/htdz_jzxtp_list.out 2>&1 &
-nohup python3 htdz_qtcg_list.py > log/htdz_qtcg_list.out 2>&1 &
-nohup python3 htdz_xjgg_details.py > log/htdz_xjgg_details.out 2>&1 &
-nohup python3 htdz_xjgg_list.py > log/htdz_xjgg_list.out 2>&1 &
-nohup python3 htdz_zbgg_details.py > log/htdz_zbgg_details.out 2>&1 &
-nohup python3 htdz_zbgg_list.py > log/htdz_zbgg_list.out 2>&1 &
-nohup python3 htdz_zbhxrgs_details.py > log/htdz_zbhxrgs_details.out 2>&1 &
-nohup python3 htdz_zbhxrgs_list.py > log/htdz_zbhxrgs_list.out 2>&1 &
+nohup python3 htdz_bggg_list.py > /dev/null 2>&1 &
+nohup python3 htdz_cjgg_details.py > /dev/null 2>&1 &
+nohup python3 htdz_cjgg_list.py > /dev/null 2>&1 &
+nohup python3 htdz_jzxtp_details.py > /dev/null 2>&1 &
+nohup python3 htdz_jzxtp_list.py > /dev/null 2>&1 &
+nohup python3 htdz_qtcg_list.py > /dev/null 2>&1 &
+nohup python3 htdz_xjgg_details.py > /dev/null 2>&1 &
+nohup python3 htdz_xjgg_list.py > /dev/null 2>&1 &
+nohup python3 htdz_zbgg_details.py > /dev/null 2>&1 &
+nohup python3 htdz_zbgg_list.py > /dev/null 2>&1 &
+nohup python3 htdz_zbhxrgs_details.py > /dev/null 2>&1 &
+nohup python3 htdz_zbhxrgs_list.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/jsxmhjyxdjbbaxt/det_start.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "jsxmhjyx_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 jsxmhjyx_details.py > log/jsxmhjyx_details.out 2>&1 &
+nohup python3 jsxmhjyx_details.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/jsxmhjyxdjbbaxt/rstart.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "jsxmhjyx_retry_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 jsxmhjyx_retry_list.py > log/jsxmhjyx_retry_list.out 2>&1 &
+nohup python3 jsxmhjyx_retry_list.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/jsxmhjyxdjbbaxt/start.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "jsxmhjyx_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 jsxmhjyx_list.py > log/jsxmhjyx_list.out 2>&1 &
+nohup python3 jsxmhjyx_list.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/lcdzcgpt/lcdz_login.py

@@ -1,7 +1,7 @@
 import json
 import requests
 import execjs
-from loguru import logger
+from utils.log import logger
 
 import warnings
 

+ 1 - 1
lzz_theme/lcdzcgpt/start.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "lcdz_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 lcdz_list.py > log/lcdz_list.out 2>&1 &
+nohup python3 lcdz_list.py > /dev/null 2>&1 &
 

+ 7 - 7
lzz_theme/qgzbgggsssyq/start.sh

@@ -5,14 +5,14 @@
 #ps -ef |grep "py_ssyq_details2.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 #ps -ef |grep "py_ssyq_details3.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 #ps -ef |grep "py_ssyq_details4.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-#nohup python3 py_ssyq_list.py > log/py_ssyq_list.out 2>&1 &
-#nohup python3 py_ssyq_details.py > log/py_ssyq_details.out 2>&1 &
-#nohup python3 py_ssyq_details2.py > log/py_ssyq_details2.out 2>&1 &
-#nohup python3 py_ssyq_details3.py > log/py_ssyq_details3.out 2>&1 &
-#nohup python3 py_ssyq_details4.py > log/py_ssyq_details4.out 2>&1 &
+#nohup python3 py_ssyq_list.py > /dev/null 2>&1 &
+#nohup python3 py_ssyq_details.py > /dev/null 2>&1 &
+#nohup python3 py_ssyq_details2.py > /dev/null 2>&1 &
+#nohup python3 py_ssyq_details3.py > /dev/null 2>&1 &
+#nohup python3 py_ssyq_details4.py > /dev/null 2>&1 &
 
 
 ps -ef |grep "spider_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9 2>/dev/null
 ps -ef |grep "spider_detail.py" |grep -v grep |awk '{print $2}' |xargs kill -9 2>/dev/null
-nohup python3 spider_list.py > log/spider_list.out 2>&1 &
-nohup python3 spider_detail.py > log/spider_detail.out 2>&1 &
+nohup python3 spider_list.py > /dev/null 2>&1 &
+nohup python3 spider_detail.py > /dev/null 2>&1 &

+ 5 - 3
lzz_theme/qgzbgggsssyq/start_spider.sh

@@ -106,6 +106,8 @@ class Details:
                     rr = self.detail_get(response, item=item)
                     if rr and rr == "500":
                         return "500"
+                    self.count += 1
+                    time.sleep(random.randint(5, 10))
                     return True
                 else:
                     retry_times += 1
@@ -165,13 +167,11 @@ class Details:
             update_id = item["_id"]
             result = self.deal_request(item)
             if result is True:
-                self.count += 1
                 self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
             elif result == "500":
                 break
             else:
                 self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
-            time.sleep(random.randint(5, 10))
 
         total_count += self.count
         new_info = {
@@ -187,4 +187,6 @@ class Details:
 
 
 if __name__ == "__main__":
-    Details().start(limit=random.randint(60,100))
+    Details().start(limit=200)
+
+

+ 1 - 4
lzz_theme/qjwqzbcgxxw/start.sh

@@ -1,8 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "qjwqzb_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-ps -ef |grep "qjwqzb_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-
-nohup python3 qjwqzb_list.py > log/qjwqzb_list.out 2>&1 &
-nohup python3 qjwqzb_details.py > log/qjwqzb_details.out 2>&1 &
+nohup python3 qjwqzb_list.py > /dev/null 2>&1 &
 

+ 12 - 88
lzz_theme/requirements.txt

@@ -1,91 +1,15 @@
-aliyun-python-sdk-core==2.14.0
-aliyun-python-sdk-kms==2.16.2
-annotated-types==0.7.0
-anyio==4.4.0
-Brotli==1.1.0
-certifi==2023.11.17
-cffi==1.16.0
-chardet==3.0.4
-charset-normalizer==3.3.2
-click==8.1.7
-copyheaders==0.0.2
-crcmod==1.7
-cryptography==36.0.1
-cssselect==1.2.0
-cycler==0.12.1
-dnspython==2.6.1
-elasticsearch==7.8.0
-email_validator==2.2.0
-et-xmlfile==1.1.0
-exceptiongroup==1.2.2
-fake-useragent==1.5.1
-fastapi==0.111.1
-fastapi-cli==0.0.4
-fonttools==4.47.2
-h11==0.14.0
-hpack==4.0.0
-httpcore==1.0.5
-httptools==0.6.1
-httpx==0.27.0
-idna==2.8
-importlib_resources==6.4.5
-itchat==1.3.10
-Jinja2==3.1.4
-jmespath==0.10.0
-kiwisolver==1.4.5
-loguru==0.5.3
-lxml==4.6.3
-markdown-it-py==3.0.0
-MarkupSafe==2.1.5
-matplotlib==3.5.1
-mdurl==0.1.2
-numpy==1.21.5
-opencv-python==4.6.0.66
-openpyxl==3.1.1
+elasticsearch~=7.10.1
+loguru==0.6.0
+lxml~=5.3.0
+numpy~=1.24.4
 oss2==2.14.0
-packaging==23.2
-parsel==1.6.0
-Pillow==9.5.0
-pyasn1==0.5.1
-pycparser==2.21
-pycryptodome==3.20.0
-pydantic==2.8.2
-pydantic_core==2.20.1
+parsel~=1.7.0
+Pillow~=10.4.0
 PyExecJS==1.5.1
-Pygments==2.18.0
-pyhttpx==2.10.12
-pymongo==3.10.1
-pyOpenSSL==21.0.0
-pyparsing==3.1.1
-pypng==0.20220715.0
-PyQRCode==1.2.1
-PySocks==1.7.1
-python-dateutil==2.8.2
-python-dotenv==1.0.1
-python-multipart==0.0.9
+pymongo~=3.12.0
 pytz==2024.2
-PyYAML==6.0.1
-redis==3.3.6
-requests==2.31.0
-requests-toolbelt==1.0.0
-rich==13.7.1
-rsa==4.8
-selenium==3.141.0
-shellingham==1.5.4
-six==1.16.0
-sniffio==1.3.1
-starlette==0.37.2
-tls-client==1.0.1
-tqdm==4.64.0
-typer==0.12.3
-typing_extensions==4.12.1
-urllib3==1.26.18
-uvicorn==0.30.1
-uvloop==0.19.0
-w3lib==2.1.2
-watchfiles==0.22.0
-websockets==12.0
-xlrd==2.0.1
-xlutils==2.0.0
-xlwt==1.3.0
-zipp==3.20.2
+redis~=3.5.3
+requests~=2.28.1
+tqdm~=4.67.1
+urllib3~=1.26.13
+fake-useragent~=2.0.0
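The pin style also changes from exact versions (`==`) to compatible releases (`~=`): per PEP 440, `lxml~=5.3.0` accepts any `5.3.x` at or above `5.3.0` but excludes `5.4.0`, so patch fixes install without editing requirements. This can be verified with the `packaging` library:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=5.3.0")
    print("5.3.9" in spec)  # True  (patch upgrades allowed)
    print("5.4.0" in spec)  # False (minor upgrades excluded)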

+ 1 - 1
lzz_theme/sfc/sfc_cookies.txt

@@ -1 +1 @@
-eyJhbGciOiJIUzI1NiJ9.eyJ1c2VyQ29udGV4dCI6IntcInVzZXJuYW1lXCI6XCJ0b3AxMjNcIixcIm5pY2tOYW1lXCI6XCLlvKDph5HlnaRcIixcImZhY2VcIjpcImdyb3VwMS9NMDAvMDUvMDQvd0tnQWNXZHpTeVNBUGhJc0FBQlVQVS16UUtNNy5uLmpwZyxncm91cDEvTTAwLzBGLzRDL3dLZ0FjR2R6U3ktQUlQSkRBQUE3bERVR3kyMDIubi5wbmdcIixcImlkXCI6XCIxODcxODU2NjE0MzQ2NjIwOTI4XCIsXCJsb25nVGVybVwiOmZhbHNlLFwicm9sZVwiOlwiTUVNQkVSXCIsXCJzdG9yZUlkXCI6XCIxODcxODYyOTM4MzE5ODU5NzE0XCIsXCJjbGVya0lkXCI6XCIxODczOTIzMjI4MDE2NDUxNTg2XCIsXCJzdG9yZU5hbWVcIjpcIuays-WNl-aLk-aZruiuoeeul-acuue9kee7nOW3peeoi-aciemZkOWFrOWPuFwiLFwiY29tcGFueVNjYWxlXCI6XCJtZWR1aW1cIixcImNvbXBhbnlOYW1lXCI6XCLmsrPljZfmi5Pmma7orqHnrpfmnLrnvZHnu5zlt6XnqIvmnInpmZDlhazlj7hcIixcImlzU3VwZXJcIjp0cnVlLFwiYXJlYUNvZGVcIjpcIjQxMDEwNVwiLFwibGFzdExvZ2luRGF0ZVwiOlwiQXByIDI5LCAyMDI1IDM6MTE6MTggUE1cIixcInJlbW90ZUlwXCI6XCIxMDEuMjAwLjIwOS4xMVwifSIsInN1YiI6InRvcDEyMyIsImV4cCI6MTc0NTk4OTg3MX0.zyZFe5qkUIijvGqNqrclaE47JbG6HEdi5qy5HQ2Cdj0
+eyJhbGciOiJIUzI1NiJ9.eyJ1c2VyQ29udGV4dCI6IntcInVzZXJuYW1lXCI6XCJ0b3AxMjNcIixcIm5pY2tOYW1lXCI6XCLlvKDph5HlnaRcIixcImZhY2VcIjpcImdyb3VwMS9NMDAvMDUvMDQvd0tnQWNXZHpTeVNBUGhJc0FBQlVQVS16UUtNNy5uLmpwZyxncm91cDEvTTAwLzBGLzRDL3dLZ0FjR2R6U3ktQUlQSkRBQUE3bERVR3kyMDIubi5wbmdcIixcImlkXCI6XCIxODcxODU2NjE0MzQ2NjIwOTI4XCIsXCJsb25nVGVybVwiOmZhbHNlLFwicm9sZVwiOlwiTUVNQkVSXCIsXCJzdG9yZUlkXCI6XCIxODcxODYyOTM4MzE5ODU5NzE0XCIsXCJjbGVya0lkXCI6XCIxODczOTIzMjI4MDE2NDUxNTg2XCIsXCJzdG9yZU5hbWVcIjpcIuays-WNl-aLk-aZruiuoeeul-acuue9kee7nOW3peeoi-aciemZkOWFrOWPuFwiLFwiY29tcGFueVNjYWxlXCI6XCJtZWR1aW1cIixcImNvbXBhbnlOYW1lXCI6XCLmsrPljZfmi5Pmma7orqHnrpfmnLrnvZHnu5zlt6XnqIvmnInpmZDlhazlj7hcIixcImlzU3VwZXJcIjp0cnVlLFwiYXJlYUNvZGVcIjpcIjQxMDEwNVwiLFwibGFzdExvZ2luRGF0ZVwiOlwiQXVnIDIwLCAyMDI1IDk6MTE6MTUgQU1cIixcInJlbW90ZUlwXCI6XCIxMDEuMjAwLjIwOS4xMVwifSIsInN1YiI6InRvcDEyMyIsImV4cCI6MTc1NTY1OTQ4M30.PeMykQTB4MkPGZibX-0UK85XojWevTje4z240gaxheY

+ 1 - 1
lzz_theme/sfc/sfc_uuid.txt

@@ -1 +1 @@
-5244fd4b-b1c2-4f66-98e2-d32175e29a7e
+d6f9b862-82eb-499a-981d-0666d36a5477

BIN
lzz_theme/sfc/slice.png


+ 9 - 9
lzz_theme/sfc/start.sh

@@ -10,13 +10,13 @@ ps -ef |grep "sfc_htgg_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "sfc_zzgg_detail.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "sfc_zzgg_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 sfc_cjgg_detail.py > log/sfc_cjgg_detail.out 2>&1 &
-nohup python3 sfc_cjgg_list.py > log/sfc_cjgg_list.out 2>&1 &
-nohup python3 sfc_gkbx_list.py > log/sfc_gkbx_list.out 2>&1 &
-nohup python3 sfc_gzgg_detail.py > log/sfc_gzgg_detail.out 2>&1 &
-nohup python3 sfc_gzgg_list.py > log/sfc_gzgg_list.out 2>&1 &
-nohup python3 sfc_htgg_detail.py > log/sfc_htgg_detail.out 2>&1 &
-nohup python3 sfc_htgg_list.py > log/sfc_htgg_list.out 2>&1 &
-nohup python3 sfc_zzgg_detail.py > log/sfc_zzgg_detail.out 2>&1 &
-nohup python3 sfc_zzgg_list.py > log/sfc_zzgg_list.out 2>&1 &
+nohup python3 sfc_cjgg_detail.py > /dev/null 2>&1 &
+nohup python3 sfc_cjgg_list.py > /dev/null 2>&1 &
+nohup python3 sfc_gkbx_list.py > /dev/null 2>&1 &
+nohup python3 sfc_gzgg_detail.py > /dev/null 2>&1 &
+nohup python3 sfc_gzgg_list.py > /dev/null 2>&1 &
+nohup python3 sfc_htgg_detail.py > /dev/null 2>&1 &
+nohup python3 sfc_htgg_list.py > /dev/null 2>&1 &
+nohup python3 sfc_zzgg_detail.py > /dev/null 2>&1 &
+nohup python3 sfc_zzgg_list.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/sgycw/sgycw_login.py

@@ -3,7 +3,7 @@ import json
 import requests
 from utils.chaojiying import postpic, report_error
 from hashlib import md5
-from loguru import logger
+from utils.log import logger
 
 import warnings
 

+ 2 - 2
lzz_theme/sgycw/start.sh

@@ -3,5 +3,5 @@
 ps -ef |grep "sgycw_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "sgycw_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 sgycw_list.py > log/sgycw_list.out 2>&1 &
-nohup python3 sgycw_details.py > log/sgycw_details.out 2>&1 &
+nohup python3 sgycw_list.py > /dev/null 2>&1 &
+nohup python3 sgycw_details.py > /dev/null 2>&1 &

+ 2 - 2
lzz_theme/szycycgpt/start.sh

@@ -3,5 +3,5 @@
 ps -ef |grep "szyc_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "szyc_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 szyc_list.py > log/szyc_list.out 2>&1 &
-nohup python3 szyc_details.py > log/szyc_details.out 2>&1 &
+nohup python3 szyc_list.py > /dev/null 2>&1 &
+nohup python3 szyc_details.py > /dev/null 2>&1 &

+ 3 - 3
lzz_theme/tjszfcgw/start.sh

@@ -3,6 +3,6 @@
 ps -ef |grep "tjszfcgw_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "tjszfcgw_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "tjszfcgw_details2.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 tjszfcgw_list.py > log/tjszfcgw_list.out 2>&1 &
-nohup python3 tjszfcgw_details.py > log/tjszfcgw_details.out 2>&1 &
-nohup python3 tjszfcgw_details2.py > log/tjszfcgw_details2.out 2>&1 &
+nohup python3 tjszfcgw_list.py > /dev/null 2>&1 &
+nohup python3 tjszfcgw_details.py > /dev/null 2>&1 &
+nohup python3 tjszfcgw_details2.py > /dev/null 2>&1 &

+ 32 - 33
lzz_theme/utils/tools.py

@@ -20,10 +20,10 @@ import bson
 import execjs
 import redis
 import requests
-from loguru import logger
 from pymongo import MongoClient
 
 from utils.clean_html import cleaner
+from utils.log import logger
 
 try:
     from pymongo.errors import DuplicateKeyError
@@ -61,61 +61,60 @@ def nsssjss():
     return njs
 
 
-def get_QGIP():
+def get_pay_proxy():
     proxy = "http://6278CF0D:41D9C796172D@tun-vdpzuj.qg.net:15254"
-    proxies = {
-        "http": proxy,
-        "https": proxy,
-    }
-    return proxies
+    return {"http": proxy, "https": proxy}
+
+get_QGIP = get_pay_proxy  # backwards-compatible alias for existing call sites
 
 
 def get_proxy(scheme=None, default=None, socks5h=False):
-    headers = {
-        "Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"
-    }
+    url = "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch"
+    headers = {"Authorization": "Basic amlhbnl1MDAxOjEyM3F3ZSFB"}
     while True:
-        proxy = requests.get("http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch", headers=headers).json()
-        # proxy = requests.get("http://39.106.157.58:1405/crawl/proxy/socks5/fetch", headers=headers).json()
-        proxies = proxy.get("data")
+        try:
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+        except requests.exceptions.RequestException as e:
+            logger.error(f"代理获取失败 | {type(e).__name__} | {e}")
+            raise e
+
+        proxies = response.json().get("data")
         if proxies:
             break
         else:
             logger.warning("暂无代理...")
             time.sleep(3)
+
     if socks5h:
-        proxyh = {
+        proxies = {
             "http": proxies.get("http").replace("socks5", "socks5h"),
             "https": proxies.get("http").replace("socks5", "socks5h")
         }
-        proxies = proxyh
-    logger.info(f"切换代理: {proxies}")
+
+    logger.info(f"提取代理 | {proxies}")
     if not scheme:
         return proxies
     else:
         return proxies.get(scheme, default)
 
 
-def Mongo_client():
-    client = MongoClient("172.17.4.87", 27080)
-    # client = MongoClient("172.20.45.130", 27017)
-    return client
+def Mongo_client(env=None):
+    kwargs = dict(host="172.20.47.168", port=27080)
+    if env == "test":
+        kwargs = dict(host="172.20.45.130", port=27017)
+    return MongoClient(**kwargs)
+
 
+def Redis_client(env=None):
+    connection_kwargs = dict(host='172.17.162.28', password='k5ZJR5KV4q7DRZ92DQ', port=7361, db=1)
+    if env == "test":
+        connection_kwargs = dict(host='172.20.45.129', password='jianyu@python', port=3379, db=1)
 
-def Redis_client():
-    _pool = redis.ConnectionPool(
-        host='172.17.162.28',
-        port=7361,
-        password='k5ZJR5KV4q7DRZ92DQ',
-        db=1
+    r = redis.Redis(
+        connection_pool=redis.ConnectionPool(**connection_kwargs),
+        decode_responses=True
     )
-    # _pool = redis.ConnectionPool(
-    #     host='172.20.45.129',
-    #     password='jianyu@python',
-    #     port=3379,
-    #     db=1
-    # )
-    r = redis.Redis(connection_pool=_pool, decode_responses=True)
     return r
 
 
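With this refactor both client factories take an optional `env` switch and `get_proxy` keeps its signature; typical call sites (host/port values as configured above):

    db = Mongo_client().py_spider      # production MongoDB (172.20.47.168:27080)
    db_t = Mongo_client("test")        # test MongoDB (172.20.45.130:27017)
    rds = Redis_client()               # production Redis, db=1
    proxies = get_proxy(socks5h=True)  # socks5h:// scheme for remote DNS resolution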

+ 0 - 6
lzz_theme/xgyyglj/start.sh

@@ -1,6 +0,0 @@
-#!/bin/bash
-
-ps -ef |grep "xgyy_spider_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-ps -ef |grep "xgyy_spider_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 xgyy_spider_list.py > log/xgyy_spider_list.out 2>&1 &
-nohup python3 xgyy_spider_details.py > log/xgyy_spider_details.out 2>&1 &

+ 0 - 1
lzz_theme/xgyyglj/translate/baidufanyi_ck.json

@@ -1 +0,0 @@
-{"BAIDUID": "29CB21429A9FDF497FA142316CB8CF81:FG=1", "BAIDUID_BFESS": "29CB21429A9FDF497FA142316CB8CF81:FG=1", "token": "d26398671e647f777691ad5b615d966a"}

+ 0 - 68
lzz_theme/xgyyglj/translate/baidutrans.js

@@ -1,68 +0,0 @@
-const jsdom = require("jsdom");
-const {JSDOM} = jsdom;
-const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
-window = dom.window;
-document = window.document;
-window.gtk = "320305.131321201"
-
-function e(t, e) {
-    (null == e || e > t.length) && (e = t.length);
-    for (var n = 0, r = new Array(e); n < e; n++)
-        r[n] = t[n];
-    return r
-}
-function n(t, e) {
-    for (var n = 0; n < e.length - 2; n += 3) {
-        var r = e.charAt(n + 2);
-        r = "a" <= r ? r.charCodeAt(0) - 87 : Number(r),
-        r = "+" === e.charAt(n + 1) ? t >>> r : t << r,
-        t = "+" === e.charAt(n) ? t + r & 4294967295 : t ^ r
-    }
-    return t
-}
-var r = null;
-function get_sign(t) {
-    var o, i = t.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
-    if (null === i) {
-        var a = t.length;
-        a > 30 && (t = "".concat(t.substr(0, 10)).concat(t.substr(Math.floor(a / 2) - 5, 10)).concat(t.substr(-10, 10)))
-    } else {
-        for (var s = t.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), c = 0, l = s.length, u = []; c < l; c++)
-            "" !== s[c] && u.push.apply(u, function(t) {
-                if (Array.isArray(t))
-                    return e(t)
-            }(o = s[c].split("")) || function(t) {
-                if ("undefined" != typeof Symbol && null != t[Symbol.iterator] || null != t["@@iterator"])
-                    return Array.from(t)
-            }(o) || function(t, n) {
-                if (t) {
-                    if ("string" == typeof t)
-                        return e(t, n);
-                    var r = Object.prototype.toString.call(t).slice(8, -1);
-                    return "Object" === r && t.constructor && (r = t.constructor.name),
-                    "Map" === r || "Set" === r ? Array.from(t) : "Arguments" === r || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r) ? e(t, n) : void 0
-                }
-            }(o) || function() {
-                throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")
-            }()),
-            c !== l - 1 && u.push(i[c]);
-        var p = u.length;
-        p > 30 && (t = u.slice(0, 10).join("") + u.slice(Math.floor(p / 2) - 5, Math.floor(p / 2) + 5).join("") + u.slice(-10).join(""))
-    }
-    for (var d = "".concat(String.fromCharCode(103)).concat(String.fromCharCode(116)).concat(String.fromCharCode(107)), h = (null !== r ? r : (r = window.gtk || "") || "").split("."), f = Number(h[0]) || 0, m = Number(h[1]) || 0, g = [], y = 0, v = 0; v < t.length; v++) {
-        var _ = t.charCodeAt(v);
-        _ < 128 ? g[y++] = _ : (_ < 2048 ? g[y++] = _ >> 6 | 192 : (55296 == (64512 & _) && v + 1 < t.length && 56320 == (64512 & t.charCodeAt(v + 1)) ? (_ = 65536 + ((1023 & _) << 10) + (1023 & t.charCodeAt(++v)),
-        g[y++] = _ >> 18 | 240,
-        g[y++] = _ >> 12 & 63 | 128) : g[y++] = _ >> 12 | 224,
-        g[y++] = _ >> 6 & 63 | 128),
-        g[y++] = 63 & _ | 128)
-    }
-    for (var b = f, w = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(97)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(54)), k = "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(51)) + "".concat(String.fromCharCode(94)).concat(String.fromCharCode(43)).concat(String.fromCharCode(98)) + "".concat(String.fromCharCode(43)).concat(String.fromCharCode(45)).concat(String.fromCharCode(102)), x = 0; x < g.length; x++)
-        b = n(b += g[x], w);
-    return b = n(b, k),
-    (b ^= m) < 0 && (b = 2147483648 + (2147483647 & b)),
-    "".concat((b %= 1e6).toString(), ".").concat(b ^ f)
-}
-
-
-console.log(get_sign('[PleasecontactMsKylieNGat23007465onproblemswithdownloading]'))

+ 0 - 115
lzz_theme/xgyyglj/translate/bd_translate.py

@@ -1,115 +0,0 @@
-import requests
-import json
-import execjs
-import re
-import time
-
-
-class BD_spider():
-
-    def get_js(self, b):
-        with open("translate/baidutrans.js", "r") as f:
-            baidu = f.read()
-        bdjs = execjs.compile(baidu)
-        sign = bdjs.call("get_sign", b)
-        return sign
-
-    def create_token(self, proxies=False):
-        session = requests.session()
-        session.proxies = proxies
-
-        headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Referer": "https://fanyi.baidu.com/",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        }
-
-        url = "https://fanyi.baidu.com/"
-        res = session.get(url, headers=headers, timeout=30)
-
-        response = session.get(url, headers=headers, timeout=30)
-
-        token = "".join(re.findall("token: '(.*?)'", response.text, re.S))
-        cookies = session.cookies.get_dict()
-        cookies["token"] = token
-
-        with open('translate/baidufanyi_ck.json', 'w+') as f:
-            f.write(json.dumps(cookies))
-
-        return cookies
-
-    def baidu(self, b, proxies=False):
-        retry = 0
-        while retry < 3:
-            try:
-                session = requests.session()
-                session.proxies = proxies
-
-                with open('translate/baidufanyi_ck.json', 'r') as f:
-                    pre_cookie = f.read()
-
-                cookies = json.loads(pre_cookie)
-                token = cookies.pop('token',None)
-
-                headers = {
-                    "Accept": "*/*",
-                    "Accept-Language": "zh-CN,zh;q=0.9",
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-                    "Origin": "https://fanyi.baidu.com",
-                    "Pragma": "no-cache",
-                    "Referer": "https://fanyi.baidu.com/",
-                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-                    "X-Requested-With": "XMLHttpRequest",
-                }
-
-                url_f = "https://fanyi.baidu.com/langdetect"
-                data_f = {
-                    "query": b
-                }
-                res_f = session.post(url_f, headers=headers, data=data_f, timeout=30)
-                fr = res_f.json().get('lan')
-                if fr == "zh":
-                    to = "en"
-                else:
-                    to = "zh"
-                url = f"https://fanyi.baidu.com//v2transapi?from={fr}&to={to}"
-
-                data = {
-                    "from": fr,
-                    "to": to,
-                    "query": b,
-                    "transtype": "realtime",
-                    "simple_means_flag": "3",
-                    "sign": self.get_js(b),
-                    "token": f"{token}",
-                    "domain": "common",
-                    # "ts": f"{int(time.time()*1000)}"
-                }
-
-                response = session.post(url, data=data, headers=headers, cookies=cookies, timeout=30)
-                time.sleep(1)
-                respon = response.content.decode()
-                resp = json.loads(respon)
-                jieguo = resp['trans_result']['data'][0]["dst"]
-                # print("翻译结果:", jieguo)
-                return jieguo
-            except Exception as e:
-                print(f"{b} 翻译错误:",e)
-                retry += 1
-                time.sleep(5)
-                self.create_token(proxies)
-
-
-        return ""
-
-
-# if __name__ == '__main__':
-#     b = input("请输入您要翻译的单词:" + "\n")
-#     BD_spider().baidu(b)

+ 0 - 177
lzz_theme/xgyyglj/xgyy_spider_details.py

@@ -1,177 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-01-23
----------
-@summary: 香港医院管理局 - 详情页
----------
-@author: Lzz
-"""
-import sys
-import os
-
-from pymongo.errors import DuplicateKeyError
-
-sys.path.append(os.path.dirname(os.getcwd()))
-import random
-from utils.clean_html import cleaner
-from threading import Timer
-from parsel import Selector
-from utils.tools import *
-from translate.bd_translate import BD_spider
-import warnings
-
-warnings.filterwarnings('ignore')
-
-
-class Details:
-
-    def __init__(self):
-        self.proxy = False
-        self.redis_key = "xgyyglj_tendernotices"
-        self.db_table = Mongo_client().py_spider
-        self.db_name = self.db_table.theme_list
-        # self.zt_details = self.db_table.data_bak
-        self.zt_details = self.db_table.xgyyglj_EN
-        self.rds = Redis_client()
-        self.delete_key = ""
-        self.end_time = 0
-        self.headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        }
-
-    def trans_html(self,contenthtml):
-
-        rr = Selector(contenthtml).xpath('//text()').extract()
-        new_data_list = []
-
-        for info in rr:
-            info = info.replace('\r', '').replace('\n', '').replace('\t', '').strip()
-            if info:
-                if re.search('\w', info):
-                    # tran_info = BD_spider().baidu(info)
-                    new_data_list.append(info)
-                else:
-                    new_data_list.append(info)
-
-        CH_html = " ".join(new_data_list)
-
-        return CH_html
-
-
-    def detail_get(self, response, item):
-        response.encoding = response.apparent_encoding
-        root = Selector(text=response.text)
-
-        html = root.xpath('//table[@class="xl655357677"]').extract_first("").strip()
-        if not html:
-            html = root.xpath('//table').extract_first("").strip()
-        # html = self.trans_html(html)     # 正文翻译成中文,删除所有正文样式
-
-        item["contenthtml"] = html
-
-        pbt = ""
-        for ttt in root.xpath('//td/text()').extract():
-            if re.fullmatch('\d{1,2}/\d{1,2}/\d{4}',ttt):
-                pbt = ttt
-        new_pbt = pbt.split('/')
-        publishtime = new_pbt[-1] + "-" +new_pbt[1] + "-" + new_pbt[0]
-        pub_time = handle_publish_time(publishtime)
-
-        item['publishtime'] = pub_time[0]
-        item['l_np_publishtime'] = pub_time[1]
-        item['s_title'] = item['title']
-
-        item['detail'] = cleaner(html)
-
-        item.pop('parse_url',None)
-        item.pop('parser_name',None)
-        item.pop('is_crawl',None)
-        item.pop('failed',None)
-        item.pop('retry', None)
-        item.pop('_id',None)
-
-        item['comeintime'] = int2long(time.time())
-
-        try:
-            self.zt_details.insert_one(item)
-            logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
-        except DuplicateKeyError:
-            logger.info(f"[重复采集]{item['title']}-{item['publishtime']}")
-
-    def fetch_request(self, item):
-
-        response = requests.get(url=item.get("parse_url"), headers=self.headers,
-                               proxies=self.proxy, timeout=(30,60), verify=False)
-        return response
-
-    def deal_request(self, item):
-        response = None
-        retry_times = 0
-        while retry_times < 3:
-            try:
-                response = self.fetch_request(item)
-                res_code = response.status_code
-                if response is not None and res_code == 200:
-                    self.detail_get(response, item=item)
-                    time.sleep(random.random())
-                    return True
-                else:
-                    self.proxy = get_proxy()
-                    retry_times += 1
-                    time.sleep(1)
-            except Exception as e:
-                logger.error(f"{item['href']} 采集异常:{e}")
-                self.proxy = get_proxy()
-                retry_times += 1
-                time.sleep(random.randint(3, 6))
-        logger.warning(f"[采集失败]{item['href']}")
-        return False
-
-    def countSec(self):
-        for count in range(10, 0, -1):
-            print(f'\r{count} 秒 后结束任务', end='')
-            time.sleep(1)
-        print('\r任务结束')
-
-    def de_redis_key(self):
-        self.end_time = 20
-        self.rds.hdel(self.redis_key, self.delete_key)
-        logger.warning("当前数据未采集成功,数据已回填!")
-        self.countSec()
-
-    def start(self, limit=1):
-        logger.debug("********** 详情页采集开始 **********")
-        time.sleep(random.random())
-        count = 0
-        ts = Timer(1790, self.de_redis_key)  # 声明一个定时器,设置多少s后执行
-        ts.start()                          # 启动定时器
-        with self.db_name.find({"parser_name": "xgyyglj_tendernotices", "failed": False, "is_crawl": False}) as data_lsit:
-            for item in data_lsit:
-                # logger.debug(item)
-                time.sleep(self.end_time)
-                if count >= 10:
-                    break
-                unicode_key = md5value(item.get('href') + item.get('title'))
-                if not self.rds.hexists(self.redis_key, unicode_key):  # 除 动态字段 外所有字段去重
-                    self.rds.hset(self.redis_key, unicode_key, '')
-                    self.delete_key = unicode_key
-                    count += 1
-                    update_id = item["_id"]
-                    if self.deal_request(item):
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
-                    else:
-                        self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
-                        self.rds.hdel(self.redis_key, unicode_key)
-
-        logger.debug("********** 详情页采集结束 **********")
-        ts.cancel()   # 脚本规定时间内正常结束,取消定时器
-
-
-if __name__ == "__main__":
-    Details().start(limit=1)

+ 0 - 150
lzz_theme/xgyyglj/xgyy_spider_list.py

@@ -1,150 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on 2024-01-23
----------
-@summary: 香港医院管理局
----------
-@author: Lzz
-"""
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.getcwd()))
-from collections import namedtuple
-from parsel import Selector
-from utils.tools import *
-from translate.bd_translate import BD_spider
-import warnings
-
-warnings.filterwarnings('ignore')
-
-
-class Spider:
-
-    def __init__(self):
-        self.proxy = get_proxy()
-        self.total = 0
-        self.r = Redis_client()
-        self.table = Mongo_client().py_spider.theme_list
-        self.redis_key = "xgyyglj_tendernotices"
-
-    def get_url(self):
-        headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        }
-        url = "https://www.ha.org.hk/visitor/ha_view_content.asp?Parent_ID=2001&Content_ID=257201&Lang=ENG"
-
-        new_url = "https://www.ha.org.hk"
-        for i in range(3):
-            resp = requests.get(url, headers=headers, timeout=30)
-            new_url = "".join(re.findall("window.open\('(.*?)'", resp.text, re.S))
-            if new_url:
-                break
-            time.sleep(6)
-
-        return "https://www.ha.org.hk" + new_url
-
-    def fetch_request(self):
-
-        headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-            "Accept-Language": "zh-CN,zh;q=0.9",
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "Pragma": "no-cache",
-            "Upgrade-Insecure-Requests": "1",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        }
-        url = self.get_url()
-        retry = 0
-        response = None
-        while retry < 5:
-            try:
-                response = requests.get(url, headers=headers, timeout=30, verify=False)
-                break
-            except Exception as e:
-                logger.error(f"请求异常:{e}")
-                time.sleep(3)
-                retry += 1
-
-        return response
-
-    def pasre(self, response, menu):
-
-        info_list = Selector(response.text).xpath('//div[@class="Section1"]/table[last()]/table/tr')
-        results_list = []
-        for item in info_list[1:]:
-            href = item.xpath('./td[2]/a/@href').extract_first("").strip()
-            title = item.xpath('./td[2]/a/text()').extract_first("").strip()
-            # title = BD_spider().baidu(title)  # 标题翻译成中文
-            if not self.r.hexists(self.redis_key, href):
-                item = {
-                    "site": "香港医院管理局",
-                    "channel": menu.channel,
-                    "spidercode": menu.spidercode,
-                    "area": "香港",
-                    "city": "",
-                    "district": "",
-                    "href": href,
-                    "title": title,
-                    "publishtime": "",
-                    "parse_url": href,
-                    "parser_name": self.redis_key,
-                    "is_mixed": False,
-                    "is_theme": True,
-                    "retry": 0,
-                    "comeintime": int2long(time.time()),
-                    "is_crawl": False,
-                    "failed": False,
-                    "iscompete": True,
-                    "sendflag": "false",
-                    "T": "bidding",
-                    "infoformat": 1,
-                    "type": "",
-                    "publishdept": "",
-                    "_d": "comeintime",
-                    "save": True,
-                }
-
-                self.table.insert_one(item)
-                self.r.hset(self.redis_key, href, '')
-                results_list.append(item)
-
-        logger.info(f' *** 第 1 页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
-
-        return len(results_list)
-
-    def crawler(self, menu):
-
-        rty = 0
-        while rty < 3:
-            try:
-                response = self.fetch_request()
-                if response:
-                    tt = self.pasre(response, menu)
-                    self.total += tt
-                    logger.info(f"当前共 采集 {self.total} 条")
-                    break
-            except Exception as e:
-                logger.error(f"解析异常:{e}")
-                time.sleep(3)
-                rty += 1
-
-    def start(self, menu):
-        logger.info(" *** 开始采集 ***")
-        self.crawler(menu)
-        logger.info(" *** 采集结束 ***")
-
-
-if __name__ == '__main__':
-    Menu = namedtuple('Menu', ['channel', 'spidercode', 'crawl_page'])
-
-    menus = Menu('Tender Notices', 'a_xgyyglj_tendernotices', 1)
-
-    Spider().start(menus)

+ 1 - 1
lzz_theme/ynszfcgw/det_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "yncgyx_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 yncgyx_details.py > log/yncgyx_details.out 2>&1 &
+nohup python3 yncgyx_details.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/ynszfcgw/start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "采购意向公开_leve0.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 采购意向公开_leve0.py > log/采购意向公开_leve0.out 2>&1 &
+nohup python3 采购意向公开_leve0.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/ynszfcgw/start1.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "采购意向公开_leve1.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 采购意向公开_leve1.py > log/采购意向公开_leve1.out 2>&1 &
+nohup python3 采购意向公开_leve1.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/ynszfcgw/start2.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "采购意向公开_leve2.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 采购意向公开_leve2.py > log/采购意向公开_leve2.out 2>&1 &
+nohup python3 采购意向公开_leve2.py > /dev/null 2>&1 &

+ 2 - 2
lzz_theme/ynszfcgw/start3.sh

@@ -2,5 +2,5 @@
 
 ps -ef |grep "采购意向公开_leve3.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "采购意向公开_leve.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 采购意向公开_leve3.py > log/采购意向公开_leve3.out 2>&1 &
-nohup python3 采购意向公开_leve.py > log/采购意向公开_leve.out 2>&1 &
+nohup python3 采购意向公开_leve3.py > /dev/null 2>&1 &
+nohup python3 采购意向公开_leve.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/yyc/Yyc_dtcookie.py

@@ -1,6 +1,6 @@
 import random
 import time
-from loguru import logger
+from utils.log import logger
 import requests
 import re
 from hashlib import md5, sha1, sha256

+ 4 - 4
lzz_theme/yyc/start.sh

@@ -4,8 +4,8 @@ ps -ef |grep "Yyc_cgxj.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "Yyc_zbgg.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "yyc_cgxj_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "yyc_zbgg_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 Yyc_cgxj.py > log/Yyc_cgxj.out 2>&1 &
-nohup python3 Yyc_zbgg.py > log/Yyc_zbgg.out 2>&1 &
-nohup python3 yyc_cgxj_details.py > log/yyc_cgxj_details.out 2>&1 &
-nohup python3 yyc_zbgg_details.py > log/yyc_zbgg_details.out 2>&1 &
+nohup python3 Yyc_cgxj.py > /dev/null 2>&1 &
+nohup python3 Yyc_zbgg.py > /dev/null 2>&1 &
+nohup python3 yyc_cgxj_details.py > /dev/null 2>&1 &
+nohup python3 yyc_zbgg_details.py > /dev/null 2>&1 &
 

+ 2 - 2
lzz_theme/yzcbjkjfzyxgs/start.sh

@@ -2,5 +2,5 @@
 
 ps -ef |grep "yzc_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "yzc_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 yzc_list.py > log/yzc_list.out 2>&1 &
-nohup python3 yzc_details.py > log/yzc_details.out 2>&1 &
+nohup python3 yzc_list.py > /dev/null 2>&1 &
+nohup python3 yzc_details.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/yzcbjkjfzyxgs/yzcbjkjfzyxgs_ck.py

@@ -1,7 +1,7 @@
 import requests
 import json
 import time
-from loguru import logger
+from utils.log import logger
 
 
 def create_cookie(proxies=False):

+ 93 - 0
lzz_theme/yzw/login.py

@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-10
+---------
+@summary: 云筑网 - account login
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+import requests
+import random
+import json
+import time
+from hashlib import md5
+from utils.log import logger
+from utils.tools import Mongo_client,get_current_date
+
+
+
+def alter(file, old_str, new_str):
+    """
+    Replace a string in a file
+    :param file: file name
+    :param old_str: old string
+    :param new_str: new string
+    :return:
+
+    """
+    file_data = ""
+    with open(file, "r", encoding="utf-8") as f:
+        for index,line in enumerate(f):
+            if old_str in line and index < 50:
+                line = line.replace(old_str, new_str,1)
+            file_data += line
+    with open(file, "w", encoding="utf-8") as f:
+        f.write(file_data)
+
+
+
+def Login(user="runhekeji",password="50lvwx50"):
+    logger.debug(f" >>> 登录账号:{user} ...")
+    time.sleep(5)
+    pwd = md5(password.encode()).hexdigest()
+
+    to_mongo = Mongo_client()
+    account_table = to_mongo.user_login.yzw_account
+    acc = account_table.find_one({"site": "云筑网"})
+    if acc.get('state') == "stop" or acc.get('login_times') > 5:
+        logger.warning('采集上限,停止采集')
+        to_mongo.close()
+        return False
+
+    headers = {
+        "accept": "application/json, text/plain, */*",
+        "accept-language": "zh-CN,zh;q=0.9",
+        "cache-control": "no-cache",
+        "content-type": "application/json",
+        "origin": "https://ucenter.yzw.cn",
+        "pragma": "no-cache",
+        "priority": "u=1, i",
+        "referer": "https://ucenter.yzw.cn/login",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+    }
+
+    url = "https://auac-sso.yzw.cn/api/auac/sso/v1/web/login"
+    data = {
+        "appKey": "base_account_center",
+        "loginName": user,
+        "password": pwd,
+        "loginType": 10,
+        "isRememberMe": False,
+        "agreementFlag": 1
+    }
+    data = json.dumps(data, separators=(',', ':'))
+    try:
+        response = requests.post(url, headers=headers, data=data, timeout=30, verify=False)
+        cookies = response.cookies.get_dict()
+        if "账号禁用" in response.text:
+            account_table.update_one({"site" : "云筑网"},{"$set": {"login_times": 10, "update_time": get_current_date()}})
+            logger.warning('账号封禁,停止登录')
+        else:
+            with open(f'./{user}_ck.json', 'w', encoding='utf-8') as fw:
+                fw.write(json.dumps(cookies))
+            logger.debug(f" >>> 账号:{user} 登录完成!")
+    except Exception as e:
+        logger.error(f" >>> 账号:{user} 登录失败!{e}")
+
+    to_mongo.close()
+    time.sleep(random.randint(30, 50))
+    return True
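
Everything in this new yzw module coordinates through a single account document in `user_login.yzw_account`: `Login()` refuses to run once `login_times` exceeds 5, and the spiders below stop when `count` reaches `total` or `state` is `"stop"`. A minimal seed for that document, inferred from the fields these scripts read and write (the connection string and initial values are assumptions, not part of the commit):

```python
# Hypothetical seed for the shared 云筑网 account document.
# Field names come from login.py / reset_count.py; values are assumed.
from pymongo import MongoClient

client = MongoClient("mongodb://127.0.0.1:27017")  # assumed host
client.user_login.yzw_account.insert_one({
    "site": "云筑网",        # lookup key used by every yzw script
    "account": "runhekeji",  # echoed by reset_count.py
    "state": "running",      # "stop" halts both list and detail spiders
    "login_times": 0,        # Login() bails out once this exceeds 5
    "count": 0,              # details collected so far today
    "total": 200,            # daily cap; reset nightly to random 150-200
    "update_time": "2025-06-10 02:00:00",  # format assumed
})
```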

+ 43 - 0
lzz_theme/yzw/reset_count.py

@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-10
+---------
+@summary: 重置账号采集数据量及登录次数,每日凌晨2点重置 | 前一天数据采集统计
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+from utils.tools import *
+
+
+def date_timestamp(date, time_format="%Y-%m-%d"):
+    timestamp = time.mktime(time.strptime(date, time_format))
+    return int(timestamp)
+
+
+
+def start():
+    to_mongo = Mongo_client()
+    account_table = to_mongo.user_login.yzw_account
+
+    # 重置 账号信息
+    with account_table.find() as cursor:
+        for item in cursor:
+            if item["login_times"] < 5:
+                account_table.update_one({"_id": item["_id"]},
+                                         {"$set": {"total": random.randint(150, 200), "count": 0,
+                                                   "login_times": 0, "state": "running",
+                                                   "update_time": get_current_date()}})
+                print(f" {item['account']} 已更新 < {get_current_date()} > ")
+            else:
+                print(f" {item['account']} 停止采集 < {get_current_date()} > ")
+
+    print('完成重置账号')
+    to_mongo.close()
+
+
+if __name__ == '__main__':
+    start()
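
Per the docstring this is meant to fire daily at 02:00, i.e. a crontab entry along the lines of `0 2 * * * cd /path/to/lzz_theme/yzw && python3 reset_count.py` (path assumed) next to the other jobs in `lzz_theme/crontab.txt`. Note that a ban is sticky: `Login()` forces `login_times` to 10 on 账号禁用, and the `< 5` guard above then skips that account on every subsequent reset until the document is corrected by hand.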

File diff suppressed because it is too large
+ 0 - 0
lzz_theme/yzw/runhekeji_ck.json


+ 11 - 0
lzz_theme/yzw/start.sh

@@ -0,0 +1,11 @@
+#!/bin/bash
+
+ps -ef |grep "yzw_xj_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+ps -ef |grep "yzw_xj_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+ps -ef |grep "yzw_zm_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+ps -ef |grep "yzw_zm_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+nohup python3 yzw_xj_details.py > /dev/null 2>&1 &
+nohup python3 yzw_xj_list.py > /dev/null 2>&1 &
+nohup python3 yzw_zm_details.py > /dev/null 2>&1 &
+nohup python3 yzw_zm_list.py > /dev/null 2>&1 &
+
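
One deployment caveat the script does not spell out: the four spiders and `login.py` all read and write the cookie file through the relative path `./runhekeji_ck.json`, so `start.sh` has to be launched with `lzz_theme/yzw` as the working directory (for example `cd lzz_theme/yzw && bash start.sh`, path assumed); starting it from elsewhere would create and look for cookie files in the wrong place.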

+ 362 - 0
lzz_theme/yzw/yzw_xj_details.py

@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-13
+---------
+@summary: 云筑网 - 市场询价 -  详情页
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+import json
+from requests_toolbelt import MultipartEncoder
+from requests.exceptions import RequestException
+from utils.attachment import AttachmentDownloader
+from utils.tools import *
+from login import Login
+import warnings
+
+warnings.filterwarnings('ignore')
+
+xmlx_dict = {'120201': '交通运输工程', '12020101': '交通运输工程/公路', '12020102': '交通运输工程/市政道路',
+             '12020103': '交通运输工程/铁路',
+             '12020104': '交通运输工程/城市轨道交通', '12020105': '交通运输工程/机场',
+             '12020107': '交通运输工程/停车场', '12020108': '交通运输工程/隧道',
+             '12020109': '交通运输工程/桥梁', '12020199': '交通运输工程/其他交通运输工程', '120202': '能源工程',
+             '12020201': '能源工程/变电站及输电线路工程',
+             '12020202': '能源工程/核电工程', '12020203': '能源工程/水电工程', '12020204': '能源工程/火电工程',
+             '12020205': '能源工程/风电工程',
+             '12020206': '能源工程/煤炭化工工程', '12020207': '能源工程/热力工程', '12020208': '能源工程/燃气供应工程',
+             '12020209': '能源工程/太阳能工程',
+             '12020210': '能源工程/矿山工程', '12020299': '能源工程/其他能源工程', '120203': '石油化工工程',
+             '12020301': '石油化工工程/炼油及石油化工工程',
+             '12020302': '石油化工工程/输油管线及泵房', '12020303': '石油化工工程/长输燃气管道',
+             '12020399': '石油化工工程/其他石油化工工程',
+             '120204': '供水及处理工程', '12020401': '供水及处理工程/输、供水工程(含管道及附属设备)',
+             '12020402': '供水及处理工程/水处理工程(含管道及附属设备)',
+             '12020403': '供水及处理工程/排污、排洪管道工程', '12020499': '供水及处理工程/其他供水及处理工程',
+             '1201': '房屋建设项目',
+             '120101': '房屋建设项目/住宅(含别墅、公寓)', '120102': '房屋建设项目/保障性住房',
+             '120103': '房屋建设项目/商用写字楼', '120104': '房屋建设项目/商厦',
+             '120105': '房屋建设项目/政府办公楼', '120106': '房屋建设项目/文化设施', '120107': '房屋建设项目/教育设施',
+             '120108': '房屋建设项目/体育设施',
+             '120109': '房屋建设项目/娱乐设施', '120110': '房屋建设项目/福利设施', '120111': '房屋建设项目/医疗建筑',
+             '120112': '房屋建设项目/酒店度假建筑',
+             '120113': '房屋建设项目/城市综合体', '120114': '房屋建设项目/工业加工(制造)厂房',
+             '120116': '房屋建设项目/会议会展中心',
+             '120117': '房屋建设项目/仓储物流', '120118': '房屋建设项目/宗教建筑',
+             '120119': '房屋建设项目/市政配套建筑', '120199': '房屋建设项目/其他',
+             '120299': '其他工程', '12029904': '其他工程/地下综合管廊、管网', '12029905': '其他工程/海绵城市',
+             '12029999': '其他工程/其他工程',
+             '120205': '环保工程', '12020501': '环保工程/民用垃圾处理', '12020502': '环保工程/工业废物处理',
+             '12020503': '环保工程/建筑业垃圾处理',
+             '12020505': '环保工程/景观、绿地与环境再造', '12020506': '环保工程/防磁、防光、防辐射、防噪音',
+             '12020599': '环保工程/其他环保工程',
+             '120206': '邮电通讯工程', '12020601': '邮电通讯工程/基站', '12020602': '邮电通讯工程/发射塔',
+             '12020603': '邮电通讯工程/通信线路',
+             '12020699': '邮电通讯工程/其他邮电通讯工程', '120207': '防卫防灾工程', '12020701': '防卫防灾工程/堤坝工程',
+             '12020702': '防卫防灾工程/山洪防御工程',
+             '12020703': '防卫防灾工程/防空设施', '12020704': '防卫防灾工程/消防设施',
+             '12020705': '防卫防灾工程/排雨工程',
+             '12020799': '防卫防灾工程/其他防卫防灾工程', '120208': '水利、水运工程',
+             '12020801': '水利、水运工程/引水工程', '12020802': '水利、水运工程/水库',
+             '12020803': '水利、水运工程/水利枢纽', '12020804': '水利、水运工程/灌溉排水',
+             '12020805': '水利、水运工程/船闸工程',
+             '12020806': '水利、水运工程/码头与岸壁工程', '12020807': '水利、水运工程/防波堤与护岸工程',
+             '12020808': '水利、水运工程/疏浚与吹填工程',
+             '12020809': '水利、水运工程/船坞与船台滑道工程', '12020810': '水利、水运工程/航道与航标工程',
+             '12020811': '水利、水运工程/道路与堆场工程',
+             '12020812': '水利、水运工程/填海造地、人工岛', '12020899': '水利、水运工程/其他水利、水运工程'}
+pay_type = {"NOW_PAY": "现款现结", "PRE_PAY": "预付款", "MONTH_PAY": "月付", "QUARTER_PAY": "季度付款","PROJECT_PAY": "根据项目进度结算", "OTHER": "其他"}
+zb_time = {"WITHIN_ONE_MONTH": "一个月内", "WITHIN_THREE_MONTH": "三个月内", "NOT_FOR_NOW": "暂不招标"}
+level = {"L0": "股份联采", "L1": "局级集采", "L2": "局二级集采", "L3": "局三级集采"}
+
+
+def get_file_url(cookies, org_url, proxies=False):
+    session = requests.Session()
+    session.proxies = proxies
+
+    headers = {
+        "accept": "application/json, text/plain, */*",
+        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+        "content-type": "application/json",
+        "origin": "https://xy.yzw.cn",
+        "priority": "u=1, i",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"
+    }
+
+    surl = "https://fss-css.yzw.cn/v1/fts/signHandler.fts"
+    sdata = {
+        "appCode": "bXRn",
+        "operate": "share",
+        "params": {
+            "filePath": org_url,
+            "expireInSecs": 3600
+        }
+    }
+    sdata = json.dumps(sdata, separators=(',', ':'))
+    res = session.post(surl, headers=headers, cookies=cookies, data=sdata, timeout=30)
+    dt = res.json().get('data')
+    access = dt.get('access')
+    expires = dt.get('expires')
+    sign = dt.get('sign')
+    token = dt.get('token')
+
+    headers["access"] = access
+    headers["expires"] = expires
+    headers["sign"] = sign
+    headers["token"] = token
+
+    url = "https://fss-css.yzw.cn/v1/fss/previewHandler.fss"
+    data = {
+        "filePath": org_url,
+        "expireInSecs": "3600",
+    }
+    mm = MultipartEncoder(data)
+    headers["Content-Type"] = mm.content_type
+    response = session.post(url, headers=headers, cookies=cookies, data=mm)
+
+    return response.json().get('data').get('url')
+
+
+class Details:
+
+    def __init__(self):
+        self.db_table = Mongo_client()
+        self.yzw_acc = self.db_table.user_login.yzw_account
+        self.db_name = self.db_table.py_spider.theme_list
+        self.zt_details = self.db_table.py_spider.data_bak
+        self.rds = Redis_client()
+        self.cookies = None
+        self.username = "runhekeji"
+        self.count = 0
+        self.login_times = 0
+
+    def get_cookies(self):
+        if not os.path.isfile(f'./{self.username}_ck.json'):
+            Login()
+            self.login_times += 1
+
+        with open(f'./{self.username}_ck.json', 'r', encoding='utf-8') as fr:
+            cks = fr.read()
+        ck = json.loads(cks.replace("'", '"'))
+        return ck
+
+    def detail_get(self, response, item):
+        if '请先完成登录,再继续操作' in response.text or response.status_code == 401:
+            try:
+                os.remove(f'./{self.username}_ck.json')
+            except OSError:
+                pass
+            raise RequestException('登录失效,重新登录')
+
+        dt = response.json().get('data')
+
+        xmlx = xmlx_dict.get(dt.get('project').get('typeCode'))
+
+        xjnr = dt.get('commodityList') or []
+        xjnr_html = ""
+        for foo in xjnr:
+            temp = f'''
+            <tr>
+                <td>{foo.get('commodityName')}</td>
+                <td>{foo.get('standards')}</td>
+                <td>{foo.get('num')}</td>
+                <td>{foo.get('unitName')}</td>
+                <td></td>
+            </tr>
+            '''
+            xjnr_html += temp
+
+        attachment_html = ""
+        attachmentList = dt.get('attachmentList') or []
+        attachments = {}
+        for att in attachmentList:
+            file_url = get_file_url(self.cookies, att.get('ossKey'))
+            file_name = att.get('fileName')
+            ff = f'''
+            <span><a href="{file_url}">{file_name}</a></span>
+            '''
+            attachment_html += ff
+            file_type = extract_file_type(file_name)
+            if file_type:
+                attachment = AttachmentDownloader().fetch_attachment(
+                    file_name=file_name, file_type=file_type, download_url=file_url)
+                attachments[str(len(attachments) + 1)] = attachment
+
+        if attachments:
+            item['projectinfo'] = {"attachments": attachments}
+
+        purchaseOrgLevel = dt.get('purchaseOrgLevel')
+        if purchaseOrgLevel:
+            purchaseOrgLevel = level.get(purchaseOrgLevel)
+            xmxx = f'''
+            <div>集采信息</div>
+            <table style="table-layout: auto;">
+                <colgroup>
+                    <col style="width: 100px;">
+                </colgroup>
+                <tbody>
+                <tr data-row-key="集采层级">
+                    <td>集采层级</td>
+                    <td>{purchaseOrgLevel}</td>
+                </tr>
+                <tr data-row-key="集采区域">
+                    <td>集采区域</td>
+                    <td>{dt.get('areaName')}</td>
+                </tr>
+                </tbody>
+            </table>
+            '''
+        else:
+            xmxx = f'''
+            <div>项目信息</div>
+            <table style="table-layout: auto;">
+                <colgroup>
+                    <col style="width: 100px;">
+                </colgroup>
+                <tbody>
+                <tr data-row-key="项目名称">
+                    <td>项目名称</td>
+                    <td>{dt.get('project').get('name')}</td>
+                </tr>
+                <tr data-row-key="项目类型">
+                    <td>项目类型</td>
+                    <td>{xmlx}</td>
+                </tr>
+                <tr data-row-key="项目区域">
+                    <td>项目区域</td>
+                    <td>{dt.get('areaName')}</td>
+                </tr>
+                </tbody>
+            </table>
+            '''
+
+        html = f'''
+        <div>
+            <div>所属单位</div>
+            {dt.get('orgLevel2Name')}
+        </div>
+        {xmxx}
+        <div>
+            <div>预计招标时间</div>
+            {zb_time.get(dt.get('tenderPlanTime'))}
+        </div>
+        <div>
+            <div>付款条件</div>
+            {pay_type.get(dt.get('project').get('paymentTerms'))}
+        </div>
+        <div>询价内容</div>
+        <table style="table-layout: fixed;">
+            <colgroup>
+                <col style="width: 140px;">
+                <col style="width: 250px;">
+                <col style="width: 100px;">
+                <col style="width: 100px;">
+                <col style="width: 100px;">
+                <col style="width: 120px;">
+                <col style="width: 8px;">
+            </colgroup>
+            <thead>
+            <tr>
+                <th scope="col">产品名称</th>
+                <th scope="col">规格型号</th>
+                <th scope="col">数量</th>
+                <th scope="col">单位</th>
+                <td></td>
+            </tr>
+            </thead>
+            <tbody>
+            {xjnr_html}
+            </tbody>
+        </table>
+        <div>
+            <div>需求附件</div>
+            {attachment_html}
+        </div>
+        <div>报价要求:{dt.get('quoteDemand')}</div>
+        '''
+
+        item["contenthtml"] = html.replace('None', '')
+
+        item = format_fileds(item)
+
+        self.zt_details.insert_one(item)
+        logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
+
+    def fetch_request(self, item):
+        self.cookies = self.get_cookies()
+        headers = {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "cache-control": "no-cache",
+            "origin": "https://xy.yzw.cn",
+            "pragma": "no-cache",
+            "priority": "u=1, i",
+            "referer": item['href'],
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+        }
+        response = requests.get(url=item.get("parse_url"), headers=headers,
+                                cookies=self.cookies, timeout=30, verify=False)
+        time.sleep(3)
+        return response
+
+    def deal_request(self, item):
+        retry_times = 0
+        org_item = item.copy()
+        while retry_times < 3:
+            try:
+                response = self.fetch_request(item)
+                if response is not None:
+                    self.detail_get(response, item=item)
+                    return True
+                else:
+                    retry_times += 1
+                    time.sleep(random.randint(6, 10))
+            except Exception as e:
+                item = org_item
+                logger.error(f"{item['href']} 采集异常:{e}")
+                retry_times += 1
+                time.sleep(random.randint(3, 6))
+        logger.warning(f"[采集失败]{item['href']}")
+        return False
+
+    def start(self, limit=1):
+        logger.debug("********** 详情页采集开始 **********")
+        time.sleep(random.random())
+
+        with self.db_name.find({"parser_name": "ztpc_yzw_xj", "failed": False, "is_crawl": False},
+                               sort=[('publishtime', -1)]).limit(limit) as data_list:
+            tasks = [dd for dd in data_list]
+
+        for item in tasks:
+            # logger.debug(item)
+
+            account = self.yzw_acc.find_one({"site": "云筑网"})
+
+            self.count = account['count']
+            self.login_times = account['login_times']
+
+            if self.count >= account['total'] or account['state'] == "stop" or self.login_times > 5:
+                logger.warning("账号 采集数量 限制")
+                return True
+
+            update_id = item["_id"]
+            if self.deal_request(item):
+                self.count += 1
+                self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
+            else:
+                self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
+
+            self.yzw_acc.update_one({"site": "云筑网"},
+                                    {"$set": {"count": self.count, "update_time": get_current_date(),
+                                              "login_times": self.login_times}})
+            time.sleep(random.randint(60, 100))
+
+        logger.debug("********** 详情页采集结束 **********")
+
+
+if __name__ == "__main__":
+    Details().start(limit=random.randint(15, 30))
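
The pacing here is deliberately conservative: each run draws a random 15-30 tasks, sleeps 60-100 s after every document, and bumps the shared `count`, so a single pass takes roughly 15 × 60 s ≈ 15 min at the low end and 30 × 100 s ≈ 50 min at the high end, while the account's daily `total` of 150-200 (set by reset_count.py) caps the volume no matter how often the scheduler fires.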

+ 164 - 0
lzz_theme/yzw/yzw_xj_list.py

@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-13
+---------
+@summary: 云筑网 - 市场询价 - 列表页
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+from utils.RedisDB import RedisFilter
+from requests.exceptions import RequestException
+from login import Login
+from utils.tools import *
+from collections import namedtuple
+import json
+import warnings
+warnings.filterwarnings('ignore')
+
+
+
+class Crawl_Yzw:
+
+    def __init__(self):
+        self.db_table = Mongo_client()
+        self.yzw_acc = self.db_table.user_login.yzw_account
+        self.zb_list = self.db_table.py_spider.theme_list
+        self.RDS = RedisFilter()
+
+        self.real_cont = 0
+        self.cookies = None
+        self.username = "runhekeji"
+
+    def get_cookies(self):
+        if not os.path.isfile(f'./{self.username}_ck.json'):
+            Login()
+
+        with open(f'./{self.username}_ck.json', 'r', encoding='utf-8') as fr:
+            cks = fr.read()
+        ck = json.loads(cks.replace("'", '"'))
+        return ck
+
+    def fetch_list_page(self, page, menu):
+        logger.debug(f' *** 开始采集第{page}页 ***')
+        self.cookies = self.get_cookies()
+
+        headers = {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "content-type": "application/json",
+            "origin": "https://xy.yzw.cn",
+            "priority": "u=1, i",
+            "referer": "https://xy.yzw.cn/search/sj/inquiry",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
+        }
+
+        url = "https://yzmtg.yzw.cn/portal/inquiry/search"
+        data = {
+            "pageNum": page,
+            "pageSize": 100,
+            "param": {}
+        }
+        data = json.dumps(data, separators=(',', ':'))
+
+        request_params = {
+            "headers": headers,
+            "data": data,
+            "cookies": self.cookies,
+            "timeout": 60,
+            "verify": False,
+        }
+        resp = requests.post(url, **request_params)
+        time.sleep(3)
+        return resp
+
+    def parser_list_page(self, response, page, menu):
+        if '请先完成登录,再继续操作' in response.text or response.status_code == 401:
+            try:
+                os.remove(f'./{self.username}_ck.json')
+            except OSError:
+                pass
+            raise RequestException('登录失效,重新登录')
+        results_list = []
+        info_list = response.json().get('data').get('records')
+        for info in info_list:
+            hid = info.get('code')
+            href = f"https://xy.yzw.cn/sj/inquiry-detail?inquiryCode={hid}&from=%E8%AF%A2%E4%BB%B7%E5%88%97%E8%A1%A8&searchId="
+            title = info.get('name').strip()
+            create_time = info.get('auditTime')
+
+            dedup = [href, title, create_time]
+            if not self.RDS.data_filter(dedup):
+                item = {
+                    "site": "云筑网",
+                    "channel": menu.channel,
+                    "spidercode": menu.spidercode,
+                    "area": "全国",
+                    "city": "",
+                    "district": "",
+                    "href": href,
+                    "title": title,
+                    "publishtime": create_time,
+                    "parse_url": f"https://yzmtg.yzw.cn/sup/inquiry/quote/getInquiry/{hid}",
+                    "parser_name": "ztpc_yzw_xj",
+                    "is_mixed": False,
+                    "is_theme": True,
+                    "retry": 0,
+                    "comeintime": int2long(int(time.time())),
+                    "is_crawl": False,
+                    "failed": False,
+                }
+
+                self.zb_list.insert_one(item)
+                self.RDS.data_save_redis(dedup)
+                results_list.append(item)
+
+        logger.info(f' *** 第{page}页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
+        self.real_cont += len(results_list)
+        return results_list
+
+
+    def crawl_list_spider(self, page, menu):
+        retry_times = 0
+        while retry_times < 3:
+            try:
+                response = self.fetch_list_page(page=page, menu=menu)
+                logger.debug(f"第{page}页 状态码:{response.status_code}")
+                if response is not None:
+                    self.parser_list_page(response=response, page=page, menu=menu)
+                    logger.info(f"当前已采集 {self.real_cont} 条数据")
+                    # time.sleep(random.randint(60, 100))
+                break
+            except Exception as e:
+                logger.error(f"第{page}页 采集异常:{e}")
+                retry_times += 1
+                time.sleep(10)
+
+
+    def start_list(self, menus):
+        logger.debug("********** 列表页开始 **********")
+        for menu in menus:
+            logger.debug(f"++++++ {menu.channel}开始采集 ++++++")
+            max_page = menu.crawl_page
+            for page in range(1, max_page + 1):
+                state = self.yzw_acc.find_one({"site": "云筑网"}).get('state')
+                if state == "stop":
+                    logger.warning("账号限制!")
+                    return
+                self.crawl_list_spider(page=page, menu=menu)
+            logger.debug(f"------ {menu.channel}采集结束 ------")
+
+        logger.debug("********** 列表页结束 **********")
+
+
+if __name__ == '__main__':
+    Menu = namedtuple('Menu', ['channel', 'spidercode', 'crawl_page'])
+
+    menus = [
+        Menu('找商机-市场询价', 'a_yzw_zsj_scxj', 1),
+    ]
+
+    Crawl_Yzw().start_list(menus)
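
Since `crawl_page` drives the page loop and `fetch_list_page` pins `pageSize` at 100, a deeper one-off backfill only needs a different menu tuple. A sketch, assuming the `Menu` and `Crawl_Yzw` definitions above are in scope and with an arbitrary page depth:

```python
# Hypothetical backfill: same channel and spidercode, 5 pages deep,
# i.e. up to 500 of the most recent inquiries (pageSize is fixed at 100).
menus = [
    Menu('找商机-市场询价', 'a_yzw_zsj_scxj', 5),
]
Crawl_Yzw().start_list(menus)
```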

+ 437 - 0
lzz_theme/yzw/yzw_zm_details.py

@@ -0,0 +1,437 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-13
+---------
+@summary: 云筑网 -  招募供应商 - 详情页
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+import json
+from requests.exceptions import RequestException
+from utils.tools import *
+from utils.attachment import AttachmentDownloader
+from login import Login
+import warnings
+
+
+warnings.filterwarnings('ignore')
+
+
+zgyq = {"CSRY": "需求区域内有固定办公场所、仓储及管理人员;", "GCYJ": "近三年有经典案例工程/业绩,且内容包含当前响应需求的品类;",
+        "ZJGYS": "", "JJGYS": "与中建有过合作,且在中建单位无不良合作记录;", "NSRLX": "具备一般纳税人资格,可开具增值税专用发票;",
+        "QYXY": "具有良好的商业信誉和健全的财务会计制度;", "DZNL": "有一定的垫资能力;",
+        "FLZT": "具备法律主体资格,具有独立制定和履行合同的能力;", "QYXZ":""}
+level = {"L1": "股份联采", "L2": "局级集采", "L3": "局二级集采", "L4": "局三级集采"}
+xqlb = {0: "物资采购", 1: "专业分包", 2: "劳务分包", 3: "设备采购", 4: "租赁服务", 5: "专业服务"}
+qylx = {1: "生产商", 2: "经销商", 3: "代理商", 4: "其他"}
+zzzs = {"QYZS": "资质证书", "AQXK": "安全许可证", "JKRZ": "环境、质量、职业健康体系认证", "RYZS": "荣誉证书", "JCBG": "产品检测报告"}
+zb_time = {"WITHIN_ONE_MONTH": "一个月内", "WITHIN_THREE_MONTH": "三个月内", "NOT_FOR_NOW": "暂不招标"}
+
+
+
+class Details:
+
+    def __init__(self):
+        self.db_table = Mongo_client()
+        self.yzw_acc = self.db_table.user_login.yzw_account
+        self.db_name = self.db_table.py_spider.theme_list
+        self.zt_details = self.db_table.py_spider.data_bak
+
+        self.rds = Redis_client()
+        self.cookies = None
+        self.username = "runhekeji"
+        self.count = 0
+        self.login_times = 0
+
+    def get_cookies(self):
+        if not os.path.isfile(f'./{self.username}_ck.json'):
+            Login()
+            self.login_times += 1
+
+        with open(f'./{self.username}_ck.json', 'r', encoding='utf-8') as fr:
+            cks = fr.read()
+        ck = json.loads(cks.replace("'", '"'))
+        return ck
+
+    def detail_get(self, response, item):
+        if '请先完成登录,再继续操作' in response.text or response.status_code == 401:
+            try:
+                os.remove(f'./{self.username}_ck.json')
+            except OSError:
+                pass
+            raise RequestException('登录失效,重新登录')
+        dt = response.json().get('data')
+
+        purchaseOrgLevel = dt.get('recruitVO').get('purchaseOrgLevel')
+        if purchaseOrgLevel:
+            purchaseOrgLevel = level.get(purchaseOrgLevel)
+            xmxx = f'''
+                <div>集采信息</div>
+                <table style="table-layout: auto;">
+                    <colgroup>
+                        <col style="width: 100px;">
+                    </colgroup>
+                    <tbody>
+                    <tr data-row-key="集采层级">
+                        <td>集采层级</td>
+                        <td>{purchaseOrgLevel}</td>
+                    </tr>
+                    </tbody>
+                </table>
+                '''
+        else:
+            projectList = dt.get('projectList')
+            if projectList:
+                xmxx = f'''
+                    <div>
+                        <div>项目信息<span></span></div>
+                        <div style="margin-bottom: 16px;">
+                            <div>
+                                <div>
+                                    <div>
+                                        <div>
+                                            <div>
+                                                <table style="table-layout: auto;">
+                                                    <colgroup>
+                                                        <col style="width: 100px;">
+                                                    </colgroup>
+                                                    <tbody class="hammer-table-tbody">
+                                                    <tr data-row-key="项目信息">
+                                                        <td>项目信息</td>
+                                                        <td>{projectList[0].get('name')}</td>
+                                                    </tr>
+                                                    <tr data-row-key="项目所在地点">
+                                                        <td>项目所在地点</td>
+                                                        <td>{projectList[0].get('address')}</td>
+                                                    </tr>
+                                                    <tr data-row-key="工期起止">
+                                                        <td>工期起止</td>
+                                                        <td>{projectList[0].get('duration')}</td>
+                                                    </tr>
+                                                    <tr data-row-key="建设规模">
+                                                        <td>建设规模</td>
+                                                        <td>{projectList[0].get('contractScale')}</td>
+                                                    </tr>
+                                                    <tr data-row-key="合同造价">
+                                                        <td>合同造价</td>
+                                                        <td>{projectList[0].get('contractCost')}</td>
+                                                    </tr>
+                                                    </tbody>
+                                                </table>
+                                            </div>
+                                        </div>
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                    '''
+            else:
+                xmxx = ""
+
+        cgxq = dt.get('recruitCategoryVOList')
+        cgxq_html = ""
+        if cgxq:
+            base_html = ""
+            if item['categoryType'] in [1,5]:
+                for cg in cgxq:
+                    temp = f'''
+                        <tr>
+                            <td>{cg.get('materialName','')}</td>
+                            <td>{cg.get('workContent','')}</td>
+                            <td>{cg.get('unitName','')}</td>
+                            <td>{cg.get('tentativeWorkload','')}</td>
+                            <td>{cg.get('budget','按需')}</td>
+                            <td>{cg.get('demandSupplierNum','按需')}</td>
+                        </tr>
+                        '''
+                    base_html += temp
+                cgxq_html = f'''
+                    <div>
+                        <div>采购需求<span>-{xqlb.get(item['categoryType'])}</span></div>
+                        <div>
+                            <div>
+                                <div>
+                                    <div>
+                                        <div>
+                                            <div>
+                                                <table style="table-layout: auto;">
+                                                <colgroup></colgroup>
+                                                <thead>
+                                                <tr>
+                                                    <th scope="col">分部分项</th>
+                                                    <th scope="col">工作内容</th>
+                                                    <th scope="col">计量单位</th>
+                                                    <th scope="col">暂定工程量</th>
+                                                    <th scope="col">概算金额(万元)</th>
+                                                    <th scope="col">需求供应商数(个)</th>
+                                                </tr>
+                                                </thead>
+                                                <tbody>
+                                                {base_html}
+                                                </tbody>
+                                            </table>
+                                            </div>
+                                        </div>
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                    '''
+            elif item['categoryType'] == 2:
+                for cg in cgxq:
+                    temp = f'''
+                        <tr>
+                            <td>{cg.get('materialName','')}</td>
+                            <td>{cg.get('labour','按需')}</td>
+                            <td>{cg.get('teamNum','按需')}</td>
+                            <td>{cg.get('budget','按需')}</td>
+                            <td>{cg.get('demandSupplierNum','')}</td>
+                        </tr>
+                        '''
+                    base_html += temp
+                cgxq_html = f'''
+                    <div>
+                        <div>采购需求<span>-{xqlb.get(item['categoryType'])}</span></div>
+                        <div>
+                            <div>
+                                <div>
+                                    <div>
+                                        <div>
+                                            <div>
+                                                <table style="table-layout: auto;">
+                                                    <colgroup></colgroup>
+                                                    <thead>
+                                                    <tr>
+                                                        <th scope="col">分包类别</th>
+                                                        <th scope="col">劳动力需求</th>
+                                                        <th scope="col">班组数量</th>
+                                                        <th scope="col">概算金额(万元)</th>
+                                                        <th scope="col">需求供应商数(个)</th>
+                                                    </tr>
+                                                    </thead>
+                                                    <tbody>
+                                                    {base_html}
+                                                    </tbody>
+                                                </table>
+                                            </div>
+                                        </div>
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                    '''
+            else:
+                for cg in cgxq:
+                    qy_org = cg.get('supplierTypeList', [])
+                    qy = "、".join([qylx.get(i) for i in qy_org])
+                    temp = f'''
+                    <tr>
+                        <td>{cg.get('materialName', '')}</td>
+                        <td>{cg.get('standards', '')}</td>
+                        <td>{cg.get('brand', '')}</td>
+                        <td>{cg.get('unitName', '')}</td>
+                        <td>{cg.get('num', '')}</td>
+                        <td>{cg.get('budget', '按需')}</td>
+                        <td>{cg.get('demandSupplierNum', '')}</td>
+                        <td>{qy}</td>
+                    </tr>
+                    '''
+                    base_html += temp
+                cgxq_html = f'''
+                <div>
+                    <div>采购需求<span>-{xqlb.get(item['categoryType'])}</span></div>
+                    <div>
+                        <div>
+                            <div>
+                                <div>
+                                    <div>
+                                        <div>
+                                            <table style="table-layout: auto;">
+                                                <colgroup>
+                                                    <col>
+                                                    <col>
+                                                    <col>
+                                                    <col>
+                                                    <col>
+                                                    <col>
+                                                    <col>
+                                                    <col style="width: 160px;">
+                                                </colgroup>
+                                                <thead class="hammer-table-thead">
+                                                <tr>
+                                                    <th scope="col">材料/设备名称</th>
+                                                    <th scope="col">规格</th>
+                                                    <th scope="col">品牌</th>
+                                                    <th scope="col">计量单位</th>
+                                                    <th scope="col">数量</th>
+                                                    <th scope="col">概算金额(万元)</th>
+                                                    <th scope="col">需求供应商数(个)</th>
+                                                    <th scope="col">企业类型</th>
+                                                </tr>
+                                                </thead>
+                                                <tbody class="hammer-table-tbody">
+                                                {base_html}
+                                                </tbody>
+                                            </table>
+                                        </div>
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+                '''
+
+        zgyq_list = dt.get('recruitConditionDetailVOList')
+        zgyq_qt = dt.get('recruitCustomizeConditions')
+        zgyq_html = ""
+        index = 1
+        if zgyq_list:
+            for zz in zgyq_list:
+                if zz['conditionCode'] == "ZCZJ":
+                    it = f"<div>{index}.企业注册资金大于{zz.get('conditionValue',0)}万元以上;</div>"
+                elif zz['conditionCode'] == "QYZZ":
+                    z = ",".join([zzzs.get(q,'') for q in eval(zz.get('conditionValue'))])
+                    it = f"<div>{index}.满足所需品类:{z};</div>"
+                else:
+                    it = f"<div>{index}.{zgyq.get(zz['conditionCode'],'')}</div>"
+                zgyq_html += it
+                index += 1
+
+        if zgyq_qt:
+            zgyq_html += "\n".join([f"<div>{index}.{t['conditionValue']}</div>" for t in zgyq_qt])
+
+        file_list = dt.get('attachments')
+        file_html = "无"
+        attachments = {}
+        if file_list:
+            file_html = file_html.replace("无","")
+            for info in file_list:
+                file_name = info.get('fileName')
+                file_url = info.get('fileUrl')
+                file_type = extract_file_type(file_name)
+                if file_type:
+                    file_html += f'<div><a href="{file_url}">{file_name}</a></div>'
+                    attachment = AttachmentDownloader().fetch_attachment(
+                        file_name=file_name, file_type=file_type, download_url=file_url)
+                    attachments[str(len(attachments) + 1)] = attachment
+        if attachments:
+            item['projectinfo'] = {"attachments": attachments}
+
+        html = f'''
+        <div>
+            <div>所属单位</div>
+            {dt.get('orgLevel2Name')}
+        </div>
+        <div>
+            <div>需求区域</div>
+            <div><span style="margin-right: 12px;">{" ".join(dt.get('areaNames', []))}</span></div>
+        </div>
+        <div>
+            <div>预计招标时间</div>
+            {zb_time.get(dt.get('tenderPlanTime'))}
+        </div>
+        {xmxx}
+        <div>
+            <div>付款条件</div>
+            {dt.get('recruitVO').get('paymentConditionText')}
+        </div>
+        {cgxq_html}
+        <div>
+            <div>需求附件<span></span></div>
+            <div> {file_html}</div>
+        </div>
+        <div>
+            <div>资格要求<span></span></div>
+            <div style="line-height: 1.8;">
+                {zgyq_html}
+            </div>
+        </div>
+        '''
+        item["contenthtml"] = html.replace('None', '')
+
+        item = format_fileds(item)
+
+        self.zt_details.insert_one(item)
+        logger.info(f"[采集成功]{item['title']}-{item['publishtime']}")
+
+    def fetch_request(self, item):
+        self.cookies = self.get_cookies()
+        headers = {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "cache-control": "no-cache",
+            "origin": "https://xy.yzw.cn",
+            "pragma": "no-cache",
+            "priority": "u=1, i",
+            "referer": item['href'],
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+        }
+        response = requests.get(url=item.get("parse_url"), headers=headers,
+                                 cookies=self.cookies, timeout=30, verify=False)
+        time.sleep(3)
+        return response
+
+    def deal_request(self, item):
+        retry_times = 0
+        org_item = item.copy()
+        while retry_times < 3:
+            try:
+                response = self.fetch_request(item)
+                if response is not None:
+                    self.detail_get(response, item=item)
+                    return True
+                else:
+                    retry_times += 1
+                    time.sleep(random.randint(6, 10))
+            except Exception as e:
+                item = org_item
+                logger.error(f"{item['href']} 采集异常:{e}")
+                retry_times += 1
+                time.sleep(random.randint(3, 6))
+        logger.warning(f"[采集失败]{item['href']}")
+        return False
+
+    def start(self, limit=1):
+        logger.debug("********** 详情页采集开始 **********")
+        time.sleep(random.random())
+
+        with self.db_name.find({"parser_name": "ztpc_yzw_zm", "failed": False, "is_crawl": False},
+                               sort=[('publishtime', -1)]).limit(limit) as data_list:
+            tasks = [dd for dd in data_list]
+
+        for item in tasks:
+            # logger.debug(item)
+            account = self.yzw_acc.find_one({"site": "云筑网"})
+
+            self.count = account['count']
+            self.login_times = account['login_times']
+
+            if self.count >= account['total'] or account['state'] == "stop" or self.login_times > 5:
+                logger.warning("账号 采集数量 限制")
+                return True
+
+            update_id = item["_id"]
+            if self.deal_request(item):
+                self.count += 1
+                self.db_name.update_one({"_id": update_id}, {"$set": {"is_crawl": True}})
+            else:
+                self.db_name.update_one({"_id": update_id}, {"$set": {"failed": True}})
+
+            self.yzw_acc.update_one({"site": "云筑网"},
+                                    {"$set": {"count": self.count, "update_time": get_current_date(),
+                                              "login_times": self.login_times}})
+            time.sleep(random.randint(60, 100))
+
+        logger.debug("********** 详情页采集结束 **********")
+
+
+if __name__ == "__main__":
+    Details().start(limit=random.randint(15, 30))
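
As a concrete reading of the qualification logic above: a hypothetical condition row `{"conditionCode": "ZCZJ", "conditionValue": "500"}` would render as "1.企业注册资金大于500万元以上;", and `{"conditionCode": "QYZZ", "conditionValue": "['QYZS', 'AQXK']"}` as "2.满足所需品类:资质证书,安全许可证;" — the `QYZZ` value arrives as a list-literal string, which is presumably why the code reaches for `eval` rather than `json.loads` (the sample values here are made up).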

+ 168 - 0
lzz_theme/yzw/yzw_zm_list.py

@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-06-13
+---------
+@summary: 云筑网 - 招募供应商 - 列表页
+---------
+@author: Lzz
+"""
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.getcwd()))
+from utils.RedisDB import RedisFilter
+from requests.exceptions import RequestException
+from login import Login
+from utils.tools import *
+from collections import namedtuple
+import json
+import warnings
+warnings.filterwarnings('ignore')
+
+
+
+class Crawl_Yzw:
+
+    def __init__(self):
+        self.db_table = Mongo_client()
+        self.yzw_acc = self.db_table.user_login.yzw_account
+        self.zb_list = self.db_table.py_spider.theme_list
+        self.RDS = RedisFilter()
+
+        self.real_cont = 0
+        self.cookies = None
+        self.username = "runhekeji"
+
+    def get_cookies(self):
+        if not os.path.isfile(f'./{self.username}_ck.json'):
+            Login()
+
+        with open(f'./{self.username}_ck.json', 'r', encoding='utf-8') as fr:
+            cks = fr.read()
+        ck = json.loads(cks.replace("'", '"'))
+        return ck
+
+    def fetch_list_page(self, page, menu):
+        logger.debug(f' *** 开始采集第{page}页 ***')
+        self.cookies = self.get_cookies()
+
+        headers = {
+            "accept": "application/json, text/plain, */*",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "origin": "https://xy.yzw.cn",
+            "pragma": "no-cache",
+            "priority": "u=1, i",
+            "referer": "https://xy.yzw.cn/search/sj/recruit",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+        }
+
+        url = "https://yzmtg.yzw.cn/mer/portal/recruit/search"
+        data = {
+            "pageNum": page,
+            "pageSize": 100,
+            "param": {}
+        }
+        data = json.dumps(data, separators=(',', ':'))
+
+        request_params = {
+            "headers": headers,
+            "data": data,
+            "cookies": self.cookies,
+            "timeout": 60,
+            "verify": False,
+        }
+        resp = requests.post(url, **request_params)
+        time.sleep(3)
+        return resp
+
+    def parser_list_page(self, response, page, menu):
+        if '请先完成登录,再继续操作' in response.text or response.status_code == 401:
+            try:
+                os.remove(f'./{self.username}_ck.json')
+            except OSError:
+                pass
+            raise RequestException('登录失效,重新登录')
+        results_list = []
+        info_list = response.json().get('data').get('records')
+        for info in info_list:
+            create_time = info.get('publishTime')
+            hid = info.get('recruitCode')
+            categoryType = info.get('categoryType')
+            href = f"https://xy.yzw.cn/sj/recruit-detail?recruitCode={hid}&searchID=&isTop="
+            title = info.get('name').strip()
+
+            dedup = [href, title, create_time]
+            if not self.RDS.data_filter(dedup):
+                item = {
+                    "site": "云筑网",
+                    "channel": menu.channel,
+                    "spidercode": menu.spidercode,
+                    "area": "全国",
+                    "city": "",
+                    "district": "",
+                    "href": href,
+                    "title": title,
+                    "publishtime": create_time,
+                    "parse_url": f"https://yzmtg.yzw.cn/mer/portal/recruit/getDetailByCode/{hid}",
+                    "parser_name": "ztpc_yzw_zm",
+                    "categoryType": categoryType,
+                    "is_mixed": False,
+                    "is_theme": True,
+                    "retry": 0,
+                    "comeintime": int2long(int(time.time())),
+                    "is_crawl": False,
+                    "failed": False,
+                }
+
+                self.zb_list.insert_one(item)
+                self.RDS.data_save_redis(dedup)
+                results_list.append(item)
+
+        logger.info(f' *** 第{page}页采集完毕 - 共{len(info_list)}条 - 入库{len(results_list)}条 ***')
+        self.real_cont += len(results_list)
+        return results_list
+
+
+    def crawl_list_spider(self, page, menu):
+        retry_times = 0
+        while retry_times < 3:
+            try:
+                response = self.fetch_list_page(page=page, menu=menu)
+                logger.debug(f"第{page}页 状态码:{response.status_code}")
+                if response is not None:
+                    self.parser_list_page(response=response, page=page, menu=menu)
+                    logger.info(f"当前已采集 {self.real_cont} 条数据")
+                    # time.sleep(random.randint(60, 100))
+                break
+            except Exception as e:
+                logger.error(f"第{page}页 采集异常:{e}")
+                retry_times += 1
+                time.sleep(10)
+
+
+    def start_list(self, menus):
+        logger.debug("********** 列表页开始 **********")
+        for menu in menus:
+            logger.debug(f"++++++ {menu.channel}开始采集 ++++++")
+            max_page = menu.crawl_page
+            for page in range(1, max_page + 1):
+                state = self.yzw_acc.find_one({"site": "云筑网"}).get('state')
+                if state == "stop":
+                    logger.warning("账号限制!")
+                    return
+                self.crawl_list_spider(page=page, menu=menu)
+            logger.debug(f"------ {menu.channel}采集结束 ------")
+
+        logger.debug("********** 列表页结束 **********")
+
+
+if __name__ == '__main__':
+    Menu = namedtuple('Menu', ['channel', 'spidercode', 'crawl_page'])
+
+    menus = [
+        Menu('找商机-招募供应商', 'a_yzw_ztb_zbgg', 1),
+    ]
+
+    Crawl_Yzw().start_list(menus)

+ 2 - 2
lzz_theme/zgdtjtgsdzswpt/start.sh

@@ -2,5 +2,5 @@
 
 ps -ef |grep "dtpy_spider.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "dtpy_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 dtpy_spider.py > log/dtpy_spider.out 2>&1 &
-nohup python3 dtpy_details.py > log/dtpy_details.out 2>&1 &
+nohup python3 dtpy_spider.py > /dev/null 2>&1 &
+nohup python3 dtpy_details.py > /dev/null 2>&1 &

+ 2 - 2
lzz_theme/zgdtjtgsdzswpt_m30/start_m30.sh

@@ -2,5 +2,5 @@
 
 ps -ef |grep "dtpy_spider_m30.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "dtpy_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 dtpy_spider_m30.py > log/dtpy_spider_m30.out 2>&1 &
-nohup python3 dtpy_details.py > log/dtpy_details.out 2>&1 &
+nohup python3 dtpy_spider_m30.py > /dev/null 2>&1 &
+nohup python3 dtpy_details.py > /dev/null 2>&1 &

+ 1 - 5
lzz_theme/zgdzkjjtyxgsdzcgpt/limit_details.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-04-01
+Created on 2025-05-20
 ---------
 @summary: 中国电子科技集团有限公司电子采购平台 - 详情页
 ---------
@@ -12,7 +12,6 @@ import os
 sys.path.append(os.path.dirname(os.getcwd()))
 import json
 from utils.attachment import AttachmentDownloader
-from utils.clean_html import cleaner
 from utils.tools import *
 from login_account import Login
 
@@ -39,7 +38,6 @@ class Details:
         self.login_times = 0
         self.count = 0
 
-
     def get_cookies(self):
         if not os.path.isfile(f'./zgdk_cookies.json'):
             Login(self.phone)
@@ -53,7 +51,6 @@ class Details:
     def detail_get(self, response, item):
 
         html = response.json().get('publishContent') or ''
-
         buyer = response.json().get('annSource') or ''
         jsondata = {"buyer": buyer}
 
@@ -174,7 +171,6 @@ class Details:
             data_lsit = [dd for dd in cursor]
         for item in data_lsit:
             # logger.debug(item)
-
             sc = self.zgdk_acc.find_one({"account": self.phone})
 
             self.count = sc['count']

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/list_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "zgdk_list_spider.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 zgdk_list_spider.py > zgdk_list_spider.out 2>&1 &
+nohup python3 zgdk_list_spider.py > /dev/null 2>&1 &

+ 2 - 2
lzz_theme/zgdzkjjtyxgsdzcgpt/login_account.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-03-08 
+Created on 2025-05-20
 ---------
 @summary: 账密登录
 ---------
@@ -14,7 +14,7 @@ import execjs
 
 def Login(phone=None, proxies=False):
     username = "91110105756025873C"
-    password = "Jy9876543210."
+    password = "Admin9876543210."
     print(f" >>> 登录账号:{username} ...")
     session = requests.session()
     session.proxies = proxies

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/ret_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "zdk_reset.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 zdk_reset.py > zdk_reset.out 2>&1 &
+nohup python3 zdk_reset.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "limit_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 limit_details.py > limit_details.out 2>&1 &
+nohup python3 limit_details.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/zdk_reset.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-03-15
+Created on 2025-05-20
 ---------
 @summary: 中国电子科技集团有限公司电子采购平台 - 重置账户采集信息
 ---------

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_cookies.json

@@ -1 +1 @@
-{'user': '91110105756025873C', 'token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiI5MTExMDEwNTc1NjAyNTg3M0MiLCJleHAiOjE3NDU5ODIzMzYsImlhdCI6MTc0NTk3NTEzNiwianRpIjoiNDVkYTcxZTktZWQ0YS00YTA2LTliYTAtZTYxOGM3NzUzMGY3In0.XbYEnfj4EKp_9a8nD-xrKKgwr9ulabHMf0-xuCG4kVNy8cxLPCUa8vdMVXQ3x3xohCih8ACBhfNDBD75XcqfWA', 'expire_time': 1745975407}
+{'user': '91110105756025873C', 'token': None, 'expire_time': 1747725008}

+ 1 - 1
lzz_theme/zgdzkjjtyxgsdzcgpt/zgdk_list_spider.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Created on 2024-05-17
+Created on 2025-05-20
 ---------
 @summary: 中国电子科技集团有限公司电子采购平台
 ---------

+ 2 - 2
lzz_theme/zgsyzbtbw/start.sh

@@ -3,5 +3,5 @@
 ps -ef |grep "zgsyzbtbw_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "zgsyzbtbw_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 zgsyzbtbw_list.py > log/zgsyzbtbw_list.out 2>&1 &
-nohup python3 zgsyzbtbw_details.py > log/zgsyzbtbw_details.out 2>&1 &
+nohup python3 zgsyzbtbw_list.py > /dev/null 2>&1 &
+nohup python3 zgsyzbtbw_details.py > /dev/null 2>&1 &

+ 3 - 3
lzz_theme/zgwkjtyxgs/detail_start.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 
 ps -ef |grep "zgwkjtyxgs_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-ps -ef |grep "zgwkjtyxgs_details2.py" |grep -v grep |awk '{print $2}' |x
-nohup python3 zgwkjtyxgs_details.py > log/zgwkjtyxgs_details.out 2>&1 &
-nohup python3 zgwkjtyxgs_details2.py > log/zgwkjtyxgs_details2.out 2>&1 &
+ps -ef |grep "zgwkjtyxgs_details2.py" |grep -v grep |awk '{print $2}' |xargs kill -9
+nohup python3 zgwkjtyxgs_details.py > /dev/null 2>&1 &
+nohup python3 zgwkjtyxgs_details2.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/zgwkjtyxgs/his_start.sh

@@ -1,4 +1,4 @@
 #!/bin/bash
 
 ps -ef |grep "history_crawl.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 history_crawl.py > log/history_crawl.out 2>&1 &
+nohup python3 history_crawl.py > /dev/null 2>&1 &

+ 1 - 1
lzz_theme/zgwkjtyxgs/list_start.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 ps -ef |grep "zgwk_daily_crawl.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 zgwk_daily_crawl.py > log/zgwk_daily_crawl.out 2>&1 &
+nohup python3 zgwk_daily_crawl.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/zgwkjtyxgs/retry_start.sh

@@ -1,5 +1,5 @@
 #!/bin/bash
 
 # ps -ef |grep "retry_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-nohup python3 retry_details.py > log/retry_details.out 2>&1 &
+nohup python3 retry_details.py > /dev/null 2>&1 &
 

+ 3 - 3
lzz_theme/zgzbtbggfwpt/detail_start.sh

@@ -3,9 +3,9 @@
 #ps -ef |grep "zgzbtbggfwpt_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 #ps -ef |grep "zgzbtbggfwpt_details2.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 #ps -ef |grep "zgzbtbggfwpt_details3.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-#nohup python3 zgzbtbggfwpt_details.py > log/zgzbtbggfwpt_details.out 2>&1 &
-#nohup python3 zgzbtbggfwpt_details2.py > log/zgzbtbggfwpt_details2.out 2>&1 &
-#nohup python3 zgzbtbggfwpt_details3.py > log/zgzbtbggfwpt_details3.out 2>&1 &
+#nohup python3 zgzbtbggfwpt_details.py > /dev/null 2>&1 &
+#nohup python3 zgzbtbggfwpt_details2.py > /dev/null 2>&1 &
+#nohup python3 zgzbtbggfwpt_details3.py > /dev/null 2>&1 &
 
 ps -ef |grep "zgzbtbggfwpt_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9 2> /dev/null
 nohup python3 zgzbtbggfwpt_details.py > /dev/null &

+ 1 - 1
lzz_theme/zgzbtbggfwpt/retry_start.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # ps -ef |grep "retry_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
-#nohup python3 retry_details.py > log/retry_details.out 2>&1 &
+#nohup python3 retry_details.py > /dev/null 2>&1 &
 
 
 ps -ef |grep "spider_detail_retry.py" |grep -v grep |awk '{print $2}' |xargs kill -9 2>/dev/null

+ 48 - 0
lzz_theme/zgzbtbggfwpt/spider_detail_bu.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-04-22
+---------
+@summary: 中国招标投标公共服务平台 - 详情页[已按规范][失败重试]
+"""
+from spider_detail import Spider
+from utils.log import logger
+
+
+class RetrySpider(Spider):
+
+    def get_tasks(self, sizes, show_debug=False):
+        results = []
+        query = {
+            "parser_name": "ztpc_zgzbtbggfwpt",
+            "retry": {"$lt": 10},
+            "failed": True,
+            "is_crawl": False
+        }
+        sort = [("_id", -1)]
+        with self.theme_list.find(query, limit=sizes, sort=sort) as cursor:
+            for item in cursor:
+                if show_debug:
+                    logger.debug(item)
+
+                results.append(item)
+
+        yield from results
+
+    def start(self):
+        logger.debug("********** 详情页采集开始 **********")
+
+        try:
+            fs = []
+            for task in self.get_tasks(sizes=self._sizes):
+                f = self._executor.submit(self._spider, task)
+                fs.append(f)
+
+            self.wait(fs)
+
+        finally:
+            logger.debug("********** 详情页采集结束 **********")
+            self.shutdown_spider()
+
+
+if __name__ == "__main__":
+    RetrySpider(sizes=10000, threads=20).start()
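
This backfill pass re-queues documents that the main detail spider left with `failed: True`, giving each up to 10 attempts via the `retry` counter; everything except task selection is inherited from `spider_detail.Spider`, so the per-document `retry` bookkeeping presumably lives in the parent's `_spider` (not shown in this commit).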

+ 27 - 0
lzz_theme/zgzbtbggfwpt_wagf/spider_list_b.py

@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-05-06
+---------
+@summary: 中国招标投标公共服务平台 - 列表页[未按规范] - 大周期
+"""
+
+import datetime
+from collections import namedtuple
+
+from spider_list import Spider
+
+if __name__ == '__main__':
+    Menu = namedtuple(
+        'Menu',
+        ['channel', 'code', 'category', 'businessKeyWord', 'auto_paginate']
+    )
+
+    target_menus = [
+        Menu('未按数据规范-招标公告', 'a_zgzbtbggfwpt_wasjgf_zbgg', '招标公告', 'tenderBulletin', True),
+        Menu('未按数据规范-开标记录', 'a_zgzbtbggfwpt_wasjgf_kbjl', '开标记录', 'openBidRecord', True),
+        Menu('未按数据规范-评标公示', 'a_zgzbtbggfwpt_wasjgf_pbgs', '评标公示', 'winCandidateBulletin', True),
+        Menu('未按数据规范-中标公告', 'a_zgzbtbggfwpt_wasjgf_zhbgg', '中标公告', 'winBidBulletin', True),
+    ]
+    today = datetime.datetime.now().strftime('%Y-%m-%d')
+    date = (today, today)
+    Spider(target_menus, date=date, page_sizes=100, threads=10).start()

+ 28 - 0
lzz_theme/zgzbtbggfwpt_wagf/spider_list_f.py

@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025-05-06
+---------
+@summary: 中国招标投标公共服务平台 - list pages [non-spec-compliant] - short cycle
+"""
+
+import datetime
+from collections import namedtuple
+
+from spider_list import Spider
+
+if __name__ == '__main__':
+    Menu = namedtuple(
+        'Menu',
+        ['channel', 'code', 'category', 'businessKeyWord', 'crawl_page']
+    )
+
+    target_menus = [
+        Menu('未按数据规范-招标公告', 'a_zgzbtbggfwpt_wasjgf_zbgg', '招标公告', 'tenderBulletin', 1),
+        Menu('未按数据规范-开标记录', 'a_zgzbtbggfwpt_wasjgf_kbjl', '开标记录', 'openBidRecord', 1),
+        Menu('未按数据规范-评标公示', 'a_zgzbtbggfwpt_wasjgf_pbgs', '评标公示', 'winCandidateBulletin', 1),
+        Menu('未按数据规范-中标公告', 'a_zgzbtbggfwpt_wasjgf_zhbgg', '中标公告', 'winBidBulletin', 1),
+    ]
+
+    today = datetime.datetime.now().strftime('%Y-%m-%d')
+    date = (today, today)
+    Spider(target_menus, date=date, page_sizes=1000, threads=4).start()
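Both launchers pin the window to a single day via date = (today, today); a backfill only needs a wider pair, assuming Spider accepts any (start, end) pair of 'YYYY-MM-DD' strings, as the two files above suggest. A sketch:

    import datetime

    today = datetime.datetime.now()
    start = today - datetime.timedelta(days=7)  # 7-day window; the span is illustrative
    date = (start.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d'))
    # Spider(target_menus, date=date, page_sizes=1000, threads=4).start()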

+ 33 - 33
lzz_theme/zmdszfcgdzsc/start.sh

@@ -34,37 +34,37 @@ ps -ef |grep "zmd_zszq_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "zmd_zyx_details.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 ps -ef |grep "zmd_zyx_list.py" |grep -v grep |awk '{print $2}' |xargs kill -9
 
-nohup python3 zmd_byx_details.py > log/zmd_byx_details.out 2>&1 &
-nohup python3 zmd_byx_list.py > log/zmd_byx_list.out 2>&1 &
-nohup python3 zmd_gxq_details.py > log/zmd_gxq_details.out 2>&1 &
-nohup python3 zmd_gxq_list.py > log/zmd_gxq_list.out 2>&1 &
-nohup python3 zmd_kfq_details.py > log/zmd_kfq_details.out 2>&1 &
-nohup python3 zmd_kfq_list.py > log/zmd_kfq_list.out 2>&1 &
-nohup python3 zmd_pyx_details.py > log/zmd_pyx_details.out 2>&1 &
-nohup python3 zmd_pyx_list.py > log/zmd_pyx_list.out 2>&1 &
-nohup python3 zmd_qsx_details.py > log/zmd_qsx_details.out 2>&1 &
-nohup python3 zmd_qsx_list.py > log/zmd_qsx_list.out 2>&1 &
-nohup python3 zmd_qyzq_details.py > log/zmd_qyzq_details.out 2>&1 &
-nohup python3 zmd_qyzq_list.py > log/zmd_qyzq_list.out 2>&1 &
-nohup python3 zmd_rnx_details.py > log/zmd_rnx_details.out 2>&1 &
-nohup python3 zmd_rnx_list.py > log/zmd_rnx_list.out 2>&1 &
-nohup python3 zmd_sbj_details.py > log/zmd_sbj_details.out 2>&1 &
-nohup python3 zmd_sbj_list.py > log/zmd_sbj_list.out 2>&1 &
-nohup python3 zmd_sbjcgyx_list.py > log/zmd_sbjcgyx_list.out 2>&1 &
-nohup python3 zmd_scx_details.py > log/zmd_scx_details.out 2>&1 &
-nohup python3 zmd_scx_list.py > log/zmd_scx_list.out 2>&1 &
-nohup python3 zmd_sfq_details.py > log/zmd_sfq_details.out 2>&1 &
-nohup python3 zmd_sfq_list.py > log/zmd_sfq_list.out 2>&1 &
-nohup python3 zmd_spx_details.py > log/zmd_spx_details.out 2>&1 &
-nohup python3 zmd_spx_list.py > log/zmd_spx_list.out 2>&1 &
-nohup python3 zmd_xcx_details.py > log/zmd_xcx_details.out 2>&1 &
-nohup python3 zmd_xcx_list.py > log/zmd_xcx_list.out 2>&1 &
-nohup python3 zmd_xpx_details.py > log/zmd_xpx_details.out 2>&1 &
-nohup python3 zmd_xpx_list.py > log/zmd_xpx_list.out 2>&1 &
-nohup python3 zmd_ycq_details.py > log/zmd_ycq_details.out 2>&1 &
-nohup python3 zmd_ycq_list.py > log/zmd_ycq_list.out 2>&1 &
-nohup python3 zmd_zszq_details.py > log/zmd_zszq_details.out 2>&1 &
-nohup python3 zmd_zszq_list.py > log/zmd_zszq_list.out 2>&1 &
-nohup python3 zmd_zyx_details.py > log/zmd_zyx_details.out 2>&1 &
-nohup python3 zmd_zyx_list.py > log/zmd_zyx_list.out 2>&1 &
+nohup python3 zmd_byx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_byx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_gxq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_gxq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_kfq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_kfq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_pyx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_pyx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_qsx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_qsx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_qyzq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_qyzq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_rnx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_rnx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_sbj_details.py > /dev/null 2>&1 &
+nohup python3 zmd_sbj_list.py > /dev/null 2>&1 &
+nohup python3 zmd_sbjcgyx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_scx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_scx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_sfq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_sfq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_spx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_spx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_xcx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_xcx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_xpx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_xpx_list.py > /dev/null 2>&1 &
+nohup python3 zmd_ycq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_ycq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_zszq_details.py > /dev/null 2>&1 &
+nohup python3 zmd_zszq_list.py > /dev/null 2>&1 &
+nohup python3 zmd_zyx_details.py > /dev/null 2>&1 &
+nohup python3 zmd_zyx_list.py > /dev/null 2>&1 &
 

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_byx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "D3D8A41D5CDD91C065B49715B1970012", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "155846AFD16D52FD8426104BDF2089E8", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_gxq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "05BBD0A4A75ACC3249EF1E9CD665C0A9", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "D47C3DB87BF89FC5BF6256E4FBF563A2", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_kfq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "CE7686FBE7906F5C2DD7C20877A1ECC3", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "8E6EE577ACBDA9324DF2B6698FA5CAFB", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_login.py

@@ -4,7 +4,7 @@ import time
 import execjs
 import json
 import requests
-from loguru import logger
+from utils.log import logger
 
 import warnings
 
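The swap from loguru to utils.log puts zmd_login.py on the same shared logger the new spider files import. utils/log.py itself is not part of this diff; a plausible minimal shape, assuming it wraps loguru (the sink path and rotation policy are guesses):

    # utils/log.py -- hypothetical sketch; the real module is not shown in this diff
    import os
    from loguru import logger

    os.makedirs("log", exist_ok=True)  # avoid the missing-directory startup failure
    logger.add(
        "log/theme_{time:YYYY-MM-DD}.log",  # loguru expands {time:...} in sink names
        rotation="1 day",
        encoding="utf-8",
    )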

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_pyx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "F5B56265992BFA700D8E8647C7864AA5", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "D3BFEE17203193578E6EB044C24C3BA6", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_qsx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "292A7D5432869FF8A0B36D0FC9900A17", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "23D9B258E1CCFF44DEE2DEB2FAFFEF10", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_qyzq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "835FA2756DFD26DF944F870456B4D5E9", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "F7DDE264B35FA25F6FEE509E33A06BB4", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_rnx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "6CD2C67671C6C8A7BE87978EF3D8A659", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "D3B95B21BAB485EC9599B4313A537DDA", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_sbj_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "79B9E143E98A13FFDF36599C2F6555E6", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "E9DF2C0CF8FD88ACC36CD9F10373ACAC", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_scx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "82051DA95DA90A3611667AD72C7BD1BC", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "2BF477600CFE2197383300021D497ED1", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_sfq_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "CD4F4EE648F4E4DADFAB5738F281C49C", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "2EE3DBC9039671CBA0CD972B91D57896", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_spx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "BB03A8AE1D99176813B0620927898C73", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "A9CDCA058C1AAAED50C10C9D66B7E971", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

+ 1 - 1
lzz_theme/zmdszfcgdzsc/zmd_xcx_ck.json

@@ -1 +1 @@
-{"JSESSIONID": "46054129F1863D78B9AC11CA774EC0B2", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}
+{"JSESSIONID": "FA53E8C66DE51FE8317CF92B02D9A4D5", "thshop_customerName": "%E5%8C%97%E4%BA%AC%E6%8B%93%E6%99%AE%E4%B8%B0%E8%81%94%E4%BF%A1%E6%81%AF%E7%A7%91%E6%8A%80%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"}

Some files were not shown because too many files changed in this diff