test.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764
  1. package extract
  2. import (
  3. "data_ai/clean"
  4. "data_ai/prompt"
  5. "data_ai/ul"
  6. "fmt"
  7. log "github.com/donnie4w/go-logger/logger"
  8. new_xlsx "github.com/tealeg/xlsx/v3"
  9. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "os"
  11. "strings"
  12. "sync"
  13. "time"
  14. "unicode/utf8"
  15. )
  16. // 验证单条数据···
  17. func TestSingleFieldInfo(name string, tmpid string) {
  18. now := time.Now().Unix()
  19. tmp := ul.BidMgo.FindById(name, tmpid)
  20. if len(tmp) == 0 || tmp == nil {
  21. log.Debug("未查询到数据...", tmpid)
  22. return
  23. }
  24. data := ResolveInfo(tmp)
  25. //最终结果...
  26. for k, v := range data {
  27. log.Debug(k, "~", v)
  28. }
  29. log.Debug("耗时···", time.Now().Unix()-now)
  30. }
  31. // 调试错误数据···
  32. func TestErrInfo0923() {
  33. arr := []string{
  34. "66e39b7bb25c3e1debf2cb66",
  35. "66e39b7bb25c3e1debf2cb52",
  36. "66e39b76b25c3e1debf2cb1c",
  37. "66e39b71b25c3e1debf2ca58",
  38. "66e39b71b25c3e1debf2ca47",
  39. "66e39b71b25c3e1debf2ca3e",
  40. "66e39b71b25c3e1debf2ca1d",
  41. "66e39b62b25c3e1debf2c9a0",
  42. "66e39b0db25c3e1debf2c788",
  43. "66e39af3b25c3e1debf2c711",
  44. "66e39ae5b25c3e1debf2c6ac",
  45. "66e39ad5b25c3e1debf2c607",
  46. "66e39ad5b25c3e1debf2c5fc",
  47. "66e39ad1b25c3e1debf2c5e1",
  48. "66e39acbb25c3e1debf2c56c",
  49. "66e39ac6b25c3e1debf2c533",
  50. "66e39ac1b25c3e1debf2c526",
  51. "66e39a9eb25c3e1debf2c4e7",
  52. "66e39a80b25c3e1debf2c441",
  53. "66e39a5cb25c3e1debf2c357",
  54. "66e39a30b25c3e1debf2c18f",
  55. "66e39a12b25c3e1debf2c0cc",
  56. "66e39a08b25c3e1debf2c050",
  57. "66e39a08b25c3e1debf2bfce",
  58. "66e3993fb25c3e1debf2b875",
  59. "66e3990db25c3e1debf2b55b",
  60. "66e398f1b25c3e1debf2b4bc",
  61. "66e397ccb25c3e1debf2abed",
  62. "66e397b9b25c3e1debf2ab81",
  63. "66e3977db25c3e1debf2a7ae",
  64. "66e396b3b25c3e1debf2a049",
  65. "66e396b3b25c3e1debf29f97",
  66. "66e3969eb25c3e1debf29e62",
  67. "66e395feb25c3e1debf29abb",
  68. "66e395e0b25c3e1debf298d2",
  69. "66e395d6b25c3e1debf297e6",
  70. "66e395ccb25c3e1debf296d1",
  71. "66e39554b25c3e1debf29331",
  72. "66e39517b25c3e1debf29160",
  73. "66e394c7b25c3e1debf28f42",
  74. "66e394bdb25c3e1debf28ef6",
  75. "66e394b3b25c3e1debf28e48",
  76. "66e3944fb25c3e1debf28ab5",
  77. "66e393ccb25c3e1debf28729",
  78. "66e393c2b25c3e1debf286dd",
  79. "66e393c2b25c3e1debf286a5",
  80. "66e393aeb25c3e1debf28572",
  81. "66e3934ab25c3e1debf28423",
  82. "66e39322b25c3e1debf282c6",
  83. "66e392d1b25c3e1debf2809d",
  84. "66e39212b25c3e1debf279d1",
  85. "66e39209b25c3e1debf279c7",
  86. "66e391f5b25c3e1debf2779e",
  87. "66e391eab25c3e1debf2773f",
  88. "66e391e0b25c3e1debf276a8",
  89. "66e39168b25c3e1debf27347",
  90. "66e3912cb25c3e1debf2714a",
  91. "66e390e6b25c3e1debf26ee7",
  92. "66e390e4b25c3e1debf26e7c",
  93. "66e390b3b25c3e1debf26cce",
  94. "66e3906cb25c3e1debf26a8f",
  95. "66e3901bb25c3e1debf26822",
  96. "66e38ff5b25c3e1debf26714",
  97. "66e38fd5b25c3e1debf26694",
  98. "66e38fb8b25c3e1debf265a4",
  99. "66e38f90b25c3e1debf264c5",
  100. "66e38f7bb25c3e1debf263de",
  101. "66e38f68b25c3e1debf263b8",
  102. "66e38f5eb25c3e1debf2638c",
  103. "66e38f4ab25c3e1debf2633b",
  104. "66e38f40b25c3e1debf2631a",
  105. "66e38f18b25c3e1debf261c4",
  106. "66e38ef8b25c3e1debf260e7",
  107. "66e38ec8b25c3e1debf26063",
  108. "66e38eb3b25c3e1debf26017",
  109. "66e38e95b25c3e1debf25f78",
  110. "66e38e6db25c3e1debf25ef0",
  111. "66e38e61b25c3e1debf25eb4",
  112. "66e38a57b25c3e1debf24a45",
  113. "66e38a47b25c3e1debf24a09",
  114. "66e38a47b25c3e1debf249dd",
  115. "66e38a47b25c3e1debf249a1",
  116. "66e38a47b25c3e1debf24998",
  117. "66e38a47b25c3e1debf24995",
  118. "66e38a1fb25c3e1debf2494a",
  119. "66e389f3b25c3e1debf2482e",
  120. "66e389f3b25c3e1debf24824",
  121. "66e389f3b25c3e1debf2481d",
  122. "66e389f3b25c3e1debf24819",
  123. "66e389f3b25c3e1debf24816",
  124. "66e389f3b25c3e1debf24808",
  125. "66e389d5b25c3e1debf247ec",
  126. "66e389adb25c3e1debf24792",
  127. "66e389a6b25c3e1debf24733",
  128. "66e389a6b25c3e1debf24727",
  129. "66e389a3b25c3e1debf246f3",
  130. "66e3899cb25c3e1debf246ee",
  131. "66e3874db25c3e1debf23d91",
  132. "66e38739b25c3e1debf23d28",
  133. "66e38720b25c3e1debf23cf3",
  134. "66e386f3b25c3e1debf23cac",
  135. "66e386dfb25c3e1debf23c5b",
  136. "66e386dfb25c3e1debf23c5a",
  137. "66e386d5b25c3e1debf23c1b",
  138. "66e38699b25c3e1debf23b3b",
  139. "66e38694b25c3e1debf23b35",
  140. "66e3867bb25c3e1debf23af4",
  141. "66e38671b25c3e1debf23ad6",
  142. "66e38671b25c3e1debf23aa3",
  143. "66e38671b25c3e1debf23a97",
  144. "66e38661b25c3e1debf23a6a",
  145. "66e38657b25c3e1debf23a27",
  146. "66e3864db25c3e1debf23a10",
  147. "66e385f3b25c3e1debf23954",
  148. "66e385dfb25c3e1debf2393a",
  149. "66e385d0b25c3e1debf238de",
  150. "66e385c6b25c3e1debf23896",
  151. "66e385adb25c3e1debf2381b",
  152. "66e385a3b25c3e1debf237eb",
  153. "66e385a3b25c3e1debf237e0",
  154. "66e38570b25c3e1debf23778",
  155. "66e38566b25c3e1debf23765",
  156. "66e3855cb25c3e1debf23755",
  157. "66e38534b25c3e1debf2369e",
  158. "66e38526b25c3e1debf2367c",
  159. "66e3851cb25c3e1debf23646",
  160. "66e3851cb25c3e1debf2363d",
  161. "66e3851cb25c3e1debf2363a",
  162. "66e38512b25c3e1debf23621",
  163. "66e38512b25c3e1debf2360e",
  164. "66e384f8b25c3e1debf23590",
  165. "66e384eeb25c3e1debf23572",
  166. "66e384c2b25c3e1debf2350b",
  167. "66e3847fb25c3e1debf23474",
  168. }
  169. pool_mgo := make(chan bool, 200)
  170. wg_mgo := &sync.WaitGroup{}
  171. log.Debug("预计处理条数···", len(arr))
  172. for k, v := range arr {
  173. if k%10 == 0 {
  174. log.Debug("cur index ", k)
  175. }
  176. pool_mgo <- true
  177. wg_mgo.Add(1)
  178. go func(v string) {
  179. defer func() {
  180. <-pool_mgo
  181. wg_mgo.Done()
  182. }()
  183. data := ul.BidMgo.FindById("bidding", v)
  184. if len(data) >= 0 && data != nil {
  185. detail := qu.ObjToString(data["detail"])
  186. filetext := qu.ObjToString(data["filetext"]) //此处为附件信息···
  187. title := qu.ObjToString(data["title"])
  188. if strings.Contains(title, "开标记录") { //开标记录舍弃
  189. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  190. "ispkg": 0,
  191. "s_id": v,
  192. })
  193. log.Debug("开标记录舍弃···")
  194. return
  195. }
  196. if data["jyfb_data"] != nil { //剑鱼发布舍弃qi
  197. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  198. "ispkg": 0,
  199. "s_id": v,
  200. })
  201. log.Debug("剑鱼发布舍弃···")
  202. return
  203. }
  204. if ul.IsTool && utf8.RuneCountInString(detail) < 100 {
  205. detail = filetext
  206. }
  207. if utf8.RuneCountInString(detail) < 100 {
  208. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  209. "ispkg": 0,
  210. "s_id": v,
  211. })
  212. log.Debug("长度不符舍弃···")
  213. return
  214. }
  215. //获取外围字段数据
  216. //分包判断-获取信息
  217. //ispkg, pkg := false, map[string]interface{}{}
  218. //if ispkg = prompt.AcquireIsPackageInfo(detail); ispkg {
  219. // if pkg = prompt.AcquireMultiplePackageInfo(detail); len(pkg) > 0 {
  220. //
  221. // }
  222. //}
  223. pkg := prompt.AcquireMultiplePackageInfo(detail)
  224. if s_pkg, ok := pkg["s_pkg"].(map[string]map[string]interface{}); ok {
  225. if len(s_pkg) <= 0 {
  226. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  227. "ispkg": 0,
  228. "s_id": v,
  229. })
  230. } else if len(s_pkg) == 1 {
  231. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  232. "ispkg": 1,
  233. "pkg": pkg,
  234. "s_id": v,
  235. })
  236. } else {
  237. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  238. "ispkg": 2,
  239. "s_id": v,
  240. "pkg": pkg,
  241. })
  242. }
  243. } else {
  244. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  245. "ispkg": 0,
  246. "s_id": v,
  247. })
  248. }
  249. } else {
  250. log.Debug("未查询到数据···")
  251. ul.BidMgo.Save("zkzkzkzk", map[string]interface{}{
  252. "ispkg": -1,
  253. })
  254. }
  255. }(v)
  256. }
  257. wg_mgo.Wait()
  258. log.Debug("is over ···")
  259. }
  260. // 导出需要修复的
  261. func TestFullJinOrCodeInfo() {
  262. q := map[string]interface{}{}
  263. pool_mgo := make(chan bool, 20)
  264. wg_mgo := &sync.WaitGroup{}
  265. sess := ul.SourceMgo.GetMgoConn()
  266. defer ul.SourceMgo.DestoryMongoConn(sess)
  267. total := 0
  268. it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
  269. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  270. if total%10000 == 0 {
  271. log.Debug("cur index ", total)
  272. }
  273. pool_mgo <- true
  274. wg_mgo.Add(1)
  275. go func(tmp map[string]interface{}) {
  276. defer func() {
  277. <-pool_mgo
  278. wg_mgo.Done()
  279. }()
  280. tmpid := ul.BsonTOStringId(tmp["_id"])
  281. isPcode, update := false, map[string]interface{}{}
  282. ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
  283. o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
  284. if ext_ai_record != nil {
  285. o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
  286. o_budget = qu.Float64All((*ext_ai_record)["budget"])
  287. o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
  288. }
  289. if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
  290. if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
  291. update["budget"] = filterAmount(r_budget, o_budget)
  292. }
  293. }
  294. if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
  295. if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
  296. update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
  297. }
  298. }
  299. //对于编号
  300. if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
  301. if o_projectcode != projectcode {
  302. if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
  303. fns := getpnsinfo(data) //获取附件名字
  304. for _, v := range fns {
  305. if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
  306. if strings.Contains(v, projectcode) {
  307. isPcode = true
  308. break
  309. }
  310. }
  311. }
  312. if isPcode {
  313. update["projectcode"] = o_projectcode
  314. }
  315. }
  316. }
  317. }
  318. if len(update) > 0 {
  319. //更新抽取表
  320. ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
  321. "$set": update,
  322. })
  323. //保存待修复表
  324. update["_id"] = tmp["_id"]
  325. ul.SourceMgo.Save("zzzzz_kkk_uc_0907", update)
  326. }
  327. }(tmp)
  328. tmp = make(map[string]interface{})
  329. }
  330. wg_mgo.Wait()
  331. log.Debug("repair ai is over ...")
  332. }
  333. // 修复金额和编号
  334. func TestRepairJinOrCodeInfo() {
  335. q := map[string]interface{}{}
  336. pool_mgo := make(chan bool, 20)
  337. wg_mgo := &sync.WaitGroup{}
  338. sess := ul.SourceMgo.GetMgoConn()
  339. defer ul.SourceMgo.DestoryMongoConn(sess)
  340. total := 0
  341. it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
  342. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  343. if total%10000 == 0 {
  344. log.Debug("cur index ", total)
  345. }
  346. pool_mgo <- true
  347. wg_mgo.Add(1)
  348. go func(tmp map[string]interface{}) {
  349. defer func() {
  350. <-pool_mgo
  351. wg_mgo.Done()
  352. }()
  353. tmpid := ul.BsonTOStringId(tmp["_id"])
  354. isPcode, update := false, map[string]interface{}{}
  355. ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"])
  356. o_projectcode, o_budget, o_bidamount := "", 0.0, 0.0
  357. if ext_ai_record != nil {
  358. o_projectcode = qu.ObjToString((*ext_ai_record)["projectcode"])
  359. o_budget = qu.Float64All((*ext_ai_record)["budget"])
  360. o_bidamount = qu.Float64All((*ext_ai_record)["bidamount"])
  361. }
  362. if r_budget := qu.Float64All(tmp["budget"]); r_budget > 0.0 && o_budget > 0.0 && r_budget < 1000000000.0 {
  363. if r_budget/o_budget == 10000.0 || o_budget/r_budget == 10000.0 {
  364. update["budget"] = filterAmount(r_budget, o_budget)
  365. }
  366. }
  367. if r_bidamount := qu.Float64All(tmp["bidamount"]); r_bidamount > 0.0 && o_bidamount > 0.0 && r_bidamount < 1000000000.0 {
  368. if r_bidamount/o_bidamount == 10000.0 || o_bidamount/r_bidamount == 10000.0 {
  369. update["bidamount"] = filterAmount(r_bidamount, o_bidamount)
  370. }
  371. }
  372. //对于编号
  373. if projectcode := qu.ObjToString(tmp["projectcode"]); projectcode != "" {
  374. if o_projectcode != projectcode {
  375. if data := ul.SourceMgo.FindById("bidding", tmpid); data != nil {
  376. fns := getpnsinfo(data) //获取附件名字
  377. for _, v := range fns {
  378. if utf8.RuneCountInString(v) >= utf8.RuneCountInString(projectcode) {
  379. if strings.Contains(v, projectcode) {
  380. isPcode = true
  381. break
  382. }
  383. }
  384. }
  385. if isPcode {
  386. update["projectcode"] = o_projectcode
  387. }
  388. }
  389. }
  390. }
  391. if len(update) > 0 {
  392. ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
  393. "$set": update,
  394. })
  395. }
  396. }(tmp)
  397. tmp = make(map[string]interface{})
  398. }
  399. wg_mgo.Wait()
  400. log.Debug("repair ai is over ...")
  401. }
  // filterAmount picks one of two candidate amounts.
  //
  // It returns the larger of f1 and f2, unless that larger value exceeds
  // 100,000,000, in which case the smaller one is returned. When the two are
  // equal (or cannot be ordered) f1 is returned.
  func filterAmount(f1 float64, f2 float64) float64 {
  	const ceiling = 100000000.0
  	switch {
  	case f1 > f2 && f1 > ceiling:
  		// f1 is the larger one but exceeds the ceiling: fall back to f2.
  		return f2
  	case f1 < f2 && f2 <= ceiling:
  		// f2 is the larger one and still within the ceiling.
  		return f2
  	default:
  		return f1
  	}
  }
  421. func TestExportJinErInfo() {
  422. q := map[string]interface{}{}
  423. pool_mgo := make(chan bool, 20)
  424. wg_mgo := &sync.WaitGroup{}
  425. sess := ul.SourceMgo.GetMgoConn()
  426. defer ul.SourceMgo.DestoryMongoConn(sess)
  427. total, isok := 0, 0
  428. it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Iter()
  429. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  430. if total%10000 == 0 {
  431. log.Debug("cur index ", total)
  432. }
  433. isok++
  434. pool_mgo <- true
  435. wg_mgo.Add(1)
  436. go func(tmp map[string]interface{}) {
  437. defer func() {
  438. <-pool_mgo
  439. wg_mgo.Done()
  440. }()
  441. tmpid := ul.BsonTOStringId(tmp["_id"])
  442. budget := qu.Float64All(tmp["budget"])
  443. bidamount := qu.Float64All(tmp["bidamount"])
  444. saveinfo := map[string]interface{}{}
  445. if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
  446. ext_budget := qu.Float64All((*ext_ai_record)["budget"])
  447. ext_bidamount := qu.Float64All((*ext_ai_record)["bidamount"])
  448. if budget > 0.0 && ext_budget > 0.0 {
  449. if budget/ext_budget == 10000.0 || ext_budget/budget == 10000.0 {
  450. saveinfo["budget"] = budget
  451. saveinfo["ext_budget"] = ext_budget
  452. }
  453. }
  454. if bidamount > 0.0 && ext_bidamount > 0.0 {
  455. if bidamount/ext_bidamount == 10000.0 || ext_bidamount/bidamount == 10000.0 {
  456. saveinfo["bidamount"] = bidamount
  457. saveinfo["ext_bidamount"] = ext_bidamount
  458. }
  459. }
  460. }
  461. if len(saveinfo) > 0 && tmpid != "" {
  462. saveinfo["toptype"] = tmp["toptype"]
  463. saveinfo["subtype"] = tmp["subtype"]
  464. saveinfo["href"] = tmp["href"]
  465. saveinfo["jyhref"] = tmp["jytest_href"]
  466. ul.SourceMgo.Save("zktest_zzzzzkkk_0903", saveinfo)
  467. }
  468. }(tmp)
  469. tmp = make(map[string]interface{})
  470. }
  471. wg_mgo.Wait()
  472. log.Debug("repair ai is over ...", isok)
  473. }
  474. // 修正buyer等字段
  475. func TestRepairBuyerInfo(name string) {
  476. q := map[string]interface{}{}
  477. pool_mgo := make(chan bool, 20)
  478. wg_mgo := &sync.WaitGroup{}
  479. sess := ul.SourceMgo.GetMgoConn()
  480. defer ul.SourceMgo.DestoryMongoConn(sess)
  481. total, isok := 0, 0
  482. it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
  483. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  484. if total%10000 == 0 {
  485. log.Debug("cur index ", total)
  486. }
  487. isok++
  488. pool_mgo <- true
  489. wg_mgo.Add(1)
  490. go func(tmp map[string]interface{}) {
  491. defer func() {
  492. <-pool_mgo
  493. wg_mgo.Done()
  494. }()
  495. tmpid := ul.BsonTOStringId(tmp["_id"])
  496. buyer := qu.ObjToString(tmp["buyer"])
  497. agency := qu.ObjToString(tmp["agency"])
  498. winner := qu.ObjToString(tmp["winner"])
  499. update := map[string]interface{}{}
  500. if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
  501. o_buyer := qu.ObjToString((*ext_ai_record)["buyer"])
  502. if buyer == agency && o_buyer != "" {
  503. update["buyer"] = o_buyer
  504. }
  505. o_winner := qu.ObjToString((*ext_ai_record)["winner"])
  506. if o_winner != "" && strings.Contains(winner, o_winner) && o_winner != o_winner {
  507. update["winner"] = o_winner
  508. }
  509. }
  510. if len(update) > 0 && tmpid != "" {
  511. ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
  512. "$set": update,
  513. })
  514. }
  515. }(tmp)
  516. tmp = make(map[string]interface{})
  517. }
  518. wg_mgo.Wait()
  519. log.Debug("repair ai is over ...", isok)
  520. }
  521. func TestDelUpBuyerAi() {
  522. dataArr, _ := ul.SourceMgo.Find("zktest_buyer_0828_new", map[string]interface{}{}, nil, nil)
  523. pool_mgo := make(chan bool, 50)
  524. wg_mgo := &sync.WaitGroup{}
  525. for k, v := range dataArr {
  526. if k%1000 == 0 {
  527. log.Debug(k, "~", v["_id"])
  528. }
  529. pool_mgo <- true
  530. wg_mgo.Add(1)
  531. go func(v map[string]interface{}) {
  532. defer func() {
  533. <-pool_mgo
  534. wg_mgo.Done()
  535. }()
  536. buyer := qu.ObjToString(v["buyer"])
  537. tmpid := ul.BsonTOStringId(v["_id"])
  538. data1 := ul.SourceMgo.FindById("result_20220218", tmpid)
  539. if len(data1) > 0 {
  540. ul.SourceMgo.UpdateById("result_20220218", tmpid, map[string]interface{}{
  541. "$set": map[string]interface{}{"buyer": buyer},
  542. })
  543. }
  544. data2 := ul.SourceMgo.FindById("result_20220219", tmpid)
  545. if len(data2) > 0 {
  546. ul.SourceMgo.UpdateById("result_20220219", tmpid, map[string]interface{}{
  547. "$set": map[string]interface{}{"buyer": buyer},
  548. })
  549. }
  550. }(v)
  551. }
  552. wg_mgo.Wait()
  553. log.Debug("del ai is over ...")
  554. }
  555. func TestAiBuyerInfo() {
  556. //dataArr, _ := ul.SourceMgo.Find("zktest_buyer_info", map[string]interface{}{}, nil, nil)
  557. q := map[string]interface{}{}
  558. pool_mgo := make(chan bool, 50)
  559. wg_mgo := &sync.WaitGroup{}
  560. sess := ul.SourceMgo.GetMgoConn()
  561. defer ul.SourceMgo.DestoryMongoConn(sess)
  562. total, isok := 0, 0
  563. it := sess.DB(ul.SourceMgo.DbName).C("zktest_repeat_new").Find(&q).Sort("_id").Iter()
  564. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  565. if total%1000 == 0 {
  566. log.Debug("cur index ", total)
  567. }
  568. isok++
  569. pool_mgo <- true
  570. wg_mgo.Add(1)
  571. go func(tmp map[string]interface{}) {
  572. defer func() {
  573. <-pool_mgo
  574. wg_mgo.Done()
  575. }()
  576. tmpid := ul.BsonTOStringId(tmp["_id"])
  577. if buyer := qu.ObjToString(tmp["buyer"]); buyer != "" {
  578. if zp_buyer := prompt.AcquireBuyerInfo(buyer); zp_buyer["实体单位"] != nil {
  579. if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
  580. ul.SourceMgo.UpdateById("zktest_repeat_new", tmpid, map[string]interface{}{
  581. "$set": map[string]interface{}{"buyer": ns_buyer},
  582. })
  583. }
  584. }
  585. }
  586. }(tmp)
  587. tmp = make(map[string]interface{})
  588. }
  589. wg_mgo.Wait()
  590. log.Debug("repair ai is over ...", isok)
  591. }
  592. func TestExportAiBuyer() {
  593. sess := ul.SourceMgo.GetMgoConn()
  594. defer ul.SourceMgo.DestoryMongoConn(sess)
  595. pool_mgo := make(chan bool, 10)
  596. wg_mgo := &sync.WaitGroup{}
  597. q, total := map[string]interface{}{
  598. "_id": map[string]interface{}{
  599. "$lte": ul.StringTOBsonId("66cd8299b25c3e1deb9488dd"),
  600. },
  601. }, 0
  602. it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("_id").Select(map[string]interface{}{
  603. "ai_zhipu": 1,
  604. "ext_ai_record": 1,
  605. }).Iter()
  606. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  607. if total%10000 == 0 {
  608. log.Debug("cur index ", total, "~", tmp["_id"])
  609. }
  610. pool_mgo <- true
  611. wg_mgo.Add(1)
  612. go func(tmp map[string]interface{}) {
  613. defer func() {
  614. <-pool_mgo
  615. wg_mgo.Done()
  616. }()
  617. ai_buyer, ext_buyer := "", ""
  618. if ai_zhipu := qu.ObjToMap(tmp["ai_zhipu"]); ai_zhipu != nil {
  619. ai_buyer = qu.ObjToString((*ai_zhipu)["s_buyer"])
  620. }
  621. if ext_ai_record := qu.ObjToMap(tmp["ext_ai_record"]); ext_ai_record != nil {
  622. ext_buyer = qu.ObjToString((*ext_ai_record)["buyer"])
  623. }
  624. if ai_buyer != "" {
  625. ul.SourceMgo.Save("zktest_buyer_0827", map[string]interface{}{
  626. "_id": tmp["_id"],
  627. "ai_buyer": ai_buyer,
  628. "ext_buyer": ext_buyer,
  629. })
  630. }
  631. }(tmp)
  632. tmp = make(map[string]interface{})
  633. }
  634. wg_mgo.Wait()
  635. log.Debug("export is over ", total)
  636. }
  637. func TestIsPackage() {
  638. tmpArr := []string{}
  639. pkgArr := []int{}
  640. ok := 0
  641. for k, v := range tmpArr {
  642. data := ul.SourceMgo.FindById("ai_41411", v)
  643. if len(data) == 0 {
  644. data = ul.SourceMgo.FindById("ai_294", v)
  645. }
  646. detail := qu.ObjToString(data["detail"])
  647. ispkg := prompt.AcquireIsPackageInfo(detail)
  648. if (ispkg && pkgArr[k] == 1) || (!ispkg && pkgArr[k] == 0) {
  649. ok++
  650. } else {
  651. log.Debug("错误~", v)
  652. }
  653. }
  654. log.Debug("is over ~ ", len(tmpArr)-ok)
  655. }
  656. func TestPackageInfo() {
  657. sess := ul.SourceMgo.GetMgoConn()
  658. defer ul.SourceMgo.DestoryMongoConn(sess)
  659. q, total := map[string]interface{}{"ai_zhipu.ispkg": true}, 0
  660. it := sess.DB(ul.SourceMgo.DbName).C("result_20220218").Find(&q).Sort("-_id").Iter()
  661. isok := 0
  662. os.Remove("test.xlsx")
  663. f := new_xlsx.NewFile()
  664. sheet, _ := f.AddSheet("数据信息")
  665. row := sheet.AddRow()
  666. writeRow(row, []string{"序号", "唯一标识", "站点", "项目名称", "一级分类", "二级分类", "原文链接", "剑鱼链接", "子包名称", "子包单位", "子包金额"})
  667. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  668. if total%10000 == 0 {
  669. log.Debug("cur index ", total, "~", isok)
  670. }
  671. tmpid := ul.BsonTOStringId(tmp["_id"])
  672. site := qu.ObjToString(tmp["site"])
  673. projectname := qu.ObjToString(tmp["projectname"])
  674. toptype := qu.ObjToString(tmp["toptype"])
  675. subtype := qu.ObjToString(tmp["subtype"])
  676. href := qu.ObjToString(tmp["href"])
  677. jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  678. ai_zhipu := *qu.ObjToMap(tmp["ai_zhipu"])
  679. if s_pkg := qu.ObjToMap(ai_zhipu["s_pkg"]); s_pkg != nil {
  680. if s_info := qu.ObjToMap((*s_pkg)["s_pkg"]); s_info != nil && len(*s_info) > 1 {
  681. isok++
  682. for _, v := range *s_info {
  683. if v1 := qu.ObjToMap(v); v1 != nil {
  684. row = sheet.AddRow()
  685. arr := []string{}
  686. arr = append(arr, fmt.Sprintf("%d", isok))
  687. arr = append(arr, tmpid)
  688. arr = append(arr, site)
  689. arr = append(arr, projectname)
  690. arr = append(arr, toptype)
  691. arr = append(arr, subtype)
  692. arr = append(arr, href)
  693. arr = append(arr, jyhref)
  694. arr = append(arr, qu.ObjToString((*v1)["name"]))
  695. arr = append(arr, qu.ObjToString((*v1)["winner"]))
  696. bidamount := qu.Float64All((*v1)["bidamount"])
  697. if bidamount > 0.0 {
  698. arr = append(arr, fmt.Sprintf("%.2f", bidamount))
  699. } else {
  700. arr = append(arr, "")
  701. }
  702. writeRow(row, arr)
  703. }
  704. }
  705. if isok > 1000 {
  706. break
  707. }
  708. }
  709. }
  710. tmp = make(map[string]interface{})
  711. }
  712. log.Debug("is over ", total, isok)
  713. if err := f.Save("test.xlsx"); err != nil {
  714. fmt.Println("保存xlsx失败:", err)
  715. } else {
  716. fmt.Println("保存xlsx成功:", err)
  717. }
  718. log.Debug("is over ...")
  719. }
  720. // 更新链接
  721. func TestUpdateJyhref(name string) {
  722. dataArr, _ := ul.SourceMgo.Find(name, map[string]interface{}{}, nil, map[string]interface{}{"_id": 1})
  723. for _, v := range dataArr {
  724. tmpid := ul.BsonTOStringId(v["_id"])
  725. jyhref := fmt.Sprintf(ul.Url, qu.CommonEncodeArticle("content", tmpid))
  726. ul.SourceMgo.UpdateById(name, tmpid, map[string]interface{}{
  727. "$set": map[string]interface{}{
  728. "jyhref": jyhref,
  729. },
  730. })
  731. }
  732. log.Debug("is over ...")
  733. }
  734. func writeRow(row *new_xlsx.Row, arr []string) {
  735. for _, v := range arr {
  736. row.AddCell().Value = v
  737. }
  738. }