source_qianlima_mt.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # -*- coding: utf-8 -*-
  2. import random
  3. import time
  4. from concurrent.futures import Future, ThreadPoolExecutor, wait
  5. from loguru import logger
  6. from source_qianlima import (
  7. core as reqeust,
  8. disrupt_account_pool
  9. )
  10. from utils.config_parms import area_dict, city_dict, province_dict, channel_dict
  11. from utils.tools import get_today_of_day
  def spider(account):
      """Crawl every channel / followed area / city combination for one account.

      Args:
          account: Mapping with at least ``'follow'`` (iterable of area ids
              used as keys into ``area_dict`` and ``province_dict``) and
              ``'phone'`` (string used in the progress log line). The whole
              mapping is forwarded unchanged to the request function.

      Returns:
          None. The crawl for this account stops early as soon as the request
          function returns exactly ``False``.

      Raises:
          Any exception raised by the lookups or by the request function
          propagates unchanged to the caller (the worker's future).
      """
      # NOTE(review): presumably "yesterday" as a date string -- confirm
      # against utils.tools.get_today_of_day.
      date = get_today_of_day(-1)
      follow = account['follow']
      phone = account['phone']

      for category, category_name in channel_dict.items():
          for area_id in follow:
              cities = area_dict[area_id]
              # An area with a single city entry is queried by the area id
              # itself instead of the city id.
              single_city = len(cities) == 1
              for city in cities:
                  # The log line always shows the city's own name, even when
                  # the area id is what gets sent to the request below.
                  logger.info(' && '.join([
                      phone,
                      date,
                      category_name,
                      province_dict[area_id],
                      city_dict[city]
                  ]))
                  target = area_id if single_city else city
                  ret = reqeust(date, category, target, account, page_size=100)
                  if ret is False:
                      # NOTE(review): False presumably signals that this
                      # account can no longer be used -- confirm in
                      # source_qianlima.core.
                      return
  def show_exception(f: Future):
      """Done-callback that logs the exception raised by a worker, if any.

      Args:
          f: The finished future handed over by ``Future.add_done_callback``.

      Returns:
          None.
      """
      # Future.exception() raises CancelledError for a cancelled future;
      # there is nothing to report in that case.
      if f.cancelled():
          return
      error = f.exception()
      if error:
          # This callback runs outside any ``except`` block, so
          # ``logger.exception`` would find no active exception to format.
          # Pass the exception object explicitly to keep the worker traceback.
          logger.opt(exception=error).error(f'工作线程运行错误:{error}')
  def delay_bar(min_seconds=300, max_seconds=900):
      """Block for a random interval while printing a "please wait" indicator.

      The planned start time is logged once, then an animated line of zero to
      three dots is redrawn every half second until the interval has elapsed.

      Args:
          min_seconds: Lower bound (inclusive) of the random delay, in seconds.
          max_seconds: Upper bound (inclusive) of the random delay, in seconds.

      Returns:
          None.

      Raises:
          ValueError: If ``min_seconds`` is greater than ``max_seconds``
              (raised by ``random.randint``).
      """
      try:
          interval = random.randint(min_seconds, max_seconds)
          start_dt = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(time.time() + interval))
          logger.info(f'采集开始时间:{start_dt}')

          # Measure the wait on the monotonic clock so that wall-clock
          # adjustments cannot shorten or stretch the delay.
          deadline = time.monotonic() + interval
          tick = 0
          while time.monotonic() < deadline:
              dots = '.' * (tick % 4)
              # Pad to a fixed width: '\r' only moves the cursor, so a shorter
              # line would leave the previous dots on screen and the animation
              # would appear frozen at three dots.
              print(f'\r请稍候{dots:<3}', end='', flush=True)
              time.sleep(.5)
              tick += 1
      finally:
          # Terminate the in-place progress line, even on interruption.
          print('')
  def start():
      """Run one collection round: wait, then crawl all accounts in a thread pool.

      After the random start delay, one ``spider`` task per account is
      submitted to a pool of five worker threads. Worker exceptions are
      reported through ``show_exception``. A ``KeyboardInterrupt`` is swallowed
      so that the closing log line is still written.

      Returns:
          None.
      """
      try:
          delay_bar()
          logger.info('+++ 采集开始 +++')
          # NOTE(review): presumably returns the account list in shuffled
          # order -- confirm in source_qianlima.disrupt_account_pool.
          account_pool = disrupt_account_pool()
          with ThreadPoolExecutor(max_workers=5) as pool:
              fs = []
              for account in account_pool:
                  f = pool.submit(spider, account)
                  f.add_done_callback(show_exception)
                  # Keep the future; otherwise wait() below receives an
                  # empty list and returns immediately.
                  fs.append(f)
              wait(fs)
      except KeyboardInterrupt:
          pass
      finally:
          print('')
          logger.info('+++ 采集结束 +++')
  # Script entry point: run a collection round only when this file is executed
  # directly, not when it is imported.
  if __name__ == '__main__':
      start()