main.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334
  1. from crawler.spiders.DetailPageSpider import CrawlDetailPageSpider
  2. from crawler.spiders.ListPageSpider import CrawlListPageSpider
  3. def list_page_spider():
  4. headers = {
  5. 'Host': 'www.zbytb.com',
  6. 'Upgrade-Insecure-Requests': '1',
  7. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
  8. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
  9. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  10. }
  11. CrawlListPageSpider(
  12. db='py_spider',
  13. crawl_tab='zbytb_list',
  14. error_tab='crawl_error',
  15. enable_proxy=True,
  16. max_page=20,
  17. headers=headers,
  18. allow_show_exception=False
  19. ).start(workers=4)
  20. def detail_page_spider():
  21. CrawlDetailPageSpider(
  22. db='py_spider',
  23. crawl_tab='zbytb_list',
  24. save_tab='data_bak',
  25. error_tab='crawl_error',
  26. ).start()
  27. if __name__ == '__main__':
  28. detail_page_spider()