# data_spider / topic_spider
							from crawler.download import Downloader, RenderDownloader
from crawler.services.channel import bfs

if __name__ == '__main__':
    # Manual smoke test: fetch a single listing page through the render
    # downloader and print whatever it returns.
    plain_downloader = Downloader()  # instantiated, but not used by the active request below
    render_downloader = RenderDownloader()

    # Candidate listing pages collected for manual runs. To try one,
    # uncomment it so that it overrides `page_url`.
    # page_url = 'http://zbpt.zycqjy.com/rest/sub_list_nav.cs#'
    # page_url = 'http://fgw.hubei.gov.cn/fbjd/xxgkml/xkfw/xzxkjg/xmbaqk/'
    # page_url = 'https://fzggw.zj.gov.cn/col/col1599544/index.html'
    # page_url = 'http://113.200.193.24:8009/Main/Projects#'
    # page_url = 'http://jjc.usx.edu.cn/zbxx.htm#'
    # page_url = 'https://www.xxggzy.cn/jyxx/089003/089003001/moreinfo_len6.html'
    # page_url = 'http://www.hdzbgs.com/List.aspx?id=12'
    # page_url = 'https://ggzy.qiannan.gov.cn/zfcg_500203/zbgg_5060411/index.html'
    # page_url = 'http://www.lzlcgroup.com/cms/column/index/id/57.html'
    # page_url = 'http://ggzy.zjlg.gov.cn:86/TPFront/jyxx/004002/'
    # page_url = 'https://www.elongbiao.com/List/NoticeP/9'
    # page_url = 'https://www.elongbiao.com/List/Notice/12'  # multiple time texts; algorithm optimized once
    # page_url = 'http://lytjj.longyan.gov.cn/xxgk/tjgg/'
    # page_url = 'http://www.lydeyy.com/plus/list.php?tid=36'  # time text; algorithm optimized once
    # page_url = 'https://ggzy.longyan.gov.cn/lyztb/gcjs/007004/moreinfo.html'  # algorithm optimized once
    # page_url = 'https://ggzy.longyan.gov.cn/lyztb/gcjs/007002/007002004/moreinfo.html'
    # page_url = 'http://ly.fjycw.com/NewsList.aspx?GUID=48-48-55'

    # Channel extraction with several time-text windows; optimization done.
    # page_url = 'http://www.hljcg.gov.cn/welcome.jsp?dq=2302'
    # Optimized: time-text blocks outnumbered the ancestor nodes, so not all of
    # them could be removed and the leftover blocks interfered with extraction.
    # page_url = 'https://ggzy.longyan.gov.cn/lyztb/zqcg/008004/moreinfo.html'
    # page_url = 'http://www.shanghang.gov.cn/zwgk/zwgkzdgz/gczb/sphzbaxx/'
    # page_url = 'http://www.qlebid.com/cms/channel/1ywgg4qb/index.htm'
    # page_url = ' http://zhaobiao.elongcheng.com:82/'
    # page_url = 'http://www.gdgpo.gov.cn/queryPlanList.do'
    # page_url = 'http://www.ccgp-hebei.gov.cn/province/cggg/dyly/'
    # page_url = 'http://www.xtsrmyy.com.cn/newlist.asp?bigclassid=4&smallclassid=5'
    # page_url = 'http://jsj.yima.gov.cn/col/col109/index.html'
    # page_url = 'http://zw.hainan.gov.cn/wssc/ra/projects/rp_list.html?num=3'
    # page_url = 'http://www.hlbeggzyjy.org.cn/jygk/021001/trade_public.html'
    # page_url = 'http://jsj.yima.gov.cn/col/col109/index.html?uid=8327&pageNum=4'
    # page_url = 'http://oldzfcg.scsczt.cn/CmsNewsController.do?method=recommendBulletinList&moreType=provincebuyBulletinMore&channelCode=cggg&rp=25&page=1'
    # page_url = 'http://www.ccgp-xizang.gov.cn/freecms/site/xizang/index.html'
    # page_url = 'http://ggzy.yn.gov.cn/#/tradeHall/tradeList'
    # page_url = 'http://www.gdgpo.gov.cn/queryPlanList.do'
    # page_url = 'http://www.ccgp-gansu.gov.cn/web/contract/0/index.htm?contractsInfo.id=d0'

    # JavaScript-rendered pages.
    # page_url = 'http://zhaobiao.elongcheng.com:82/'  # detail link lives in an onclick handler
    page_url = 'https://ebid.espic.com.cn/newgdtcms//category/purchaseListNew.html?dates=300&categoryId=2&tenderMethod=00&tabName=%E9%87%87%E8%B4%AD%E4%BF%A1%E6%81%AF&page=1'

    # Fetch with a 3-unit timeout (presumably seconds -- confirm against
    # RenderDownloader.get) and dump the result for eyeballing.
    response = render_downloader.get(page_url, timeout=3)
    print(response)
    # Next step once the fetch looks right: bfs(response, page_url)