|
import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
# Browser-like User-Agent sent with every request (pages and images alike).
UA = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

BASE_URL = 'http://www.netbian.com'
SAVE_DIR = 'c:/cl/a/'
# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 15

# XPath of the detail-page links on a listing page.
LISTING_XPATH = '//*[@id="main"]/div[3]/ul/li/a/@href'
# XPath of the image source(s) on a detail page.
IMAGE_XPATH = '//*[@id="main"]/div[3]/div/p/a/img/@src'


def listing_url(ye):
    """Return the URL of listing page number *ye* (1-based).

    The first page has no index suffix; later pages use ``index_<n>.htm``.
    """
    if ye == 1:
        return f'{BASE_URL}/meinv'
    return f'{BASE_URL}/meinv/index_{ye}.htm'


def image_filename(url):
    """Return the last path segment of *url*, ignoring query and fragment.

    An empty string is returned when the URL path ends with ``/``.
    """
    return urlsplit(url).path.rsplit('/', 1)[-1]


def fetch(url):
    """GET *url* with the shared headers and timeout.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response status.
    """
    response = requests.get(url=url, headers=UA, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def extract_links(page_url, xpath):
    """Fetch *page_url* and return the *xpath* matches as absolute URLs.

    Links are resolved against the final response URL, so relative,
    root-relative and already-absolute values are all handled.
    """
    response = fetch(page_url)
    tree = etree.HTML(response.text)
    if tree is None:  # nothing parseable came back
        return []
    return [urljoin(response.url, link) for link in tree.xpath(xpath)]


def safe_links(page_url, xpath):
    """Like :func:`extract_links`, but report a failure and return ``[]``.

    One unreachable or unparseable page must not abort the whole crawl.
    """
    try:
        return extract_links(page_url, xpath)
    except (requests.RequestException, etree.LxmlError) as exc:
        print(page_url, '请求失败:', exc)
        return []


def download_image(url):
    """Download *url* into ``SAVE_DIR``; return True if a file was written.

    Raises:
        requests.RequestException: if the download fails.
        OSError: if the file cannot be written.
    """
    picname = image_filename(url)
    if not picname:
        # No usable file name in the URL; nothing sensible to save it as.
        return False
    data = fetch(url).content
    with open(os.path.join(SAVE_DIR, picname), 'wb') as out:
        out.write(data)
    print(picname, '下载完成。')
    return True


def main():
    """Crawl listing pages 1-9, save every image found, and print the elapsed time."""
    start_time = time.time()
    os.makedirs(SAVE_DIR, exist_ok=True)

    for ye in range(1, 10):  # listing pages to crawl
        for detail_url in safe_links(listing_url(ye), LISTING_XPATH):
            for image_url in safe_links(detail_url, IMAGE_XPATH):
                try:
                    download_image(image_url)
                except (requests.RequestException, OSError) as exc:
                    print(image_url, '下载失败:', exc)

    shijian = time.time() - start_time
    print(f'一共用时{shijian}秒')


if __name__ == '__main__':
    main()
复制代码
|
|