# Scrape wallpapers from the Netbian (彼岸) wallpaper site.
import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
# Request headers that present the scraper as a desktop Chrome browser.
UA = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
# Site root; also the base against which relative links are resolved.
BASE_URL = 'http://www.netbian.com'
# Directory the images are written into; created at start-up if missing.
SAVE_DIR = 'c:/cl/a/'
# Seconds to wait on the server before giving up on a single request.
TIMEOUT = 15
# Selects the detail-page links on a listing page.
DETAIL_LINK_XPATH = '//*[@id="main"]/div/ul/li/a/@href'
# Selects the image sources on a detail page.
IMAGE_SRC_XPATH = '//*[@id="main"]/div/div/p/a/img/@src'


def page_url(page: int) -> str:
    """Return the listing URL for the 1-based *page* of the category.

    The first page has no index suffix; later pages use ``index_<n>.htm``.
    """
    if page == 1:
        return f'{BASE_URL}/meinv'
    return f'{BASE_URL}/meinv/index_{page}.htm'


def image_name(url: str) -> str:
    """Return the last path segment of *url*, ignoring query and fragment.

    Returns an empty string when the path ends with a slash.
    """
    return urlsplit(url).path.rsplit('/', 1)[-1]


def select(session, url: str, xpath: str) -> list:
    """Fetch *url* with *session* and evaluate *xpath* on the parsed HTML.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = session.get(url, timeout=TIMEOUT)
    response.raise_for_status()
    tree = etree.HTML(response.text)
    # NOTE(review): an empty body appears to parse to None in lxml; treat
    # that as "no matches" rather than failing on `.xpath`.
    if tree is None:
        return []
    return tree.xpath(xpath)


def save_image(session, url: str, path: str) -> None:
    """Download *url* with *session* and write the bytes to *path*.

    The file is only opened after the download succeeded, so a failed
    request never leaves an empty file behind.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = session.get(url, timeout=TIMEOUT)
    response.raise_for_status()
    with open(path, 'wb') as out:
        out.write(response.content)


def report_failure(url: str, exc: Exception) -> None:
    """Print a one-line notice that *url* was skipped because of *exc*."""
    print(f'请求失败,跳过 {url}: {exc}')


def scrape_detail(session, detail_url: str) -> None:
    """Download every image found on the detail page at *detail_url*."""
    try:
        sources = select(session, detail_url, IMAGE_SRC_XPATH)
    except requests.RequestException as exc:
        report_failure(detail_url, exc)
        return
    for src in sources:
        # Resolve against the detail page so relative sources work too.
        image_url = urljoin(detail_url, src)
        name = image_name(image_url)
        if not name:
            continue
        try:
            save_image(session, image_url, os.path.join(SAVE_DIR, name))
        except requests.RequestException as exc:
            report_failure(image_url, exc)
            continue
        print(name,'下载完成。')


def scrape_page(session, page: int) -> None:
    """Visit every detail page linked from listing page number *page*."""
    list_url = page_url(page)
    try:
        hrefs = select(session, list_url, DETAIL_LINK_XPATH)
    except requests.RequestException as exc:
        report_failure(list_url, exc)
        return
    for href in hrefs:
        # urljoin leaves absolute hrefs intact instead of gluing the site
        # root in front of them, which would produce an invalid URL.
        scrape_detail(session, urljoin(BASE_URL, href))


def main() -> None:
    """Crawl listing pages 1-9, save every image, and print the run time."""
    start_time = time.time()
    os.makedirs(SAVE_DIR, exist_ok=True)
    # One session reuses connections and sends the UA header on every
    # request, including the image downloads.
    with requests.Session() as session:
        session.headers.update(UA)
        for page in range(1, 10):
            scrape_page(session, page)
    elapsed = time.time() - start_time
    print(f'一共用时{elapsed}秒')


if __name__ == '__main__':
    main()
# Page:
# [1]