赵乾舟 发表于 2021-8-21 12:11:06

爬取彼岸网站壁纸

import os
import time

import requests
from lxml import etree
# Browser-like header sent with every request (listing pages, detail pages
# and the image files themselves).
UA = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
BASE_URL = 'http://www.netbian.com'
SAVE_DIR = 'c:/cl/a/'
# Seconds to wait for a server before giving up on a single request;
# without a timeout, requests can block indefinitely on a stalled connection.
TIMEOUT = 10


def page_url(ye):
    """Return the listing-page URL for 1-based page number ``ye``.

    Page 1 lives at the category root; later pages use ``index_<n>.htm``.
    """
    if ye == 1:
        return f'{BASE_URL}/meinv'
    return f'{BASE_URL}/meinv/index_{ye}.htm'


def fetch(url):
    """GET ``url`` with the shared headers and timeout.

    Returns the ``requests.Response`` on success, or ``None`` when the
    request fails or the server answers with an HTTP error status. The
    failure is printed so one bad URL does not abort the whole crawl and
    an error page is never saved as an image.
    """
    try:
        resp = requests.get(url=url, headers=UA, timeout=TIMEOUT)
        resp.raise_for_status()
    except requests.RequestException as exc:
        print(f'请求失败，已跳过: {url} ({exc})')
        return None
    return resp


def main(first_page=1, last_page=9):
    """Download every image linked from listing pages first_page..last_page.

    For each listing page the detail-page links are collected, each detail
    page is fetched, and every image it references is written to SAVE_DIR
    under the last path component of the image URL. Prints the elapsed
    wall-clock time at the end.
    """
    start_time = time.time()
    # Create the target directory up front so open() cannot fail on a
    # machine where it does not exist yet.
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Page range to crawl (inclusive on both ends).
    for ye in range(first_page, last_page + 1):
        listing = fetch(page_url(ye))
        if listing is None:
            continue
        hrefs = etree.HTML(listing.text).xpath('//*[@id="main"]/div/ul/li/a/@href')

        for href in hrefs:
            # Listing hrefs are joined onto the site root, as in the original.
            detail = fetch(BASE_URL + href)
            if detail is None:
                continue
            sources = etree.HTML(detail.text).xpath('//*[@id="main"]/div/div/p/a/img/@src')

            for src in sources:
                image = fetch(src)
                if image is None:
                    continue
                picname = src.split('/')[-1]
                with open(SAVE_DIR + picname, 'wb') as out:
                    out.write(image.content)
                print(picname,'下载完成。')

    shijian = time.time() - start_time
    print(f'一共用时{shijian}秒')


if __name__ == '__main__':
    main()


页: [1]
查看完整版本: 爬取彼岸网站壁纸