|
这是我自己写的代码,还有不少可以精简的地方,目前还没有实现翻页功能。
import os
import re
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree
# Download every template linked from the listing page at LIST_URL and save
# each file into SAVE_DIR, named after the template's thumbnail alt text.
# Only the first listing page is processed; pagination is not implemented.

BASE_URL = 'http://www.1ppt.com'
LIST_URL = 'http://www.1ppt.com/moban/jianjie/'
SAVE_DIR = 'd:/ppt1'
HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
# Seconds to wait on each request, so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 30


def safe_filename(title):
    """Return *title* with characters that Windows forbids in file names
    replaced by underscores and surrounding whitespace removed."""
    return re.sub(r'[\\/:*?"<>|]', '_', title).strip()


def build_save_path(title, file_url):
    """Return the local path for a download.

    The file name is the sanitized *title*; the extension is taken from the
    path component of *file_url*, so a query string never leaks into it.
    """
    suffix = os.path.splitext(urlparse(file_url).path)[1]
    return SAVE_DIR + '/' + safe_filename(title) + suffix


def fetch_tree(page_url, encoding=None):
    """Fetch *page_url* and return it parsed by ``etree.HTML``.

    When *encoding* is given it overrides the encoding requests would use to
    decode the body. Raises ``requests.RequestException`` on network failure
    or an HTTP error status.
    """
    response = requests.get(url=page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    if encoding is not None:
        response.encoding = encoding
    return etree.HTML(response.text)


def find_file_url(detail_url):
    """Follow a template's detail page to its download page and return the
    first file link found there, or ``None`` if either page has no link."""
    detail = fetch_tree(detail_url)
    page_links = detail.xpath('//ul[@class="downurllist"]/li/a/@href')
    if not page_links:
        return None
    download_page_url = urljoin(BASE_URL, page_links[0])
    download_page = fetch_tree(download_page_url)
    file_links = download_page.xpath('//li[@class="c1"]/a/@href')
    if not file_links:
        return None
    return urljoin(download_page_url, file_links[0])


def main():
    """Download every template on the listing page into SAVE_DIR."""
    os.makedirs(SAVE_DIR, exist_ok=True)
    listing = fetch_tree(LIST_URL, encoding='gb2312')

    # Walk the list items one by one so each title stays paired with its own
    # link; collecting titles and links in separate lists lets one missing or
    # extra link shift every later name onto the wrong file.
    for item in listing.xpath('//ul[@class="tplist"]/li'):
        hrefs = item.xpath('a/@href')
        titles = item.xpath('a/img/@alt')
        if not hrefs or not titles:
            continue
        title = titles[0]

        try:
            file_url = find_file_url(urljoin(BASE_URL, hrefs[0]))
            if file_url is None:
                print(title, '未找到下载链接,已跳过')
                continue
            payload = requests.get(url=file_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            payload.raise_for_status()
        except requests.RequestException as err:
            # One broken template should not abort the remaining downloads.
            print(title, '下载失败:', err)
            continue

        with open(build_save_path(title, file_url), 'wb') as out:
            out.write(payload.content)
        print(title, '下载完成')


if __name__ == '__main__':
    main()
复制代码
|
|