|
"""Download the documents linked from the jkl.com.cn investor-relations list.

The script walks the first three pages of the listing, collects every
(title, link) pair found under ``div.newsLis`` and saves each linked file
into ``c:/c`` using the title as the file name.
"""
import os
import posixpath
import re
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

# exist_ok avoids the check-then-create race and also creates missing parents.
os.makedirs('c:/c', exist_ok=True)

url = 'https://www.jkl.com.cn/cn/invest.aspx'
# Loop-invariant request headers, built once instead of once per page.
UA = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
# Seconds to wait on the server before giving up instead of hanging forever.
TIMEOUT = 30

listpdf = []
listname = []
for page in range(1, 4):
    # NOTE(review): these look like ASP.NET postback fields, which are usually
    # sent with POST; the request method is kept as GET here -- confirm that
    # the site really paginates on these query parameters.
    fanye = {'__EVENTTARGET': 'AspNetPager1',
             '__EVENTARGUMENT': page}
    try:
        respon = requests.get(url=url, params=fanye, headers=UA, timeout=TIMEOUT)
        respon.raise_for_status()
    except requests.RequestException as exc:
        # A broken listing page should not stop the remaining pages.
        print(page, '获取失败', exc)
        continue

    jiexi = etree.HTML(respon.text)
    if jiexi is None:
        # Nothing parseable came back for this page.
        continue

    # Read the link and the title from the same <a> element so the two can
    # never drift out of step when an entry lacks a title or an href.
    for anchor in jiexi.xpath('//div[@class="newsLis"]//li/a'):
        href = anchor.get('href')
        title = ''.join(anchor.xpath('text()')).strip()
        if not href or not title:
            continue
        # urljoin copes with absolute, root-relative and relative hrefs.
        listpdf.append(urljoin(url, href))
        listname.append(title)

# Keyed by title: when two entries share a title the later link wins.
zidian = dict(zip(listname, listpdf))
for a, b in zidian.items():
    # Take the extension from the URL path only, ignoring any query string;
    # it already contains the leading dot (or is empty when there is none).
    houzhui = posixpath.splitext(urlsplit(b).path)[1]
    # Replace characters that are not allowed in file names.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', a)
    add = 'c:/c/' + safe_name + houzhui
    try:
        wenjian = requests.get(url=b, headers=UA, timeout=TIMEOUT)
        wenjian.raise_for_status()
    except requests.RequestException as exc:
        # Report the failure and carry on with the remaining files rather
        # than saving an error page under the document's name.
        print(a, '下载失败', exc)
        continue
    with open(add, 'wb') as u:
        u.write(wenjian.content)
    print(a,'下载成功')
复制代码
|
|