赵乾舟 发表于 2021-7-22 21:48:37

爬京客隆PDF代码


import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Download every document linked from the first three pages of the
# jkl.com.cn investor listing into a local folder.
save_dir = 'c:/c'
# exist_ok replaces the separate exists() check (no check-then-create race),
# and makedirs also creates any missing parent directory.
os.makedirs(save_dir, exist_ok=True)

url = 'https://www.jkl.com.cn/cn/invest.aspx'
# Defined once, ahead of both loops: the download loop needs it as well.
UA = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
# requests never times out by default; bound every call so a stalled
# server cannot hang the script.
request_timeout = 30
# Characters that are not allowed in Windows file names are replaced by '_'.
forbidden_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# (title, absolute URL) pairs. Both values are read from the same <a> element
# so a title can never be matched with another entry's link, and entries that
# share a title are all kept.
documents = []
for page in range(1, 4):
    # Pager fields, sent as query parameters, select the listing page.
    fanye = {'__EVENTTARGET': 'AspNetPager1',
             '__EVENTARGUMENT': page}
    respon = requests.get(url=url, params=fanye, headers=UA,
                          timeout=request_timeout)
    # A listing page that cannot be fetched aborts the run with an HTTPError.
    respon.raise_for_status()
    jiexi = etree.HTML(respon.text)
    for anchor in jiexi.xpath('//div[@class="newsLis"]//li/a[@href]'):
        title = ''.join(anchor.xpath('text()')).strip()
        # urljoin copes with absolute, root-relative and relative hrefs.
        documents.append((title, urljoin(url, anchor.get('href'))))

# How many times each file stem has been used so far in this run.
name_counts = {}
for title, link in documents:
    # Extension including the leading dot, taken from the URL without its
    # query string; empty when the last path segment has no dot.
    houzhui = os.path.splitext(link.split('?', 1)[0])[1]
    stem = title.translate(forbidden_chars) or 'untitled'
    seen = name_counts.get(stem, 0)
    name_counts[stem] = seen + 1
    if seen:
        # Same title seen before: add a counter instead of overwriting.
        stem = '{}_{}'.format(stem, seen)
    try:
        pdfshuju = requests.get(url=link, headers=UA, timeout=request_timeout)
        # Do not save an HTTP error page under the document's name.
        pdfshuju.raise_for_status()
    except requests.RequestException as exc:
        # One failed document should not stop the remaining downloads.
        print(title, '下载失败', exc)
        continue
    add = os.path.join(save_dir, stem + houzhui)
    with open(add, 'wb') as u:
        u.write(pdfshuju.content)
    print(title, '下载成功')


页: [1]
查看完整版本: 爬京客隆PDF代码