|
"""Download images from bs.kaishicha.com for the entries listed in an Excel sheet.

The first column of the first sheet of ``01.xls`` (header row skipped) supplies
the entries.  For each entry the page ``http://bs.kaishicha.com/<hex>.html`` is
fetched, where ``<hex>`` is the hexadecimal Unicode code point of the entry's
last character.  Every ``src`` found under an element with class ``bsfbt`` is
downloaded and stored next to the workbook as ``<entry>.png``.
"""
import os
from urllib.parse import urljoin

from lxml import etree
import requests
import xlrd

# Folder holding the input workbook; downloaded images are written here too.
BASE_DIR = r'E:\新建文件夹'
SITE_ROOT = 'http://bs.kaishicha.com/'
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10

ua = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}


def _page_name(text):
    """Return the lowercase hex code point (at least 4 digits) of the last character of *text*.

    This replaces slicing the ``repr`` of ``unicode_escape`` bytes, which
    produced the same four digits for BMP characters but truncated code points
    above U+FFFF.
    NOTE(review): presumably every cell holds a single CJK character and the
    site names pages by code point -- confirm for characters above U+FFFF.
    """
    return format(ord(text[-1]), '04x')


def _fetch(url):
    """GET *url* with the browser User-Agent; return the response, or None on failure."""
    try:
        response = requests.get(url, headers=ua, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f'request failed: {url} ({exc})')
        return None
    if not response.ok:
        # Do not parse or save error pages.
        print(f'request failed: {url} (HTTP {response.status_code})')
        return None
    return response


wb = xlrd.open_workbook(os.path.join(BASE_DIR, '01.xls'))
ws = wb.sheet_by_index(0)
# [1:] drops the header row.  xlrd returns floats for numeric cells and '' for
# blank ones; neither can be mapped to a page, so only non-empty text is kept.
lst = [
    cell.strip()
    for cell in ws.col_values(0)[1:]
    if isinstance(cell, str) and cell.strip()
]
urllst = [f'{SITE_ROOT}{_page_name(zi)}.html' for zi in lst]
print(lst)
print(urllst)

# A dict collapses duplicate entries so each page is fetched only once.
zidian = dict(zip(lst, urllst))
for zi, page_url in zidian.items():
    respon = _fetch(page_url)
    if respon is None:
        continue
    respon.encoding = 'gb2312'
    jiexi = etree.HTML(respon.text)
    # urljoin resolves relative, root-relative and absolute src values alike.
    lianjie = [
        urljoin(page_url, src)
        for src in jiexi.xpath('//*[@class="bsfbt"]//@src')
    ]
    print(lianjie)
    for index, img_url in enumerate(lianjie, start=1):
        # The first image keeps the plain "<entry>.png" name; further images
        # get a numeric suffix instead of overwriting the same file.
        suffix = '' if index == 1 else f'_{index}'
        add = os.path.join(BASE_DIR, f'{zi}{suffix}.png')
        print(add)
        img = _fetch(img_url)
        if img is None:
            continue
        with open(add, 'wb') as out_file:
            out_file.write(img.content)
复制代码
|
|