|
"""Scrape thread titles and authors from a forum board into an Excel file.

Requests the paginated thread-list pages built from ``URL_TEMPLATE``, prints
each thread's title and author to stdout, and stores them as rows in an
``.xlsx`` workbook under a two-column header row.
"""
import sys

import requests
from pyquery import PyQuery as pq
from openpyxl import Workbook

# ``{pn}`` is replaced with the 1-based page number of the thread list.
URL_TEMPLATE = 'https://club.coovm.com/forum-53-{pn}.html'
HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
FIRST_PAGE = 1
LAST_PAGE = 49  # inclusive; together with FIRST_PAGE this covers range(1, 50)
OUTPUT_FILE = 'mine4.xlsx'
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the run


def fetch_page(session, page):
    """Return the HTML text of thread-list page number *page*.

    Args:
        session: A ``requests.Session`` used to issue the GET request.
        page: Page number substituted into ``URL_TEMPLATE``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = session.get(
        URL_TEMPLATE.format(pn=page),
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on error pages instead of parsing them as a thread list.
    response.raise_for_status()
    return response.text


def parse_threads(html):
    """Extract ``(title, author)`` pairs from one thread-list page.

    Args:
        html: Markup of a thread-list page.

    Returns:
        A list of ``(title, author)`` tuples, one per ``tbody`` under
        ``#threadlisttableid`` that contains a non-empty ``.xst`` element.
    """
    if not html.strip():
        return []
    threads = []
    for row in pq(html)('#threadlisttableid tbody').items():
        title = row('.xst').text()
        if not title:
            # A tbody without a title link is not a thread row; skipping it
            # avoids writing blank rows into the spreadsheet.
            continue
        # The combined text of all links in the ``.by`` cells is split on
        # spaces and only the first token is kept as the author.
        author = row('.by a').text().split(' ')[0]
        threads.append((title, author))
    return threads


def main(first_page=FIRST_PAGE, last_page=LAST_PAGE, output_file=OUTPUT_FILE):
    """Scrape pages *first_page*..*last_page* (inclusive) into *output_file*.

    Pages that cannot be downloaded are reported on stderr and skipped so
    that one bad page does not discard the rows already collected.
    """
    wb = Workbook()
    sheet = wb.active
    sheet['A1'] = '标题'
    sheet['B1'] = '作者'

    # A single session reuses the underlying connection across all pages.
    with requests.Session() as session:
        for page in range(first_page, last_page + 1):
            try:
                html = fetch_page(session, page)
            except requests.RequestException as exc:
                print('page {} skipped: {}'.format(page, exc), file=sys.stderr)
                continue
            for title, author in parse_threads(html):
                print(title, author)
                sheet.append([title, author])

    # Write the workbook once, after every page has been processed.
    wb.save(output_file)


if __name__ == '__main__':
    main()
复制代码
|
|