赵乾舟 发表于 2021-5-22 17:21:55

爬取多页多列数据代码

import requests
from pyquery import PyQuery as pq
from openpyxl import Workbook
# Scrape thread titles and authors from the paginated forum listing and
# write them to an Excel workbook, one row per thread.
wb = Workbook()
sheet = wb.active
# Header row: column A is the title ('标题'), column B is the author ('作者').
sheet['a1'] = '标题'
sheet['b1'] = '作者'
url = 'https://club.coovm.com/forum-53-{pn}.html'
headers = {
    "user-agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
# range(1, 50) requests listing pages 1 through 49.
for page in range(1, 50):
    lasturl = url.format(pn=page)
    # A timeout keeps a single stalled connection from hanging the whole run.
    r = requests.get(url=lasturl, headers=headers, timeout=10).text
    d = pq(r)
    for i in d('#threadlisttableid tbody'):
        sub_d = pq(i)
        title = sub_d('.xst').text()
        by_parts = sub_d('.by').text().split(' ')
        print(title, end=' ')
        print(by_parts)
        # A tbody with no '.xst' match yields an empty title; skip it so the
        # sheet does not collect blank rows.
        if not title:
            continue
        # str.split(' ') always returns at least one element, so index 0 is
        # safe. NOTE(review): assumes the first token of the '.by' text is
        # the author name -- confirm against the page markup.
        sheet.append([title, by_parts[0]])
wb.save('mine4.xlsx')



页: [1]
查看完整版本: 爬取多页多列数据代码