爬取一页内容的多列数据
import requests
from pyquery import PyQuery as pq
from openpyxl import Workbook

# Scrape one forum listing page and store two columns (title, author)
# per thread row in an Excel workbook.
wb = Workbook()
sheet = wb.active
# Header row.
sheet['a1'] = '标题'
sheet['b1'] = '作者'
url = 'http://suixian666.com/forum.php?mod=forumdisplay&fid=54'
# A timeout keeps the script from hanging forever on an unresponsive host,
# and raise_for_status() stops us from parsing an HTTP error page.
response = requests.get(url=url, timeout=10)
response.raise_for_status()
spon = response.text
d = pq(spon)
for item in d('#threadlisttableid tbody'):
    # Iterating a PyQuery selection yields raw elements; re-wrap each one
    # so it can be queried with CSS selectors.
    sub_b = pq(item)
    title = sub_b('.xst').text()
    by_parts = sub_b('.by a').text().split(' ')
    print(title, end=' ')
    print(by_parts)
    # NOTE(review): the original append() argument was lost in the paste.
    # Assumes the first token of the '.by a' text is the thread author,
    # matching the two header columns above -- confirm against the page.
    # str.split(' ') always returns at least one item, so [0] is safe.
    sheet.append([title, by_parts[0]])
wb.save('mine.xlsx')
发现个怪事：我抄代码的时候，把 pq 换成其他内容，也可以正常访问抓取。
页:
[1]