爬取电影论坛前十页,分析并存储电影类型到Excel
from collections import Counter

import requests
from openpyxl import Workbook
from pyquery import PyQuery
# Listing URL of the forum board; {pn} is replaced with the page number.
tem = 'https://club.coovm.com/forum-53-{pn}.html'


def extract_category(title):
    """Return the leading tag of *title*, up to and including the first '】'.

    Returns None when the title contains no '】', so that untagged threads
    are not counted as a category of their own.
    """
    head, closer, _ = title.partition('】')
    if not closer:
        return None
    return head + closer


def count_categories(titles):
    """Count how often each category tag occurs in *titles*.

    Returns a list of ``(category, count)`` pairs ordered by descending
    count. Titles for which ``extract_category`` yields None are skipped.
    """
    tags = (extract_category(title) for title in titles)
    # Counter tallies in a single pass instead of calling list.count()
    # once per distinct category.
    return Counter(tag for tag in tags if tag is not None).most_common()


def fetch_titles(first_page=1, last_page=10):
    """Download the listing pages and return the text of every matched item.

    Pages ``first_page`` through ``last_page`` (inclusive) are requested.
    Raises ``requests.HTTPError`` if a page answers with an error status.
    """
    titles = []
    for page in range(first_page, last_page + 1):
        url = tem.format(pn=page)
        # A timeout keeps the script from hanging on an unresponsive server.
        spon = requests.get(url=url, timeout=10)
        spon.raise_for_status()
        doc = PyQuery(spon.text)
        # NOTE(review): '.xst' is presumably the thread-title link class of
        # the forum software -- confirm against the live page markup.
        titles.extend(
            item.text() for item in doc.items('#threadlisttableid .xst')
        )
    return titles


def main():
    """Scrape the first ten listing pages and save category counts to Excel."""
    wb = Workbook()  # create a new Excel workbook
    sheet = wb.active
    sheet['a1'] = '类型'
    sheet['b1'] = '个数'

    move = fetch_titles()  # full thread titles
    for word, freq in count_categories(move):
        sheet.append([word, freq])

    wb.save('电影类型1.xlsx')


if __name__ == '__main__':
    main()
代码还能精简,整了几个小时有点头蒙,以后有机会再弄
页:
[1]