|
import sys
from collections import Counter

import requests
from openpyxl import Workbook
from pyquery import PyQuery
# Forum listing URL; ``{pn}`` is replaced by the page number.
URL_TEMPLATE = 'https://club.coovm.com/forum-53-{pn}.html'
# CSS selector for the thread-title elements on a listing page.
TITLE_SELECTOR = '#threadlisttableid .xst'
# Workbook written by main().
OUTPUT_FILE = '电影类型1.xlsx'
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10


def extract_category(title):
    """Return the leading category tag of *title*, or None if it has none.

    The tag is everything up to and including the first ``】`` character,
    e.g. ``'【动作】Some movie'`` -> ``'【动作】'``. Titles that contain no
    ``】`` have no tag and yield ``None`` instead of being treated as a
    category of their own.
    """
    head, closing, _ = title.partition('】')
    if not closing:
        return None
    return head + closing


def count_categories(titles):
    """Count how often each category tag occurs in *titles*.

    Titles without a tag (see extract_category) are ignored.
    Returns a ``collections.Counter`` mapping tag -> number of titles.
    """
    counts = Counter()
    for title in titles:
        category = extract_category(title)
        if category is not None:
            counts[category] += 1
    return counts


def fetch_titles(first_page=1, last_page=10):
    """Download listing pages and return the text of every thread title.

    Pages ``first_page`` through ``last_page`` (inclusive) are requested;
    the defaults cover pages 1-10. A page that cannot be fetched (network
    error, timeout, HTTP error status) is reported on stderr and skipped
    so that one bad page does not discard the rest of the run.
    """
    titles = []
    for page in range(first_page, last_page + 1):
        url = URL_TEMPLATE.format(pn=page)
        try:
            response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            print('skipping {}: {}'.format(url, err), file=sys.stderr)
            continue
        doc = PyQuery(response.text)
        titles.extend(item.text() for item in doc.items(TITLE_SELECTOR))
    return titles


def main():
    """Scrape the forum titles and write a category/count table to Excel."""
    wb = Workbook()
    sheet = wb.active
    sheet['a1'] = '类型'
    sheet['b1'] = '个数'
    # most_common() gives a stable, most-frequent-first row order.
    for category, freq in count_categories(fetch_titles()).most_common():
        sheet.append([category, freq])
    wb.save(OUTPUT_FILE)


if __name__ == '__main__':
    main()
复制代码
代码还能进一步精简,不过折腾了几个小时,有点头晕,以后有机会再优化。
|
|