# Import modules
import requests
from bs4 import BeautifulSoup
def get_movies(pages=10, timeout=10):
    """Scrape the Douban Top 250 listing pages and collect per-movie text.

    Requests ``https://movie.douban.com/top250?start=<offset>`` for
    ``pages`` consecutive pages (offsets 0, 25, 50, ...) and extracts text
    from each page with BeautifulSoup using the ``lxml`` parser.

    Args:
        pages: Number of listing pages to fetch; each page is addressed with
            a ``start`` offset of ``page_index * 25``. Defaults to 10.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot block forever. Defaults to 10.

    Returns:
        A 5-tuple of lists, each filled in page order:

        * ``movie_title`` - text of the first ``span.title`` in each ``div.hd``
        * ``movie_other`` - text of the first ``span.other`` in each ``div.hd``
        * ``movie_info``  - text of the first ``<p>`` in each ``div.info``,
          with space characters removed
        * ``movie_fen``   - text of ``span.rating_num`` in each ``div.star``
        * ``movie_num``   - text of the fourth ``<span>`` in each ``div.star``
          (presumably the vote count - confirm against the page markup)

    Raises:
        requests.RequestException: If a page cannot be fetched in time or
            the server answers with an HTTP error status.
        AttributeError, IndexError: If an expected element is missing from
            a matched container.
    """
    # Browser-like User-Agent header sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}

    # Lists that accumulate the scraped information.
    movie_title = []
    movie_other = []
    movie_info = []
    movie_fen = []
    movie_num = []

    # Visit every page link and issue a GET request.
    for i in range(0, pages):
        link = 'https://movie.douban.com/top250?start=' + str(i * 25)
        r = requests.get(link, headers=headers, timeout=timeout)
        # Fail loudly on an error response instead of silently parsing an
        # error page and returning short or empty lists.
        r.raise_for_status()
        print('已获取第', str(i * 25 + 1), '到', str(i * 25 + 25), '部电影信息。')

        # Extract the movie information from the page.
        soup = BeautifulSoup(r.text, 'lxml')

        # Title and alternative title come from the same container, so one
        # traversal of the div.hd elements is enough.
        for hd in soup.find_all('div', 'hd'):
            movie_title.append(hd.find('span', 'title').get_text())
            movie_other.append(hd.find('span', 'other').get_text())

        for info_div in soup.find_all('div', 'info'):
            movie_info.append(info_div.find('p').get_text().replace(' ', ''))

        # Rating and the fourth <span> both live in div.star.
        for star in soup.find_all('div', 'star'):
            movie_fen.append(star.find('span', 'rating_num').get_text())
            movie_num.append(star.find_all('span')[3].get_text())

    return movie_title, movie_other, movie_info, movie_fen, movie_num
# Scrape once at module level; main() reads these five parallel lists.
a, b, c, d, e = get_movies()


def main(path='c:\\Users\\32662\\Desktop\\douban.txt', limit=250):
    """Write the scraped movie information to a UTF-8 text file.

    Reads the module-level lists ``a``, ``b``, ``c``, ``d`` and ``e``
    (the five lists returned by ``get_movies()``) and writes one numbered
    entry per movie after a title header.

    Args:
        path: Destination file. Defaults to the original hard-coded
            Windows desktop path.
        limit: Maximum number of entries to write. Defaults to 250.

    Entries are produced by zipping the five lists, so writing stops at the
    shortest list instead of raising ``IndexError`` when fewer than
    ``limit`` movies were scraped.
    """
    # NOTE(review): no call to main() is visible in this part of the file -
    # confirm that an entry point invokes it.

    # newline='' disables newline translation so '\n' is written as-is.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        f.write('豆瓣电影 Top 250\n\n')
        rows = zip(a, b, c, d, e)
        for rank, (title, other, info, score, votes) in enumerate(rows, start=1):
            if rank > limit:
                break
            f.write('Top' + str(rank) + '\n')
            f.write('电影名:' + title + other + info + '豆瓣评分' + score + ' ' + votes)
            f.write('\n\n')