# Source: https://study.163.com/course/courseLearn.htm?courseId=1005913008#/learn/video?lessonId=1053258282&courseId=1005913008
# 课堂上的代码，做个记录 (Code written in class, kept here as a record.)
"""Scrape Douban's "now playing" movie list for Changsha and save it as JSON."""
import json

import requests
from bs4 import BeautifulSoup

NOWPLAYING_URL = 'https://movie.douban.com/cinema/nowplaying/changsha/'

# Browser-like User-Agent sent with the page request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
)

# ``data-*`` attributes copied from each <li> into a movie record, in the
# order they appear in the saved output.
MOVIE_ATTRS = ('title', 'score', 'release', 'region', 'director', 'actors')


def get_page(url=NOWPLAYING_URL, timeout=10, verify=True):
    """Download the listing page and return its HTML as text.

    Args:
        url: Page to fetch; defaults to the Changsha "now playing" listing.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.
        verify: Whether to verify the server's TLS certificate. Keep this
            enabled unless you knowingly accept an unverified connection.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    headers = {"User-Agent": USER_AGENT}
    response = requests.get(url, headers=headers, timeout=timeout, verify=verify)
    # Fail loudly on an error status instead of parsing an error page and
    # silently producing an empty result.
    response.raise_for_status()
    return response.text


def parse_page(text):
    """Extract one record per movie from the listing HTML.

    Every ``<li data-category="nowplaying">`` element yields a dict with the
    keys ``title``, ``score``, ``release``, ``region``, ``director`` and
    ``actors`` (taken from the matching ``data-*`` attributes) plus ``img``
    (the ``src`` of the first ``<img>`` inside the item). A missing attribute
    or image is stored as ``None`` rather than aborting the whole parse.

    Args:
        text: HTML source of the listing page.

    Returns:
        A list of movie dicts, in document order.
    """
    soup = BeautifulSoup(text, 'lxml')
    movies = []
    for li in soup.find_all('li', attrs={"data-category": "nowplaying"}):
        movie = {name: li.get('data-' + name) for name in MOVIE_ATTRS}
        img = li.find('img')
        movie['img'] = img.get('src') if img is not None else None
        movies.append(movie)
    return movies


def save_data(data, path='douban.json'):
    """Write ``data`` to ``path`` as UTF-8 encoded JSON.

    Args:
        data: JSON-serializable object (here, the list of movie dicts).
        path: Output file; defaults to ``douban.json`` in the working
            directory.
    """
    # open() returns a file object; the ``with`` block closes it for us.
    with open(path, 'w', encoding='utf-8') as fp:
        # json.dump serializes dicts/lists to JSON text and writes it to fp.
        # ensure_ascii=False writes non-ASCII characters as-is instead of
        # \uXXXX escapes.
        json.dump(data, fp, ensure_ascii=False)


def main():
    """Fetch the listing, parse it, and save the result."""
    text = get_page()
    movies = parse_page(text)
    save_data(movies)


if __name__ == '__main__':
    main()