Code refactoring: separate scraper and movie objects
This commit is contained in:
@ -16,105 +16,134 @@ from bs4 import BeautifulSoup
|
||||
from collections import deque
|
||||
|
||||
|
||||
class FreeboxMoviePlanner:
|
||||
class Movie:
    """
    Record describing one film found in the TV guide.

    All attributes start out empty ('' for text fields, False for
    ``good``) and are filled in later by the code that creates or
    enriches the object.
    """

    def __init__(self):
        # Listing information.
        self.day = ''
        self.title = ''
        self.genre = ''
        self.channel = ''
        # TMDB information.
        self.rating = ''
        self.original_title = ''
        self.overview = ''
        self.good = False
        self.tmdb_id = ''
        self.url = ''

    def __str__(self):
        """
        Return a multi-line, human readable summary of the movie.

        An empty ``day`` is displayed as 'Today'.
        """
        when = self.day if self.day != '' else 'Today'
        layout = '{}: {} - {} ({})\n TMDB: {} - {}\n @ {}\n {}'
        fields = (
            when,
            self.title,
            self.genre,
            self.channel,
            self.rating,
            self.original_title,
            self.url,
            self.overview,
        )
        return layout.format(*fields)

    def __repr__(self):
        """
        Return a short debugging representation (title and rating).
        """
        pieces = (self.title, self.rating)
        return 'Movie <{}({})>'.format(*pieces)
|
||||
|
||||
|
||||
class TVGuideScraper:
|
||||
TV_GUIDE_URL = 'https://www.programme-television.org/{}?bouquet=tnt'
|
||||
|
||||
def __init__(self):
    """
    Load config.json from the working directory and register the TMDB
    API key stored under its 'tmdb-api' entry.
    """
    logging.info('Opening config file: config.json')
    with open('config.json') as handle:
        settings = json.load(handle)
    self.config = settings
    # tmdbsimple reads its key from this module-level attribute.
    tmdbsimple.API_KEY = settings['tmdb-api']
|
||||
@staticmethod
|
||||
def getMovies(day=''):
    """
    Download the TV guide page for one day and return the films on it.

    :param day: text substituted into TV_GUIDE_URL; it is also stored,
        title-cased, on every returned movie. The default is ''.
    :return: list of Movie objects with title, genre, channel and day set.
    :raises: whatever requests.get() / Response.raise_for_status() raise
        on a failed request, and IndexError/KeyError if a film tag does
        not contain the expected title or category link.
    """
    # Build the URL once so the log shows the address actually requested
    # rather than the raw template with its '{}' placeholder.
    url = TVGuideScraper.TV_GUIDE_URL.format(day)
    logging.info('Connecting to {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    html = BeautifulSoup(r.text, 'html.parser')
    movies = []
    for channel in html.select('.bloc_cnt'):
        # The channel name is read from the block's first <em>; blocks
        # without one are skipped.
        channel_names = channel.select('em')
        if not channel_names:
            continue
        for movietag in channel.find_all(TVGuideScraper._tag_is_film):
            movie = Movie()
            movie.title = movietag.select('.texte_titre a')[0]['title']
            movie.genre = movietag.select('.texte_cat a')[0].string
            movie.channel = \
                channel_names[0].string.replace('Programme ', '')
            movie.day = day.title()

            logging.info('Found movie: {0!r}'.format(movie))

            movies.append(movie)

    return movies
|
||||
|
||||
@staticmethod
|
||||
def _tag_is_film(tag):
|
||||
"""
|
||||
Helper to check if a tag is a film
|
||||
"""
|
||||
return (
|
||||
tag.has_attr('data-nature')
|
||||
and
|
||||
tag['data-nature'] == 'films-telefilms'
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _printMovie(movie):
    """
    Print a short description of one movie to standard output.

    :param movie: mapping with the keys 'title', 'genre', 'channel',
        'rating', 'original_title' and 'overview'.
    """
    headline = '{} - {} ({})'.format(
        movie['title'], movie['genre'], movie['channel']
    )
    print(headline)
    # The TMDB details are formatted only after the headline is printed.
    details = ' TMDB: {} - {}\n {}'.format(
        movie['rating'], movie['original_title'], movie['overview']
    )
    print(details)
|
||||
|
||||
def printAllMovies(self, movies):
    """
    Print every movie, grouped under a '=== <Day>' heading per day.

    :param movies: mapping from a day name to an iterable of movies.
    """
    for day_name, day_movies in movies.items():
        heading = '=== {}'.format(day_name.title())
        print(heading)
        for entry in day_movies:
            FreeboxMoviePlanner._printMovie(entry)
|
||||
class FreeboxMoviePlanner:
|
||||
def __init__(self):
    """
    Load config.json from the working directory, register the TMDB API
    key stored under its 'tmdb-api' entry and start with no movies.
    """
    logging.info('Opening config file: config.json')
    with open('config.json') as handle:
        settings = json.load(handle)
    self.config = settings
    # tmdbsimple reads its key from this module-level attribute.
    tmdbsimple.API_KEY = settings['tmdb-api']
    self.movies = []
|
||||
|
||||
def getAllMovies(self):
|
||||
def __repr__(self):
|
||||
result = 'FreeboxMoviePlanner <Movies:\n'
|
||||
for movie in self.movies:
|
||||
result += ' {!r}\n'.format(movie)
|
||||
result += '>'
|
||||
return result
|
||||
|
||||
def printAllMovies(self):
|
||||
for movie in self.movies:
|
||||
print(movie)
|
||||
print()
|
||||
|
||||
def scapeAllMovies(self):
|
||||
days = deque(['lundi', 'mardi', 'mercredi',
|
||||
'jeudi', 'vendredi', 'samedi', 'dimanche'])
|
||||
offset = datetime.datetime.today().weekday()
|
||||
days.rotate(-1-offset)
|
||||
days.appendleft('')
|
||||
movies = {}
|
||||
for day in days:
|
||||
movies[day] = self.getMovies(day)
|
||||
logging.info('Found the following movies: {}'.format(movies))
|
||||
return movies
|
||||
self.movies += TVGuideScraper.getMovies(day)
|
||||
logging.info('Found the following movies: {}'.format(self.movies))
|
||||
|
||||
def getMovies(self, day=''):
|
||||
logging.info('Connecting to {}'.format(self.TV_GUIDE_URL))
|
||||
r = requests.get(self.TV_GUIDE_URL.format(day))
|
||||
r.raise_for_status()
|
||||
html = BeautifulSoup(r.text, 'html.parser')
|
||||
movies = []
|
||||
for channel in html.select('.bloc_cnt'):
|
||||
if len(channel.select('em')):
|
||||
for movie in channel.find_all(
|
||||
FreeboxMoviePlanner._tag_is_film):
|
||||
movie_title = movie.select('.texte_titre a')[0]['title']
|
||||
def findMoviesOnTMDB(self):
|
||||
for movie in self.movies:
|
||||
tmdb_details = self._findMovieOnTMDB(movie.title)
|
||||
if tmdb_details:
|
||||
movie.rating = tmdb_details['vote_average']
|
||||
movie.original_title = \
|
||||
tmdb_details['original_title']
|
||||
movie.overview = '\n '.join(textwrap.wrap(
|
||||
tmdb_details['overview'], 75)
|
||||
)
|
||||
movie.tmdb_id = tmdb_details['id']
|
||||
movie.good = \
|
||||
float(movie.rating) >= self.config['minimum-rating']
|
||||
movie.url = 'https://www.themoviedb.org/movie/{}?language={}' \
|
||||
.format(movie.tmdb_id, self.config['tmdb-language'])
|
||||
|
||||
thismovie = {}
|
||||
thismovie['title'] = movie_title
|
||||
thismovie['genre'] = movie.select('.texte_cat a')[0].string
|
||||
thismovie['channel'] = channel.select('em')[0]\
|
||||
.string.replace('Programme ', '')
|
||||
logging.info('Found movie: {}'.format(thismovie))
|
||||
def filterBadRatings(self):
|
||||
self.movies = [movie for movie in self.movies if movie.good]
|
||||
|
||||
tmdb_details = self._getMovieRating(movie_title)
|
||||
if not tmdb_details:
|
||||
logging.warning(
|
||||
'No TMDB match for {}'.format(movie_title)
|
||||
)
|
||||
continue
|
||||
|
||||
thismovie['rating'] = tmdb_details['vote_average']
|
||||
thismovie['original_title'] = \
|
||||
tmdb_details['original_title']
|
||||
thismovie['overview'] = '\n '.join(textwrap.wrap(
|
||||
tmdb_details['overview'], 75)
|
||||
)
|
||||
if(
|
||||
float(tmdb_details['vote_average'])
|
||||
< self.config['minimum-rating']
|
||||
):
|
||||
logging.warning(
|
||||
'Bad rating ({}), skipping {}'.format(
|
||||
tmdb_details['vote_average'], movie_title))
|
||||
else:
|
||||
movies.append(thismovie)
|
||||
return movies
|
||||
|
||||
def _getMovieRating(self, movie):
|
||||
def _findMovieOnTMDB(self, movie):
    """
    Search TMDB for a movie title.

    :param movie: title to search for.
    :return: the first entry of the search results, or an empty list
        when the search returned nothing.
    """
    logging.info("Searching for '{}' on TMDB".format(movie))
    search = tmdbsimple.Search()
    search.movie(query=movie, language=self.config['tmdb-language'])
    hits = search.results
    logging.info("Found {}".format(hits))

    if len(hits) == 0:
        logging.warning("'{}' not found on TMDB!".format(movie))
        return []

    # Only the first result is used.
    first_hit = hits[0]
    logging.info("Found '{}'".format(first_hit['title']))
    return first_hit
|
||||
|
||||
|
||||
@ -124,4 +153,7 @@ if __name__ == '__main__':
|
||||
format=' %(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
fmp = FreeboxMoviePlanner()
|
||||
fmp.printAllMovies(fmp.getAllMovies())
|
||||
fmp.scapeAllMovies()
|
||||
fmp.findMoviesOnTMDB()
|
||||
fmp.filterBadRatings()
|
||||
fmp.printAllMovies()
|
||||
|
Reference in New Issue
Block a user