Code refactoring: separate scraper and movie objects
This commit is contained in:
@ -16,105 +16,134 @@ from bs4 import BeautifulSoup
|
|||||||
from collections import deque
|
from collections import deque
|
||||||
|
|
||||||
|
|
||||||
class FreeboxMoviePlanner:
|
class Movie:
    """One film broadcast, plus the TMDB details attached to it later.

    Every field is a plain public attribute.  All of them default to an
    empty value, so ``Movie()`` yields a blank record that can be filled
    in attribute by attribute; any field may also be given directly as a
    keyword argument.
    """

    def __init__(self, *, day='', title='', genre='', channel='',
                 rating='', original_title='', overview='', good=False,
                 tmdb_id='', url=''):
        """Create a movie record.

        Args:
            day: Broadcast day label; the empty string is shown as
                'Today' by ``__str__``.
            title: Title of the film.
            genre: Genre label of the film.
            channel: Name of the channel broadcasting the film.
            rating: Rating of the film, empty while unknown.
            original_title: Original title of the film.
            overview: Summary text of the film.
            good: Flag telling whether the movie is worth keeping.
            tmdb_id: TMDB identifier of the film, empty while unknown.
            url: Link to a page describing the film.
        """
        self.day = day
        self.title = title
        self.genre = genre
        self.channel = channel
        self.rating = rating
        self.original_title = original_title
        self.overview = overview
        self.good = good
        self.tmdb_id = tmdb_id
        self.url = url

    def __str__(self):
        """Return the multi-line, human-readable description of the movie."""
        return '{}: {} - {} ({})\n TMDB: {} - {}\n @ {}\n {}'.format(
            # An empty day means the record has no explicit day label.
            'Today' if self.day == '' else self.day,
            self.title,
            self.genre,
            self.channel,
            self.rating,
            self.original_title,
            self.url,
            self.overview
        )

    def __repr__(self):
        """Return a short debugging form: title and rating only."""
        return 'Movie <{}({})>'.format(self.title, self.rating)
|
||||||
|
|
||||||
|
|
||||||
|
class TVGuideScraper:
    """Scraper extracting film broadcasts from the online TV guide."""

    # URL template; the placeholder receives the day name ('' for the
    # guide's default page).
    TV_GUIDE_URL = 'https://www.programme-television.org/{}?bouquet=tnt'

    @staticmethod
    def getMovies(day='', timeout=30):
        """Download the guide page for ``day`` and return the films found.

        Args:
            day: Day name inserted into ``TV_GUIDE_URL``; the empty
                string requests the default page.
            timeout: Seconds to wait for the server before giving up.
                Without it ``requests`` would wait indefinitely.

        Returns:
            A list of ``Movie`` objects with ``title``, ``genre``,
            ``channel`` and ``day`` filled in.

        Raises:
            Whatever ``requests`` raises on a connection failure, a
            timeout, or (through ``raise_for_status``) an HTTP error
            status.
        """
        url = TVGuideScraper.TV_GUIDE_URL.format(day)
        # Log the URL actually requested, not the unformatted template.
        logging.info('Connecting to {}'.format(url))
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        html = BeautifulSoup(r.text, 'html.parser')

        movies = []
        for channel in html.select('.bloc_cnt'):
            # Blocks without an <em> carry no channel name: skip them.
            name_tags = channel.select('em')
            if not name_tags:
                continue
            # The channel name is the same for every film of the block,
            # so it is computed once.  ``.string`` is None when the tag
            # does not hold exactly one string; fall back to ''.
            channel_name = (name_tags[0].string or '') \
                .replace('Programme ', '')

            for movietag in channel.find_all(TVGuideScraper._tag_is_film):
                movie = Movie()
                movie.title = \
                    movietag.select('.texte_titre a')[0]['title']
                movie.genre = movietag.select('.texte_cat a')[0].string
                movie.channel = channel_name
                movie.day = day.title()

                logging.info('Found movie: {0!r}'.format(movie))

                movies.append(movie)

        return movies

    @staticmethod
    def _tag_is_film(tag):
        """
        Helper to check if a tag is a film

        A tag qualifies when its ``data-nature`` attribute exists and
        equals 'films-telefilms'.
        """
        # ``get`` returns None for a missing attribute, which never
        # equals the expected marker.
        return tag.get('data-nature') == 'films-telefilms'
|
||||||
|
|
||||||
@staticmethod
def _printMovie(movie):
    """Print a short two-part summary of one movie to standard output.

    ``movie`` is indexed by the keys 'title', 'genre', 'channel',
    'rating', 'original_title' and 'overview'.
    """
    # First part: what is broadcast and where.
    headline = '{} - {} ({})'.format(
        movie['title'], movie['genre'], movie['channel'])
    print(headline)

    # Second part: the TMDB rating, original title and overview.
    tmdb_summary = ' TMDB: {} - {}\n {}'.format(
        movie['rating'], movie['original_title'], movie['overview'])
    print(tmdb_summary)
|
|
||||||
|
|
||||||
def printAllMovies(self, movies):
|
class FreeboxMoviePlanner:
    """Collect the films of the coming week and rate them with TMDB.

    Typical use: ``scrapeAllMovies()`` fills ``self.movies``,
    ``findMoviesOnTMDB()`` adds the TMDB details,
    ``filterBadRatings()`` drops the movies not flagged as good and
    ``printAllMovies()`` prints what is left.
    """

    def __init__(self, config_path='config.json'):
        """Load the JSON configuration and set the TMDB API key.

        Args:
            config_path: Path of the JSON configuration file.  The keys
                read by this class are 'tmdb-api', 'tmdb-language' and
                'minimum-rating'.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if the file is not valid JSON.
            KeyError: if the 'tmdb-api' key is missing.
        """
        logging.info('Opening config file: {}'.format(config_path))
        # JSON files are UTF-8; do not depend on the platform default.
        with open(config_path, encoding='utf-8') as config_file:
            self.config = json.load(config_file)
        tmdbsimple.API_KEY = self.config['tmdb-api']
        self.movies = []

    def __repr__(self):
        """Return a debugging form listing the repr of every movie."""
        parts = ['FreeboxMoviePlanner <Movies:\n']
        parts.extend(' {!r}\n'.format(movie) for movie in self.movies)
        parts.append('>')
        return ''.join(parts)

    def printAllMovies(self):
        """Print every movie, each followed by a blank line."""
        for movie in self.movies:
            print(movie)
            print()

    def scrapeAllMovies(self):
        """Scrape today and the seven following days into ``self.movies``.

        The found movies are appended to ``self.movies``; nothing is
        returned.
        """
        days = deque(['lundi', 'mardi', 'mercredi',
                      'jeudi', 'vendredi', 'samedi', 'dimanche'])
        # weekday() is 0 on Monday: rotating left by offset + 1 puts
        # tomorrow's name first and today's name last.
        offset = datetime.datetime.today().weekday()
        days.rotate(-1 - offset)
        # The empty day name selects the guide's default page.
        days.appendleft('')

        for day in days:
            self.movies += TVGuideScraper.getMovies(day)
        logging.info('Found the following movies: {}'.format(self.movies))

    # Original, misspelled name kept so existing callers keep working.
    scapeAllMovies = scrapeAllMovies

    def findMoviesOnTMDB(self):
        """Attach TMDB details to every movie that has a TMDB match.

        Movies without a match are left untouched, so their ``good``
        flag keeps its previous value.
        """
        for movie in self.movies:
            tmdb_details = self._findMovieOnTMDB(movie.title)
            if not tmdb_details:
                continue

            movie.rating = tmdb_details['vote_average']
            movie.original_title = \
                tmdb_details['original_title']
            # Wrap the overview at 75 columns for terminal display.
            movie.overview = '\n '.join(textwrap.wrap(
                tmdb_details['overview'], 75)
            )
            movie.tmdb_id = tmdb_details['id']
            movie.good = \
                float(movie.rating) >= self.config['minimum-rating']
            movie.url = 'https://www.themoviedb.org/movie/{}?language={}' \
                .format(movie.tmdb_id, self.config['tmdb-language'])

    def filterBadRatings(self):
        """Keep only the movies whose ``good`` flag is true."""
        self.movies = [movie for movie in self.movies if movie.good]

    def _findMovieOnTMDB(self, movie):
        """Search TMDB for a title and return the first result.

        Args:
            movie: Title to search for.

        Returns:
            The first search result, or an empty list when the search
            yields nothing.
        """
        logging.info("Searching for '{}' on TMDB".format(movie))
        search = tmdbsimple.Search()
        search.movie(query=movie, language=self.config['tmdb-language'])

        if not search.results:
            logging.warning("'{}' not found on TMDB!".format(movie))
            return []

        logging.info("Found '{}'".format(
            search.results[0]['title']
        ))
        return search.results[0]
|
||||||
|
|
||||||
|
|
||||||
@ -124,4 +153,7 @@ if __name__ == '__main__':
|
|||||||
format=' %(asctime)s - %(levelname)s - %(message)s'
|
format=' %(asctime)s - %(levelname)s - %(message)s'
|
||||||
)
|
)
|
||||||
fmp = FreeboxMoviePlanner()
|
fmp = FreeboxMoviePlanner()
|
||||||
fmp.printAllMovies(fmp.getAllMovies())
|
fmp.scapeAllMovies()
|
||||||
|
fmp.findMoviesOnTMDB()
|
||||||
|
fmp.filterBadRatings()
|
||||||
|
fmp.printAllMovies()
|
||||||
|
Reference in New Issue
Block a user