Source code for sportsipy.fb.schedule

import pandas as pd
import re
from .constants import SCHEDULE_SCHEME, SQUAD_URL
from datetime import datetime
from ..decorators import float_property_decorator, int_property_decorator
from .fb_utils import _lookup_team
from pyquery import PyQuery as pq
from sportsipy import utils
from sportsipy.constants import (AWAY,
                                 DRAW,
                                 HOME,
                                 LOSS,
                                 NEUTRAL,
                                 WIN)
from urllib.error import HTTPError


class Game:
    """
    A representation of a matchup between two teams.

    Stores all relevant high-level match information for a game in a team's
    schedule including date, time, week, opponent, and score.

    Parameters
    ----------
    game_data : PyQuery-style object
        The row containing the specified game information. It is called with
        CSS selectors taken from ``SCHEDULE_SCHEME`` to extract each field.
    """

    def __init__(self, game_data):
        # Every attribute declared here (except '_datetime') is filled in by
        # _parse_game_data, which walks the instance dictionary by name.
        self._competition = None
        self._matchweek = None
        self._day = None
        self._date = None
        self._time = None
        self._datetime = None
        self._venue = None
        self._result = None
        self._goals_for = None
        self._goals_against = None
        self._opponent = None
        self._opponent_id = None
        self._expected_goals = None
        self._expected_goals_against = None
        self._attendance = None
        self._captain = None
        self._captain_id = None
        self._formation = None
        self._referee = None
        self._match_report = None
        self._notes = None

        self._parse_game_data(game_data)

    def __str__(self):
        """
        Return the string representation of the class.
        """
        return f'{self.date} - {self.opponent}'

    def __repr__(self):
        """
        Return the string representation of the class.
        """
        return self.__str__()

    def _parse_link_id(self, game_data, field, url_segment):
        """
        Extract the ID that follows ``/<url_segment>/`` in a field's link.

        Looks up the cell for ``field``, reads the ``href`` of the anchor
        inside it, and strips everything up to and including
        ``/<url_segment>/`` as well as everything from the next ``/`` onward.

        Parameters
        ----------
        game_data : PyQuery-style object
            The row of data for a given game.
        field : string
            The key in ``SCHEDULE_SCHEME`` identifying the cell to read.
        url_segment : string
            The path component that directly precedes the ID in the URL, such
            as 'squads', 'players', or 'matches'.

        Returns
        -------
        string
            Returns a ``string`` of the ID, or ``None`` if the cell contains
            no link.
        """
        href = game_data(SCHEDULE_SCHEME[field])('a').attr('href')
        try:
            link_id = re.sub(r'.*\/' + url_segment + r'\/', '', href)
            return re.sub(r'\/.*', '', link_id)
        except TypeError:
            # No anchor in the cell means 'href' is None and there is no ID.
            return None

    def _parse_opponent_id(self, game_data):
        """
        Parse the opponent's squad ID.

        The opponent field has a squad ID embedded in the URL which can be
        used to more directly lookup or match an opponent. By pulling the
        opponent field and removing all other unnecessary parts of the URL,
        the ID can be safely parsed and returned.

        Parameters
        ----------
        game_data : PyQuery-style object
            The row of data for a given game.

        Returns
        -------
        string
            Returns a ``string`` of the opponent's squad ID, or ``None`` if
            the opponent cell has no link.
        """
        return self._parse_link_id(game_data, 'opponent', 'squads')

    def _parse_captain_id(self, game_data):
        """
        Parse the captain's player ID.

        The captain field contains a link to the captain's unique player ID in
        the URL which can be used to more directly lookup or match the player.
        By pulling the captain field and removing all other unnecessary parts
        of the URL, the ID can be safely parsed and returned.

        Parameters
        ----------
        game_data : PyQuery-style object
            The row of data for a given game.

        Returns
        -------
        string
            Returns a ``string`` of the player's unique ID, or ``None`` if the
            captain cell has no link.
        """
        return self._parse_link_id(game_data, 'captain', 'players')

    def _parse_match_report(self, game_data):
        """
        Parse the match report ID.

        The match report field contains a link to the detailed match report
        via the match report ID which is embedded in the URL. By pulling the
        match report field and removing all other unnecessary parts of the
        URL, the ID can be safely parsed and returned.

        Parameters
        ----------
        game_data : PyQuery-style object
            The row of data for a given game.

        Returns
        -------
        string
            Returns a ``string`` of the match report's unique ID, or ``None``
            if the match report cell has no link.
        """
        return self._parse_link_id(game_data, 'match_report', 'matches')

    def _parse_game_data(self, game_data):
        """
        Parse a value for every attribute.

        The function looks through every attribute with the exception of
        'datetime' and retrieves the value according to the parsing scheme and
        name of the attribute from the passed HTML data. The ID-style fields
        ('opponent_id', 'captain_id', 'match_report') are pulled from link
        URLs by their dedicated parsers; everything else goes through
        ``utils._parse_field``. Once the value is retrieved, the attribute's
        value is updated with the returned result.

        Note that this method is called directly once Game is invoked and does
        not need to be called manually.

        Parameters
        ----------
        game_data : PyQuery-style object
            The row of data for a given game.
        """
        link_parsers = {
            'opponent_id': self._parse_opponent_id,
            'captain_id': self._parse_captain_id,
            'match_report': self._parse_match_report,
        }
        # Only existing keys are reassigned, so the dictionary does not change
        # size while it is being iterated.
        for field in self.__dict__:
            # Remove the leading '_' from the name
            short_name = str(field)[1:]
            if short_name == 'datetime':
                # Computed on demand by the 'datetime' property instead.
                continue
            link_parser = link_parsers.get(short_name)
            if link_parser is not None:
                value = link_parser(game_data)
            else:
                value = utils._parse_field(SCHEDULE_SCHEME,
                                           game_data,
                                           short_name)
            setattr(self, field, value)

    @staticmethod
    def _regulation_score(score):
        """
        Return the score with any trailing shootout tally removed.

        Parameters
        ----------
        score : string
            The raw score text, such as '1' or '1 (4)', or ``None`` when no
            score was parsed.

        Returns
        -------
        string
            The score text up to the first space when a parenthesised
            shootout tally is present, the unchanged text otherwise, or
            ``None`` if no score is available.
        """
        if score is None:
            return None
        if '(' in score and ')' in score:
            return re.sub(' .*', '', score)
        return score

    @staticmethod
    def _shootout_score(score):
        """
        Return the parenthesised shootout tally from a raw score.

        Parameters
        ----------
        score : string
            The raw score text, such as '1 (4)', or ``None`` when no score
            was parsed.

        Returns
        -------
        string
            The digits found inside the first pair of parentheses, or ``None``
            if there is no score or no shootout tally.
        """
        if score is None:
            return None
        penalties = re.findall(r'\(\d+\)', score)
        if not penalties:
            return None
        return re.sub(r'\(|\)', '', penalties[0])

    @property
    def dataframe(self):
        """
        Returns a pandas ``DataFrame`` containing all other class properties
        and values. The index for the DataFrame is the match report ID.
        Returns ``None`` when neither a goals-for nor a goals-against value
        was parsed for the game.
        """
        if self._goals_for is None and self._goals_against is None:
            return None
        fields_to_include = {
            'competition': self.competition,
            'matchweek': self.matchweek,
            'day': self.day,
            'date': self.date,
            'time': self.time,
            'datetime': self.datetime,
            'venue': self.venue,
            'result': self.result,
            'goals_for': self.goals_for,
            'goals_against': self.goals_against,
            'shootout_scored': self.shootout_scored,
            'shootout_against': self.shootout_against,
            'opponent': self.opponent,
            'opponent_id': self.opponent_id,
            'expected_goals': self.expected_goals,
            'expected_goals_against': self.expected_goals_against,
            'attendance': self.attendance,
            'captain': self.captain,
            'captain_id': self.captain_id,
            'formation': self.formation,
            'referee': self.referee,
            'match_report': self.match_report,
            'notes': self.notes
        }
        return pd.DataFrame([fields_to_include], index=[self.match_report])

    @property
    def competition(self):
        """
        Returns a ``string`` of the competitions name, such as 'Premier League'
        or 'Champions Lg'.
        """
        return self._competition

    @property
    def matchweek(self):
        """
        Returns a ``string`` of the matchweek the game was played in, such as
        'Matchweek 1' or 'Group Stage'.
        """
        return self._matchweek

    @property
    def day(self):
        """
        Returns a ``string`` of the day of the week the game was played on.
        """
        return self._day

    @property
    def date(self):
        """
        Returns a ``string`` of the date the game was played in the format
        'YYYY-MM-DD'.
        """
        return self._date

    @property
    def time(self):
        """
        Returns a ``string`` of the time the game started in 24-hour format,
        local to the home venue.
        """
        return self._time

    @property
    def datetime(self):
        """
        Returns a ``datetime`` object representing the date and time the match
        started. If the time is not present or cannot be used, the default
        time of midnight on the given day will be used instead. Returns
        ``None`` if the date is missing or is not a valid calendar date.
        """
        try:
            date_parts = self.date.split('-')
        except AttributeError:
            return None
        if len(date_parts) != 3:
            return None
        try:
            year, month, day = (int(part) for part in date_parts)
        except ValueError:
            return None

        hour, minute = 0, 0
        try:
            # Drop anything after the first space before splitting HH:MM.
            time_parts = re.sub(' .*', '', self.time).split(':')
        except TypeError:
            time_parts = None
        if time_parts and len(time_parts) == 2:
            try:
                hour, minute = int(time_parts[0]), int(time_parts[1])
            except ValueError:
                # As long as we have a valid date, we can still create a
                # meaningful datetime object, even if the time is invalid, so
                # stick to the default hour and minute in case they can't be
                # parsed.
                hour, minute = 0, 0

        # Inside this method the name 'datetime' is the imported class; the
        # property of the same name lives in the class namespace only.
        try:
            return datetime(year, month, day, hour, minute)
        except ValueError:
            # Numeric but out of range (e.g. hour 25): retry with the default
            # time before giving up on the date itself.
            pass
        try:
            return datetime(year, month, day)
        except ValueError:
            return None

    @property
    def venue(self):
        """
        Returns a ``string`` constant representing if the team played at home
        ('Home'), on the road ('Away'), or at a neutral site ('Neutral').
        Returns ``None`` if the venue is missing or not recognised.
        """
        if not self._venue:
            return None
        venues = {'HOME': HOME, 'AWAY': AWAY, 'NEUTRAL': NEUTRAL}
        return venues.get(self._venue.upper())

    @property
    def result(self):
        """
        Returns a ``string`` constant representing if the team won ('Win'),
        drew ('Draw'), or lost ('Loss'). Returns ``None`` if the result is
        missing or not recognised.
        """
        if not self._result:
            return None
        results = {'W': WIN, 'D': DRAW, 'L': LOSS}
        return results.get(self._result.upper())

    # NOTE(review): the four score properties below may hand ``None`` to
    # int_property_decorator when no score or shootout tally exists. This
    # assumes the decorator maps an unconvertible value to ``None`` - confirm
    # against the decorators module.
    @int_property_decorator
    def goals_for(self):
        """
        Returns an ``int`` of the number of goals the team scored.
        """
        # If the game went to a shootout, remove the penalties.
        return self._regulation_score(self._goals_for)

    @int_property_decorator
    def goals_against(self):
        """
        Returns an ``int`` of the number of goals the team conceded.
        """
        # If the game went to a shootout, remove the penalties.
        return self._regulation_score(self._goals_against)

    @int_property_decorator
    def shootout_scored(self):
        """
        Returns an ``int`` of the number of penalties the team scored if the
        game went to a shootout after normal play.
        """
        return self._shootout_score(self._goals_for)

    @int_property_decorator
    def shootout_against(self):
        """
        Returns an ``int`` of the number of penalties the team conceded if the
        game went to a shootout after normal play.
        """
        return self._shootout_score(self._goals_against)

    @property
    def opponent(self):
        """
        Returns a ``string`` of the opponents name, such as 'Arsenal'.
        """
        return self._opponent

    @property
    def opponent_id(self):
        """
        Returns a ``string`` of the opponents squad ID, such as '18bb7c10' for
        Arsenal.
        """
        return self._opponent_id

    @float_property_decorator
    def expected_goals(self):
        """
        Returns a ``float`` of the number of goals the team was expected to
        score based on the quality of shots taken.
        """
        return self._expected_goals

    @float_property_decorator
    def expected_goals_against(self):
        """
        Returns a ``float`` of the number of goals the team was expected to
        concede based on the quality of shots taken.
        """
        return self._expected_goals_against

    @int_property_decorator
    def attendance(self):
        """
        Returns an ``int`` of the recorded attendance at the game.
        """
        try:
            # Strip thousands separators so the value can be converted.
            return self._attendance.replace(',', '')
        except AttributeError:
            return None

    @property
    def captain(self):
        """
        Returns a ``string`` representing the captain's name, such as
        'Harry Kane'.
        """
        return self._captain

    @property
    def captain_id(self):
        """
        Returns a ``string`` of the captain's unique ID on fbref.com, such as
        '21a66f6a' for Harry Kane.
        """
        return self._captain_id

    @property
    def formation(self):
        """
        Returns a ``string`` of the formation the team started with during the
        game, such as '4-4-2'.
        """
        return self._formation

    @property
    def referee(self):
        """
        Returns a ``string`` of the first and last name of the referee for the
        match.
        """
        return self._referee

    @property
    def match_report(self):
        """
        Returns a ``string`` of the 8-digit match ID for the game.
        """
        return self._match_report

    @property
    def notes(self):
        """
        Returns a ``string`` of any notes that might be included with the game.
        """
        return self._notes
class Schedule:
    """
    An object of the given team's schedule.

    Builds the list of games on a team's schedule for the season, each one
    carrying its date, opponent, result, and score where available.

    Parameters
    ----------
    team_id : string
        The team's 8-digit squad ID or the team's name, such as 'Tottenham
        Hotspur'.
    doc : PyQuery object (optional)
        If passed to the class instantiation, this will be used to pull all
        information instead of making another request to the website. If the
        document is not provided, it will be pulled during a later step.
    """

    def __init__(self, team_id, doc=None):
        self._games = []
        self._pull_schedule(team_id, doc)

    def __getitem__(self, index):
        """
        Return the game stored at the given position.

        Parameters
        ----------
        index : int
            The 0-based index of the game to return.

        Returns
        -------
        Game instance
            The Game stored at the requested position.

        Raises
        ------
        IndexError
            If the requested index is not within the bounds of the schedule.
        """
        return self._games[index]

    def __call__(self, date):
        """
        Return the first game played on the given calendar day.

        Only the year, month, and day of the passed value are compared; any
        time component is ignored. Games without a usable datetime are
        skipped.

        Parameters
        ----------
        date : datetime
            A datetime object of the month, day, and year to identify a
            particular game that was played.

        Returns
        -------
        Game instance
            The first Game whose datetime falls on the requested day.

        Raises
        ------
        ValueError
            If the requested date cannot be matched with a game in the
            schedule.
        """
        for game in self._games:
            kickoff = game.datetime
            if not kickoff:
                continue  # pragma: no cover
            # Compare lazily, year first, so later parts are only read when
            # the earlier ones already match.
            same_day = all(getattr(kickoff, part) == getattr(date, part)
                           for part in ('year', 'month', 'day'))
            if same_day:
                return game
        raise ValueError('No games found for requested date')

    def __str__(self):
        """
        Return the string representation of the class.
        """
        return '\n'.join(f'{game.date} - {game.opponent}'.strip()
                         for game in self._games)

    def __repr__(self):
        """
        Return the string representation of the class.
        """
        return self.__str__()

    def __iter__(self):
        """
        Returns an iterator of all of the games scheduled for the given team.
        """
        return iter(self._games)

    def __len__(self):
        """
        Returns the number of scheduled games for the given team.
        """
        return len(self._games)

    def _add_games_to_schedule(self, schedule):
        """
        Add game information to the list of games.

        Wrap every row of the schedule in a Game instance and append it to
        the list of games, skipping rows whose markup contains
        'class="thead"'.

        Parameters
        ----------
        schedule : PyQuery object
            A PyQuery object pertaining to a team's schedule table.
        """
        for row in schedule:
            if 'class="thead"' in str(row):
                continue  # pragma: no cover
            self._games.append(Game(row))

    def _pull_schedule(self, team_id, doc):
        """
        Download and create objects for the team's schedule.

        When no document is supplied, the squad ID is looked up from
        ``team_id`` and the squad page is requested; an HTTP error on that
        request leaves the schedule empty. The 'table#matchlogs_all' table is
        then pulled from the document and a Game instance is appended to
        '_games' for every row. If the table yields nothing,
        ``utils._no_data_found`` is called instead.

        Parameters
        ----------
        team_id : string
            The team's 8-digit squad ID or the team's name, such as 'Tottenham
            Hotspur'.
        doc : PyQuery object
            If passed to the class instantiation, this will be used to pull
            all information instead of making another request to the website.
            If the document is not provided, this value will be None.
        """
        if not doc:
            # The lookup stays outside the try block so that only errors from
            # the page request itself are swallowed.
            squad_id = _lookup_team(team_id)
            try:
                doc = pq(SQUAD_URL % squad_id)
            except HTTPError:
                return
        schedule = utils._get_stats_table(doc, 'table#matchlogs_all')
        if schedule:
            self._add_games_to_schedule(schedule)
        else:
            utils._no_data_found()