import re
from .constants import SQUAD_URL
from ..decorators import float_property_decorator, int_property_decorator
from .fb_utils import _lookup_team
from pyquery import PyQuery as pq
from .roster import Roster
from .schedule import Schedule
from .squad_ids import SQUAD_IDS
from urllib.error import HTTPError
from .. import utils
class Team:
    """
    The high-level stats and information for a single professional team.

    By requesting a team via either a name or squad ID, an object will be
    created which contains high-level information and stats for that team, if
    found. The information covers the team's primary competition, including
    their record, points, and position, plus the number of goals they have
    scored and conceded, and a pointer to the team roster and schedule.

    The identifier is resolved through ``_lookup_team``. How a name that
    cannot be matched is reported is decided by that helper. NOTE(review):
    earlier documentation said a dictionary of the closest matches is
    returned instead - confirm against ``_lookup_team``.

    Parameters
    ----------
    team_id : string
        A string representing either the team's full name, such as 'Tottenham
        Hotspur', or the team's 8-character squad ID, such as '361ca564' for
        Tottenham.
    squad_page : string (optional)
        Optionally specify the filename of a local file to use to pull data
        instead of downloading from sports-reference.com. This file should be
        of the Squad page for the designated year.
    """
def __init__(self, team_id, squad_page=None):
    """
    Resolve the squad ID and populate the team's attributes.

    Every attribute is first reset to ``None`` so that it exists even when
    the squad page cannot be pulled or a header line is missing. The squad
    ID is then resolved with ``_lookup_team`` and the page is pulled and
    parsed by ``_pull_team_page``.

    Parameters
    ----------
    team_id : string
        The team's name or squad ID, passed unchanged to ``_lookup_team``.
    squad_page : string (optional)
        Filename of a local squad page to use instead of downloading one.
        Passed unchanged to ``_pull_team_page``.
    """
    self._squad_id = None
    self._name = None
    self._season = None
    self._record = None
    self._position = None
    self._points = None
    self._league = None
    self._manager = None
    self._country = None
    self._gender = None
    self._goals_scored = None
    self._goals_against = None
    self._goal_difference = None
    self._expected_goals = None
    self._expected_goals_against = None
    self._expected_goal_difference = None
    self._home_record = None
    self._home_games = None
    self._away_record = None
    self._away_games = None
    self._home_wins = None
    self._home_draws = None
    self._home_losses = None
    self._away_wins = None
    self._away_draws = None
    self._away_losses = None
    self._home_points = None
    self._away_points = None
    # The parsed page is only stored after a successful pull, so give the
    # attribute a default here to guarantee it always exists.
    self._doc = None

    self._squad_id = _lookup_team(team_id)
    self._pull_team_page(squad_page)
def __str__(self):
    """
    Describe the team as 'name (squad ID) - season'.
    """
    description = f'{self.name} ({self.squad_id}) - {self.season}'
    return description
def __repr__(self):
    """
    Use the same text as ``__str__`` for the object's representation.
    """
    text = self.__str__()
    return text
def _parse_name(self, doc):
    """
    Store the season and team name found in the squad page heading.

    The heading text is expected to look like 'YYYY-YYYY Team Name Stats' or
    'YYYY Team Name Stats', ie. '2019-2020 Tottenham Hotspur Stats' or
    '2020 Sporting KC Stats'. The first space-separated piece is saved as
    the season and everything between it and the final piece is saved as the
    team name.

    Parameters
    ----------
    doc : PyQuery object
        A PyQuery object of the squad's entire HTML page.
    """
    heading = doc('h1[itemprop="name"]')('span').text()
    pieces = heading.split(' ')
    # First piece is the season; the trailing piece ("Stats") is dropped.
    self._season = pieces[0]
    self._name = ' '.join(pieces[1:-1])
def _location_records(self, record_line):
    """
    Split the home/away header line into records and points.

    The parenthesised groups on the line are treated as the records and the
    whole numbers left over outside of the parentheses are treated as the
    points. Each pair is only filled in when exactly two values are found;
    otherwise that pair stays ``None``.

    Parameters
    ----------
    record_line : string
        A ``string`` of the home and away record line as displayed in the
        squad's header.

    Returns
    -------
    tuple
        A ``tuple`` in the format (home record, away record, home points,
        away points). The records are strings and the points are integers.
    """
    text = record_line.lower()
    for label in ('home record: ', 'away record: '):
        text = text.replace(label, '')

    groups = re.findall(r'\(.*?\)', text)
    if len(groups) == 2:
        # Swap the parentheses for spaces, then trim them away.
        home_record, away_record = (
            re.sub(r'[\(\)]', ' ', group).strip() for group in groups
        )
    else:
        home_record = away_record = None

    # Whatever numbers remain once the records are removed are the points.
    totals = re.findall(r'\d+', re.sub(r'\(.*?\)', '', text))
    if len(totals) == 2:
        home_points, away_points = map(int, totals)
    else:
        home_points = away_points = None

    return home_record, away_record, home_points, away_points
def _records(self, record_line):
    """
    Parse the team's record in their primary competition.

    The line is expected to hold three comma-separated parts: the record,
    the points, and the position together with the league name, ie.
    'Record: 16-11-11, 59 points (1.55 per game), 6th in Premier League
    (1st Tier)'.

    The league name keeps the capitalisation used on the line. Lower-casing
    the line and re-capitalising it with ``str.title`` would mangle acronyms
    and apostrophes ("Liga MX" -> "Liga Mx", "Women's" -> "Women'S").

    Parameters
    ----------
    record_line : string
        A ``string`` representing the first line of the team's header which
        contains information on the squad's record, position, and league.

    Returns
    -------
    tuple
        A ``tuple`` of strings in the format (record, points, position,
        league name). All four values are ``None`` when the line does not
        split into exactly three parts, and the position alone is ``None``
        when its part holds no digits.
    """
    # Labels are matched case-insensitively so the rest of the text can be
    # left in its original case.
    stripped = re.sub(r'record: ', '', record_line, flags=re.IGNORECASE)
    parts = stripped.split(',')
    if len(parts) != 3:
        return None, None, None, None

    record, points, position = parts
    record = record.strip()
    points = re.sub(r' point.*', '', points, flags=re.IGNORECASE).strip()
    # Drop the trailing '(1st Tier)' style note before reading the league.
    position = re.sub(r'\(.*\)', '', position).strip()
    league = re.sub(r'.* in ', '', position, flags=re.IGNORECASE)
    ranks = re.findall(r'\d+', position)
    position = ranks[0] if ranks else None
    return record, points, position, league
def _goals(self, goals_line):
    """
    Parse the number of goals the team scored and conceded.

    Parenthesised notes are removed from the line and the three whole
    numbers that remain are read as the goals scored, the goals conceded,
    and the goal difference. The sign of a number is kept so that a negative
    goal difference is not reported as a positive one; a leading '+' is
    dropped.

    Parameters
    ----------
    goals_line : string
        A ``string`` of the header line holding the goals scored and
        conceded by the team.

    Returns
    -------
    tuple
        A ``tuple`` of numeric strings in the format (goals scored, goals
        against, goal difference), or three ``None`` values when the line
        does not hold exactly three numbers.
    """
    text = re.sub(r'\(.*?\)', '', goals_line.lower())
    # NOTE(review): the page may render negatives with a Unicode minus sign
    # rather than an ASCII hyphen - normalise it so both are understood.
    text = text.replace('\u2212', '-')
    # A sign only counts when it does not directly follow a digit, so a
    # dash used as a separator (ie. '61-47') is not read as a negative.
    values = re.findall(r'(?<!\d)[+-]?\d+', text)
    if len(values) != 3:
        return None, None, None
    return tuple(value.lstrip('+') for value in values)
def _parse_expected_goals(self, goals_line):
    """
    Pull the expected goal values out of the header line.

    The 'xG: ', ', xGA: ', and ', Diff: ' labels are stripped so that only
    the three space-separated values remain, ie. 'xG: 48.2, xGA: 53.4,
    Diff: -5.2' becomes '48.2', '53.4', and '-5.2'.

    Parameters
    ----------
    goals_line : string
        A ``string`` of the expected goals line found in the squad's header.

    Returns
    -------
    list or tuple
        A ``list`` of three strings in the format (expected goals scored,
        expected goals against, expected goal difference), or a ``tuple`` of
        three ``None`` values when the line does not reduce to exactly three
        pieces.
    """
    swaps = (('xG: ', ''), (', xGA: ', ' '), (', Diff: ', ' '))
    cleaned = goals_line
    for label, filler in swaps:
        cleaned = cleaned.replace(label, filler)
    values = cleaned.split(' ')
    return values if len(values) == 3 else (None, None, None)
def _parse_header(self, doc):
    """
    Read the squad's summary header and store what each line holds.

    Every paragraph in the summary section is inspected in turn and matched
    against a series of keywords. Since a page may be missing some of the
    lines, each line is identified by its contents instead of its position,
    and any attribute whose line is absent is left untouched. The order of
    the checks matters: 'home record' has to be tested before the more
    general 'record'.

    Parameters
    ----------
    doc : PyQuery object
        A PyQuery object containing the entire HTML contents of the squad's
        home page.
    """
    summary = doc('div[data-template="Partials/Teams/Summary"]')
    for paragraph in summary('p'):
        line = pq(paragraph).text()
        lowered = line.lower()
        if 'home record' in lowered:
            # (home record, away record, home points, away points)
            (self._home_record, self._away_record,
             self._home_points, self._away_points) = \
                self._location_records(line)
        elif 'record' in lowered:
            # (record, points, position, league)
            (self._record, self._points,
             self._position, self._league) = self._records(line)
        elif 'goals' in lowered:
            # (scored, against, difference)
            (self._goals_scored, self._goals_against,
             self._goal_difference) = self._goals(line)
        elif 'xGA' in line and 'Diff' in line:
            # (expected, expected against, expected difference)
            (self._expected_goals, self._expected_goals_against,
             self._expected_goal_difference) = \
                self._parse_expected_goals(line)
        elif 'manager' in lowered:
            self._manager = line.replace('Manager: ', '')
        elif 'governing country' in lowered:
            # The country is taken from the link text inside the line.
            self._country = pq(paragraph)('a').text()
        elif 'gender' in lowered:
            self._gender = line.replace('Gender: ', '')
def _pull_team_page(self, squad_page=None):
    """
    Pull the squad page and parse the name and header from it.

    The page is requested with ``utils._pull_page`` using the squad ID. If
    that raises an ``HTTPError``, the method returns quietly without storing
    or parsing anything. Otherwise the page is kept on the instance and
    handed to the name and header parsers.

    Parameters
    ----------
    squad_page : string (optional)
        Optionally specify the filename of a local file to use to pull data
        instead of downloading from sports-reference.com. This file should
        be of the Squad page for the designated year.
    """
    try:
        page = utils._pull_page(SQUAD_URL % self.squad_id, squad_page)
    except HTTPError:
        return
    self._doc = page
    for parse in (self._parse_name, self._parse_header):
        parse(page)
@property
def squad_id(self):
    """
    The team's squad ID as a ``string``, such as '361ca564' for Tottenham
    Hotspur.
    """
    return self._squad_id
@property
def name(self):
    """
    The team's full name as a ``string``, such as 'Tottenham Hotspur'.
    """
    return self._name
@property
def schedule(self):
    """
    A new instance of the Schedule class, built from the squad ID and the
    stored squad page.
    """
    # The page is only stored after a successful pull, so fall back to None
    # and make sure the attribute exists from here on.
    self._doc = getattr(self, '_doc', None)
    return Schedule(self.squad_id, self._doc)
@property
def roster(self):
    """
    A new instance of the Roster class, built from the squad ID and the
    stored squad page.
    """
    # The page is only stored after a successful pull, so fall back to None
    # and make sure the attribute exists from here on.
    self._doc = getattr(self, '_doc', None)
    return Roster(self._squad_id, self._doc)
@property
def season(self):
    """
    The season's year(s) as a ``string`` in the format YYYY or YYYY-YYYY,
    ie. '2020' or '2019-2020'.
    """
    return self._season
@property
def record(self):
    """
    The team's record in their primary competition (ie. Premier League) as
    a ``string`` in the format 'Wins-Draws-Losses'.
    """
    return self._record
@int_property_decorator
def position(self):
    """
    The team's place in the table (ie. 1 for first) in their primary
    competition (ie. Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._position
@int_property_decorator
def points(self):
    """
    The number of points the team has gained in their primary competition
    (ie. Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._points
@property
def league(self):
    """
    The name of the team's primary competition as a ``string``, such as
    'Premier League'.
    """
    return self._league
@property
def manager(self):
    """
    The full name of the team's manager as a ``string``, such as
    'José Mourinho'.
    """
    return self._manager
@property
def country(self):
    """
    The team's governing country as a ``string``, such as 'England'.
    """
    return self._country
@property
def gender(self):
    """
    The gender the team competes in as a ``string`` (ie. 'Female').
    """
    return self._gender
@int_property_decorator
def goals_scored(self):
    """
    The number of goals the team has scored during their primary
    competition (ie. Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._goals_scored
@int_property_decorator
def goals_against(self):
    """
    The number of goals the team has allowed during their primary
    competition (ie. Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._goals_against
@int_property_decorator
def goal_difference(self):
    """
    The team's goal difference during their primary competition (ie.
    Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._goal_difference
@float_property_decorator
def expected_goals(self):
    """
    The number of goals the team was expected to score during their primary
    competition (ie. Premier League). The stored value is exposed through
    ``float_property_decorator`` and is intended to be read as a ``float``.
    """
    return self._expected_goals
@float_property_decorator
def expected_goals_against(self):
    """
    The number of goals the team was expected to concede during their
    primary competition (ie. Premier League). The stored value is exposed
    through ``float_property_decorator`` and is intended to be read as a
    ``float``.
    """
    return self._expected_goals_against
@float_property_decorator
def expected_goal_difference(self):
    """
    The difference between the team's expected goals scored and conceded
    during their primary competition (ie. Premier League). The stored value
    is exposed through ``float_property_decorator`` and is intended to be
    read as a ``float``.
    """
    return self._expected_goal_difference
@property
def home_record(self):
    """
    The team's home record in their primary competition (ie. Premier
    League) as a ``string`` in the format 'Wins-Draws-Losses'.
    """
    return self._home_record
@int_property_decorator
def home_games(self):
    """
    The number of games the team has played at home during their primary
    competition (ie. Premier League), found by adding the home wins, draws,
    and losses together. ``None`` is returned when the values cannot be
    added, such as when one of them is missing.
    """
    try:
        wins = self.home_wins
        draws = self.home_draws
        losses = self.home_losses
        total = wins + draws + losses
    except TypeError:
        total = None
    return total
@property
def away_record(self):
    """
    The team's away record in their primary competition (ie. Premier
    League) as a ``string`` in the format 'Wins-Draws-Losses'.
    """
    return self._away_record
@int_property_decorator
def away_games(self):
    """
    The number of games the team has played away during their primary
    competition (ie. Premier League), found by adding the away wins, draws,
    and losses together. ``None`` is returned when the values cannot be
    added, such as when one of them is missing.
    """
    try:
        wins = self.away_wins
        draws = self.away_draws
        losses = self.away_losses
        total = wins + draws + losses
    except TypeError:
        total = None
    return total
@int_property_decorator
def home_wins(self):
    """
    The number of games the team has won at home during their primary
    competition (ie. Premier League), read from the first piece of the
    'Wins-Draws-Losses' home record. ``None`` is returned when the record
    is missing or the piece is not a whole number.
    """
    try:
        return int(self._home_record.split('-')[0])
    except (ValueError, AttributeError):
        return None
@int_property_decorator
def home_draws(self):
    """
    The number of games the team has drawn at home during their primary
    competition (ie. Premier League), read from the second piece of the
    'Wins-Draws-Losses' home record. ``None`` is returned when the record
    is missing, too short, or the piece is not a whole number.
    """
    try:
        return int(self._home_record.split('-')[1])
    except (IndexError, ValueError, AttributeError):
        return None
@int_property_decorator
def home_losses(self):
    """
    The number of games the team has lost at home during their primary
    competition (ie. Premier League), read from the third piece of the
    'Wins-Draws-Losses' home record. ``None`` is returned when the record
    is missing, too short, or the piece is not a whole number.
    """
    try:
        return int(self._home_record.split('-')[2])
    except (IndexError, ValueError, AttributeError):
        return None
@int_property_decorator
def away_wins(self):
    """
    The number of games the team has won while away during their primary
    competition (ie. Premier League), read from the first piece of the
    'Wins-Draws-Losses' away record. ``None`` is returned when the record
    is missing or the piece is not a whole number.
    """
    try:
        return int(self._away_record.split('-')[0])
    except (ValueError, AttributeError):
        return None
@int_property_decorator
def away_draws(self):
    """
    The number of games the team has drawn while away during their primary
    competition (ie. Premier League), read from the second piece of the
    'Wins-Draws-Losses' away record. ``None`` is returned when the record
    is missing, too short, or the piece is not a whole number.
    """
    try:
        return int(self._away_record.split('-')[1])
    except (IndexError, ValueError, AttributeError):
        return None
@int_property_decorator
def away_losses(self):
    """
    The number of games the team has lost while away during their primary
    competition (ie. Premier League), read from the third piece of the
    'Wins-Draws-Losses' away record. ``None`` is returned when the record
    is missing, too short, or the piece is not a whole number.
    """
    try:
        return int(self._away_record.split('-')[2])
    except (IndexError, ValueError, AttributeError):
        return None
@int_property_decorator
def home_points(self):
    """
    The number of points the team has gained at home during their primary
    competition (ie. Premier League). The stored value is exposed through
    ``int_property_decorator`` and is intended to be read as an ``int``.
    """
    return self._home_points
@int_property_decorator
def away_points(self):
    """
    The number of points the team has gained while away during their
    primary competition (ie. Premier League). The stored value is exposed
    through ``int_property_decorator`` and is intended to be read as an
    ``int``.
    """
    return self._away_points