Project: Soccer#
import matplotlib.pyplot as plt
import numpy as np
Import Data#
In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/
Available leagues:
English Premier League
Spanish La Liga
French Ligue 1
German Bundesliga
Italian Serie A
Dutch Eredivisie
Russian Premier League
English Championship.
To install the package, run `!pip install soccer-data-api`.
# Uncomment the next line to install the package from within the notebook.
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
# One client object for the whole notebook; each league's table is fetched
# through its own method on this object (e.g. english_premier(), la_liga()).
soccer_data = SoccerDataAPI()
The soccer_data object contains methods specific to each league.
type(soccer_data)
soccer_data_api.soccer_api.SoccerDataAPI
# methods
# dir() returns names sorted alphabetically, and underscore-prefixed names sort
# before lowercase ones, so the last nine entries are the lowercase public
# names (the league methods plus get_data).
dir(soccer_data)[-9:]
['bundesliga',
'english_championship',
'english_premier',
'eredivisie',
'get_data',
'la_liga',
'ligue_1',
'russian_premier',
'serie_a']
soccer_data.english_premier()
[]
English Premier League#
Each league is represented as a list.
# Fetch the English Premier League table once and keep it in `ep`;
# the Premier League cells that follow all iterate over this list.
ep = soccer_data.english_premier()
type(ep)
list
len(ep)
0
The list elements consist of dictionaries for each team, ordered by their ranking.
ep
[]
Each team is represented as a dictionary.
ep[0]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[10], line 1
----> 1 ep[0]
IndexError: list index out of range
For each team, the following information is provided.
ep[0].keys()
Highest-Scoring Team#
# Scan the Premier League table for the team with the largest 'goals_for'.
# The comparison is strict, so on a tie the team listed first in `ep` is kept.
max_goals_for, max_goals_for_team = 0, ''
for club in ep:
    scored = int(club['goals_for'])
    if scored > max_goals_for:
        max_goals_for, max_goals_for_team = scored, club['team']

print(f'Team : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')
Lowest-Scoring Team#
import numpy as np

# Start from +infinity so the first team always replaces the sentinel, then
# keep the strictly smallest 'goals_for' seen (first team wins on a tie).
min_goals_for_team = ''
min_goals_for = np.inf
for club in ep:
    scored = int(club['goals_for'])
    if scored < min_goals_for:
        min_goals_for, min_goals_for_team = scored, club['team']

print(f'Team : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')
Team with the Most Goals Conceded#
# Find the team that has conceded the most goals ('goals_against').
# Strict comparison: on a tie the team listed first in `ep` is kept.
max_goals_against, max_goals_against_team = 0, ''
for club in ep:
    conceded = int(club['goals_against'])
    if conceded > max_goals_against:
        max_goals_against, max_goals_against_team = conceded, club['team']

print(f'Team : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')
Top Scorer Player#
# Find the Premier League player with the highest goal tally among the
# per-team 'top_scorer' entries.
# Each entry is treated as "<player name(s)>-<goals>" (assumed from how the
# notebook slices it; confirm against the API output). The split is done on
# the LAST hyphen for both the name and the goal count, so a hyphenated player
# name neither breaks int() nor gets truncated.
top_score = 0
top_score_player = ''
for t in ep:
    name, _, goals = t['top_scorer'].rpartition('-')
    goals = int(goals)  # int() tolerates surrounding whitespace
    if goals > top_score:
        top_score = goals
        top_score_player = name

print(f'Top Scorer : {top_score_player}')
print(f'Top Score : {top_score}')
Teams with Joint Top Scorers#
# Print every 'top_scorer' entry that contains a comma, i.e. entries that list
# more than one player (presumably joint top scorers of a team; verify against
# the data).
# The previous `top_score` / `top_score_player` resets were unused here and
# only overwrote the result of the top-scorer cell, so they are dropped.
for t in ep:
    ts = t['top_scorer']
    if ',' in ts:
        print(ts)
List of Teams and Points#
# Parallel lists in table order: team names and their points as integers.
teams = [club['team'] for club in ep]
points = [int(club['points']) for club in ep]
Bar graph of points#
# Vertical bar chart of points per team on a wide figure; the x tick labels
# (team names) are rotated 90 degrees. The trailing ';' keeps the notebook
# from echoing the return value of the last call.
plt.figure(figsize=(20,5))
plt.bar(teams, points)
plt.xticks(rotation=90);
# Same data as a horizontal bar chart on a tall figure, team names on the y-axis.
plt.figure(figsize=(5,10))
plt.barh( teams, points);
Check Matches Played#
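There is an issue with the data: for some teams the number of draws is incorrect.
The check below lists every team whose wins, draws, and losses do not add up to its matches played.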
# Consistency check: wins + draws + losses should equal matches played.
# For every team where it does not, print the team and the four raw numbers.
for club in ep:
    played = int(club['matches_played'])
    wins, draws, losses = (int(club[key]) for key in ('wins', 'draws', 'losses'))
    if played != wins + draws + losses:
        print(club['team'], played, wins, draws, losses)
All Leagues#
The following is the list of all the leagues available in the package.
# Fetch every supported league once, in a fixed order, and collect the
# resulting tables in `leagues`.
_league_fetchers = (
    soccer_data.english_premier,
    soccer_data.la_liga,
    soccer_data.ligue_1,
    soccer_data.bundesliga,
    soccer_data.serie_a,
    soccer_data.eredivisie,
    soccer_data.russian_premier,
    soccer_data.english_championship,
)
leagues = [fetch() for fetch in _league_fetchers]
# Index 1 is the La Liga table.
leagues[1]
The following is the dictionary of all the leagues available in the package.
# Map each league name to its table.
# The tables are taken from the `leagues` list that was fetched just above
# instead of calling every league method a second time: this avoids eight
# redundant requests and guarantees the list and the dict hold the same data.
# NOTE: the names below must stay in the same order as the entries of `leagues`.
_league_names = (
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
)
leagues_dict = dict(zip(_league_names, leagues))
Top Scorer Player#
# Find the player with the highest goal tally across all leagues.
# Each 'top_scorer' entry is treated as "<player name(s)>-<goals>" (assumed
# from how the notebook slices it; confirm against the API output). Name and
# goals are both split on the LAST hyphen, so a hyphenated player name is no
# longer cut off at its first hyphen.
top_score = 0
top_score_player = ''
for league in leagues:
    for t in league:
        name, _, goals = t['top_scorer'].rpartition('-')
        score = int(goals)
        # Entries without a player name are ignored; logical `and` replaces
        # the bitwise `&` that was applied to the two booleans.
        if score > top_score and name:
            top_score = score
            top_score_player = name

print(f'Top Scorer : {top_score_player}')
print(f'Top Score : {top_score}')
The Team with Highest and Lowest Goal Differences#
# Track the largest and smallest 'goal_diff' over every team of every league.
# int() accepts the explicit '+' / '-' sign in front of the number.
highest_goal_diff, highest_goal_diff_team = 0, ''
lowest_goal_diff, lowest_goal_diff_team = np.inf, ''
for club in (entry for table in leagues for entry in table):
    diff = int(club['goal_diff'])
    if diff > highest_goal_diff:
        highest_goal_diff, highest_goal_diff_team = diff, club['team']
    if diff < lowest_goal_diff:
        lowest_goal_diff, lowest_goal_diff_team = diff, club['team']

print(f'Highest Goal Diff : {highest_goal_diff} --- Team: {highest_goal_diff_team }')
print(f'Lowest Goal Diff : {lowest_goal_diff} --- Team: {lowest_goal_diff_team }')
The League with the Most Goals Scored#
# Add up 'goals_for' over all teams of each league and keep the league with
# the largest total (strict comparison: the first league wins on a tie).
max_total_goal, max_total_goal_league = 0, ''
for league_name, table in leagues_dict.items():
    total = sum(int(club['goals_for']) for club in table)
    if total > max_total_goal:
        max_total_goal, max_total_goal_league = total, league_name

print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal : {max_total_goal}')
The Team with Highest Points Per Game#
# Find the team with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''
for league in leagues:
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # No games played yet: points per game is undefined, and dividing
            # would raise ZeroDivisionError, so the team is skipped.
            continue
        ppg = int(t['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

print(f'Max Point Per Game : {max_point_pg}')
print(f'Team : {max_point_pg_team}')
The Teams with Highest Points Per Game for Each League#
# For each league, find the team with the highest points-per-game ratio and
# plot the per-league winners as a bar chart.
# A league with no usable rows contributes the placeholder pair ('', 0).
max_point_pg_list = []
max_point_pg_team_list = []
for league in leagues:
    # Per-league running best, held in separate names so the module-level
    # max_point_pg / max_point_pg_team values are left untouched.
    best_ppg, best_team = 0, ''
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # Points per game is undefined before the first match; skipping
            # avoids a ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        if ppg > best_ppg:
            best_ppg, best_team = ppg, t['team']
    max_point_pg_list.append(best_ppg)
    max_point_pg_team_list.append(best_team)

plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
plt.xticks(rotation=90);
Future work#
Store all goal differences as integers in a list (pay attention to the +/- signs).
Store the number of draws for each team in a list.
Store the names of all top scorers in a list.
Store the number of goals scored by each top scorer in a list.
Identify the first-placed team with the largest points lead over the second-placed team in its league.