Project: Soccer#

import matplotlib.pyplot as plt
import numpy as np
Import Data#
In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/
Available leagues:
- English Premier League 
- Spanish La Liga 
- French Ligue 1 
- German Bundesliga 
- Italian Serie A 
- Dutch Eredivisie 
- Russian Premier League 
- English Championship. 
To install the package, use !pip install soccer-data-api.
# Uncomment the next line to install the package from inside the notebook.
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
# One client object; the league data used below is read through its methods.
soccer_data = SoccerDataAPI()
- The soccer_data object contains methods specific to each league. 
# Show the class of the client object created above.
type(soccer_data) 
soccer_data_api.soccer_api.SoccerDataAPI
# Last nine attribute names of the client; in the recorded output these are
# the per-league methods plus get_data.
dir(soccer_data)[-9:]
['bundesliga',
 'english_championship',
 'english_premier',
 'eredivisie',
 'get_data',
 'la_liga',
 'ligue_1',
 'russian_premier',
 'serie_a']
# Call the English Premier League method; in the recorded run it returned an
# empty list.
soccer_data.english_premier()
[]
English Premier League#
Each league is represented as a list.
# Keep the English Premier League result in `ep`; the cells below iterate it.
ep = soccer_data.english_premier()
type(ep)
list
# Number of entries in the list (0 in the recorded run).
len(ep)
0
- The list elements consist of dictionaries for each team, ordered by their ranking. 
# Display the whole list.
ep
[]
- Each team is represented as a dictionary. 
# First entry of the list; raises IndexError when the list is empty, as it was
# in the recorded run.
ep[0]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[10], line 1
----> 1 ep[0]
IndexError: list index out of range
- For each team, the following information is provided. 
# Keys of the first entry, i.e. the fields available for a team
# (requires a non-empty list).
ep[0].keys()
Highest-Scoring Team#
# Scan the league for the team with the largest 'goals_for' value.
# A later team only replaces the current leader when it has strictly more
# goals, so the first team to reach the maximum is the one reported.
max_goals_for, max_goals_for_team = 0, ''
for club in ep:
    scored = int(club['goals_for'])
    if scored <= max_goals_for:
        continue
    max_goals_for = scored
    max_goals_for_team = club['team']
print(f'Team         : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')
Lowest-Scoring Team#
import numpy as np
# Start from +infinity so that any real goal tally is smaller than the
# initial value; ties keep the team that was seen first.
min_goals_for, min_goals_for_team = np.inf, ''
for club in ep:
    scored = int(club['goals_for'])
    if not scored < min_goals_for:
        continue
    min_goals_for = scored
    min_goals_for_team = club['team']
print(f'Team         : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')
Team with the Most Goals Conceded#
# Find the team with the largest 'goals_against' value.
# Only a strictly larger value replaces the current record.
max_goals_against, max_goals_against_team = 0, ''
for club in ep:
    conceded = int(club['goals_against'])
    if conceded > max_goals_against:
        max_goals_against = conceded
        max_goals_against_team = club['team']
print(f'Team             : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')
Top Scorer Player#
# Find the best scorer among the per-team 'top_scorer' entries of the league.
# Each entry is split on its LAST hyphen: the text after it is the goal count,
# the text before it is the player name. Splitting both parts at the same
# hyphen keeps hyphenated names whole. Previously the comparison used the last
# hyphen but the stored score and name used the first, which truncated such
# names and made int() fail on the remainder.
top_score = 0
top_score_player = ''
for t in ep:
    ts = t['top_scorer']
    name, sep, goals = ts.rpartition('-')
    score = int(goals)  # int() ignores surrounding whitespace
    if score > top_score:
        top_score = score
        # Drop whitespace left next to the separator.
        top_score_player = name.strip()
print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
Teams with Tied Top Scorers#
# Print every 'top_scorer' entry that contains a comma, i.e. an entry that
# lists more than one name.
top_score, top_score_player = 0, ''
for club in ep:
    scorers = club['top_scorer']
    if ',' in scorers:
        print(scorers)
List of Teams and Points#
# Parallel lists in league order: team names and their points as integers.
teams = [club['team'] for club in ep]
points = [int(club['points']) for club in ep]
Bar graph of points#
# Vertical bar chart: one bar per team, with the tick labels rotated so that
# long team names do not overlap.
plt.figure(figsize=(20, 5))
plt.bar(x=teams, height=points)
plt.xticks(rotation=90);
# The same data as a horizontal bar chart on a tall figure.
plt.figure(figsize=(5, 10))
plt.barh(y=teams, width=points);
Check Matches Played#
- There is an issue with the data. 
- The number of draws is incorrect. 
# Consistency check: matches played should equal wins + draws + losses.
# Print the raw numbers for every team where that does not hold.
for club in ep:
    played, won, drawn, lost = (
        int(club[field])
        for field in ('matches_played', 'wins', 'draws', 'losses')
    )
    if played != won + drawn + lost:
        print(club['team'], played, won, drawn, lost)
All Leagues#
The following is the list of all the leagues available in the package.
# League methods of the client, listed in the order used for `leagues`.
league_fetchers = [
    soccer_data.english_premier,
    soccer_data.la_liga,
    soccer_data.ligue_1,
    soccer_data.bundesliga,
    soccer_data.serie_a,
    soccer_data.eredivisie,
    soccer_data.russian_premier,
    soccer_data.english_championship,
]
# Call each method in turn; every result is one league.
leagues = [fetch() for fetch in league_fetchers]
# Display the second league (La Liga).
leagues[1]
The following is the dictionary of all the leagues available in the package.
# Each dictionary key is also the name of the client method that returns that
# league, so the mapping is built by looking the method up by name and
# calling it.
league_names = (
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
)
leagues_dict = {
    league_name: getattr(soccer_data, league_name)()
    for league_name in league_names
}
Top Scorer Player Across All Leagues#
# Find the best scorer among the 'top_scorer' entries of every league.
# Each entry is split on its LAST hyphen, so the name and the goal count come
# from the same split. Previously the name was cut at the first hyphen, which
# truncated hyphenated names.
top_score = 0
top_score_player = ''
for league in leagues:
    for t in league:
        ts = t['top_scorer']
        name, sep, goals = ts.rpartition('-')
        name = name.strip()  # drop whitespace left next to the separator
        score = int(goals)   # int() ignores surrounding whitespace
        # Entries without a player name are ignored. Logical `and` replaces
        # the bitwise `&` that was applied to two booleans.
        if name and score > top_score:
            top_score = score
            top_score_player = name
print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
The Teams with the Highest and Lowest Goal Differences#
# Find the teams with the highest and the lowest goal difference across all
# leagues.
# Both searches start from an infinite sentinel so that the first team always
# sets the record. The highest value used to start at 0, which reported a
# blank team whenever no team had a positive goal difference.
highest_goal_diff = -np.inf
highest_goal_diff_team = ''
lowest_goal_diff = np.inf
lowest_goal_diff_team = ''
for league in leagues:
    for t in league:
        goal_diff = int(t['goal_diff'])  # int() accepts a leading '+' or '-'
        if goal_diff > highest_goal_diff:
            highest_goal_diff = goal_diff
            highest_goal_diff_team = t['team']
        if goal_diff < lowest_goal_diff:
            lowest_goal_diff = goal_diff
            lowest_goal_diff_team = t['team']
print(f'Highest Goal Diff : {highest_goal_diff}  ---  Team: {highest_goal_diff_team }')
print(f'Lowest  Goal Diff : {lowest_goal_diff}   ---  Team: {lowest_goal_diff_team }')
The League with the Most Goals Scored#
# Add up 'goals_for' over all teams of each league and keep the league with
# the largest total; only a strictly larger total replaces the current one.
max_total_goal, max_total_goal_league = 0, ''
for league_name, table in leagues_dict.items():
    league_goals = sum(int(club['goals_for']) for club in table)
    if league_goals > max_total_goal:
        max_total_goal = league_goals
        max_total_goal_league = league_name
print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal      : {max_total_goal}')
The Team with Highest Points Per Game#
# Find the team with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''
for league in leagues:
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # No matches yet: the ratio is undefined, and dividing would
            # raise ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']
print(f'Max Point Per Game : {max_point_pg}')
print(f'Team               : {max_point_pg_team}')
The Teams with Highest Points Per Game for Each League#
# For each league, find the team with the highest points-per-game ratio, then
# plot one bar per league winner.
max_point_pg_list = []
max_point_pg_team_list = []
for league in leagues:
    # Reset the record for every league.
    max_point_pg = 0
    max_point_pg_team = ''
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # No matches yet: the ratio is undefined, and dividing would
            # raise ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

    max_point_pg_list.append(max_point_pg)
    max_point_pg_team_list.append(max_point_pg_team)

plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
plt.xticks(rotation=90);
Future work#
- Store all goal differences as integers in a list (pay attention to the +/- signs). 
- Store the number of draws for each team in a list. 
- Store the names of all top scorers in a list. 
- Store the number of goals scored by each top scorer in a list. 
- Identify the team with the largest point difference from the second-highest team. 
