Project: Soccer#

Section Title: Soccer

import matplotlib.pyplot as plt
import numpy as np

Import Data#

In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/

Available leagues:

  • English Premier League

  • Spanish La Liga

  • French Ligue 1

  • German Bundesliga

  • Italian Serie A

  • Dutch Eredivisie

  • Russian Premier League

  • English Championship.

To install the package, use !pip install soccer-data-api.

# Uncomment the next line to install the package from inside the notebook.
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
# One client object; every later cell fetches league data through it.
soccer_data = SoccerDataAPI()
  • The soccer_data object contains methods specific to each league.

# Show the class of the client object.
type(soccer_data) 
soccer_data_api.soccer_api.SoccerDataAPI
# methods
# Only the last nine names reported by dir() are shown.
dir(soccer_data)[-9:]
['bundesliga',
 'english_championship',
 'english_premier',
 'eredivisie',
 'get_data',
 'la_liga',
 'ligue_1',
 'russian_premier',
 'serie_a']
# Fetch the English Premier League data (the recorded run returned an empty list).
soccer_data.english_premier()
[]

English Premier League#

Each league is represented as a list.

# Keep the Premier League data in `ep`; the cells that follow all read from it.
ep = soccer_data.english_premier()
type(ep)
list
# Number of entries returned for the league.
len(ep)
0
  • The list elements consist of dictionaries for each team, ordered by their ranking.

# Display the whole list.
ep
[]
  • Each team is represented as a dictionary.

# First entry of the list. Raises IndexError when `ep` is empty, as in the recorded run.
ep[0]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[10], line 1
----> 1 ep[0]

IndexError: list index out of range
  • For each team, the following information is provided.

# Field names of the first entry; `ep` must hold at least one entry for this to work.
ep[0].keys()

Highest-Scoring Team#

# Scan the league for the club with the largest 'goals_for' value.
# Ties keep the earliest club because the comparison is strict.
max_goals_for, max_goals_for_team = 0, ''

for club in ep:
    scored = int(club['goals_for'])
    if scored <= max_goals_for:
        continue
    max_goals_for, max_goals_for_team = scored, club['team']

print(f'Team         : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')

Lowest-Scoring Team#

import numpy as np

# Scan the league for the club with the smallest 'goals_for' value.
# Starting from +inf guarantees the first club always replaces the sentinel.
min_goals_for, min_goals_for_team = np.inf, ''

for club in ep:
    scored = int(club['goals_for'])
    if scored >= min_goals_for:
        continue
    min_goals_for, min_goals_for_team = scored, club['team']

print(f'Team         : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')

Team with the Most Goals Conceded#

# Scan the league for the club with the largest 'goals_against' value.
# Ties keep the earliest club because the comparison is strict.
max_goals_against, max_goals_against_team = 0, ''

for club in ep:
    conceded = int(club['goals_against'])
    if conceded <= max_goals_against:
        continue
    max_goals_against, max_goals_against_team = conceded, club['team']

print(f'Team             : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')

Top Scorer Player#

# Find the player with the most goals among the clubs' top scorers.
# Assumes each 'top_scorer' string ends with '-<goals>' -- TODO confirm
# against the API output.
top_score = 0
top_score_player = ''

for t in ep:
    ts = t['top_scorer']
    # Split once, at the LAST hyphen, and use that position for both the
    # name and the goal count. Splitting at the first hyphen would cut a
    # hyphenated name short and hand int() a non-numeric remainder.
    sep = ts.rfind('-')
    goals = int(ts[sep+1:])
    if goals > top_score:
        top_score = goals
        top_score_player = ts[:sep]

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')

Teams with Joint Top Scorers#

# Print every 'top_scorer' entry that contains a comma.
top_score, top_score_player = 0, ''

for club in ep:
    scorers = club['top_scorer']
    if ',' in scorers:
        print(scorers)

List of Teams and Points#

# Parallel lists: team names and their points (as integers), in league order.
teams = [club['team'] for club in ep]
points = [int(club['points']) for club in ep]

Bar graph of points#

# Vertical bar chart of points per team on a wide figure.
plt.figure(figsize=(20,5))
plt.bar(teams, points)
# Rotate the team names so they do not overlap; the trailing ';' keeps the
# notebook from echoing the call's return value.
plt.xticks(rotation=90);
# Same data as a horizontal bar chart on a tall figure.
plt.figure(figsize=(5,10))
plt.barh( teams, points);

Check Matches Played#

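  • There is an issue with the data: the check below prints every team whose wins, draws, and losses do not add up to its matches played.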

  • The number of draws is incorrect.

# Report every club whose wins + draws + losses differs from matches played.
for club in ep:
    played = int(club['matches_played'])
    won = int(club['wins'])
    drawn = int(club['draws'])
    lost = int(club['losses'])
    if played != won + drawn + lost:
        print(club['team'], played, won, drawn, lost)

All Leagues#

The following is the list of all the leagues available in the package.

# Fetch every league once, in a fixed order, and keep the results in a list.
league_fetchers = (
    soccer_data.english_premier,
    soccer_data.la_liga,
    soccer_data.ligue_1,
    soccer_data.bundesliga,
    soccer_data.serie_a,
    soccer_data.eredivisie,
    soccer_data.russian_premier,
    soccer_data.english_championship,
)
leagues = [fetch() for fetch in league_fetchers]
# Index 1 is the La Liga result, given the order above.
leagues[1]

The following is the dictionary of all the leagues available in the package.

# Map each league's method name to the data that method returns.
# The keys double as the attribute names looked up on the client.
league_names = (
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
)
leagues_dict = {
    league_name: getattr(soccer_data, league_name)()
    for league_name in league_names
}

Top Scorer Player#

# Find the player with the most goals among the top scorers of all leagues.
# Assumes each 'top_scorer' string ends with '-<goals>' -- TODO confirm
# against the API output.
top_score = 0
top_score_player = ''

for league in leagues:
    for t in league:
        ts = t['top_scorer']
        # Cut name and score at the same (last) hyphen so that a hyphenated
        # player name is not truncated at its first hyphen.
        sep = ts.rfind('-')
        name = ts[:sep]
        score = int(ts[sep+1:])
        # Logical `and`, not bitwise `&`; entries with an empty name are skipped.
        if score > top_score and name:
            top_score = score
            top_score_player = name

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')

The Team with Highest and Lowest Goal Differences#

# Track the largest and smallest 'goal_diff' over every club in every league.
highest_goal_diff, highest_goal_diff_team = 0, ''
lowest_goal_diff, lowest_goal_diff_team = np.inf, ''

for table in leagues:
    for club in table:
        diff = int(club['goal_diff'])
        if diff > highest_goal_diff:
            highest_goal_diff, highest_goal_diff_team = diff, club['team']
        if diff < lowest_goal_diff:
            lowest_goal_diff, lowest_goal_diff_team = diff, club['team']

print(f'Highest Goal Diff : {highest_goal_diff}  ---  Team: {highest_goal_diff_team }')
print(f'Lowest  Goal Diff : {lowest_goal_diff}   ---  Team: {lowest_goal_diff_team }')

The League with the Most Goals Scored#

# Sum 'goals_for' over each league and keep the league with the largest sum.
# Ties keep the earliest league because the comparison is strict.
max_total_goal, max_total_goal_league = 0, ''

for league_name, table in leagues_dict.items():
    league_goals = sum(int(club['goals_for']) for club in table)
    if league_goals > max_total_goal:
        max_total_goal, max_total_goal_league = league_goals, league_name

print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal      : {max_total_goal}')

The Team with Highest Points Per Game#

# Find the club with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''

for league in leagues:
    for t in league:
        played = int(t['matches_played'])
        # A club with no matches played has no defined ratio; skipping it
        # avoids a ZeroDivisionError.
        if played == 0:
            continue
        ppg = int(t['points'])/played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

print(f'Max Point Per Game : {max_point_pg}')
print(f'Team               : {max_point_pg_team}')

The Teams with Highest Points Per Game for Each League#

# For each league, find the club with the highest points-per-game ratio,
# then plot the per-league winners as a bar chart.
max_point_pg_list = []
max_point_pg_team_list = []


for league in leagues:
    # Reset the running best for every league.
    max_point_pg = 0
    max_point_pg_team = ''
    for t in league:
        played = int(t['matches_played'])
        # A club with no matches played has no defined ratio; skipping it
        # avoids a ZeroDivisionError.
        if played == 0:
            continue
        ppg = int(t['points'])/played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

    # One entry per league, in the same order as `leagues`.
    max_point_pg_list.append(max_point_pg)
    max_point_pg_team_list.append(max_point_pg_team)

plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
plt.xticks(rotation=90);

Future work#

  • Store all goal differences as integers in a list (pay attention to the +/- signs).

  • Store the number of draws for each team in a list.

  • Store the names of all top scorers in a list.

  • Store the number of goals scored by each top scorer in a list.

  • Identify the team with the largest point difference from the second-highest team.