Project: Soccer#

Section Title: Soccer

import matplotlib.pyplot as plt
import numpy as np

Import Data#

In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/

Available leagues:

  • English Premier League

  • Spanish La Liga

  • French Ligue 1

  • German Bundesliga

  • Italian Serie A

  • Dutch Eredivisie

  • Russian Premier League

  • English Championship.

To install the package, use !pip install soccer-data-api.

# Install the package once per environment (uncomment to run inside a notebook).
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
# One API object is created here; its league methods are called in the cells below.
soccer_data = SoccerDataAPI()
  • The soccer_data object contains methods specific to each league.

type(soccer_data) 
soccer_data_api.soccer_api.SoccerDataAPI
# methods: show the last nine names reported by dir() for the API object
dir(soccer_data)[-9:]
['bundesliga',
 'english_championship',
 'english_premier',
 'eredivisie',
 'get_data',
 'la_liga',
 'ligue_1',
 'russian_premier',
 'serie_a']
soccer_data.bundesliga()
[]

English Premier League#

Each league is represented as a list.

# Fetch the English Premier League table and inspect the type of the result.
ep = soccer_data.english_premier()
type(ep)
list
len(ep)
0
  • The list elements consist of dictionaries for each team, ordered by their ranking.

ep
[]
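  • Each team is represented as a dictionary. Note that `ep[0]` raises an `IndexError` when the API returns an empty list, as happened in the run shown below.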

ep[0]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[10], line 1
----> 1 ep[0]

IndexError: list index out of range
  • For each team, the following information is provided.

ep[0].keys()
dict_keys(['team', 'pos', 'points', 'matches_played', 'wins', 'draws', 'losses', 'goals_for', 'goals_against', 'goal_diff', 'top_scorer'])

Highest-Scoring Team#

# Scan the league table for the team with the largest 'goals_for' value.
# The comparison is strict, so on a tie the first team encountered is kept.
max_goals_for, max_goals_for_team = 0, ''

for team_row in ep:
    scored = int(team_row['goals_for'])
    if scored <= max_goals_for:
        continue
    max_goals_for, max_goals_for_team = scored, team_row['team']

print(f'Team         : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')
Team         : Liverpool
Max Goals For: 54

Lowest-Scoring Team#

import numpy as np

# Scan the league table for the team with the smallest 'goals_for' value.
# Seeding with +infinity means the first team always replaces the seed.
min_goals_for, min_goals_for_team = np.inf, ''

for team_row in ep:
    scored = int(team_row['goals_for'])
    if scored >= min_goals_for:
        continue
    min_goals_for, min_goals_for_team = scored, team_row['team']

print(f'Team         : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')
Team         : Southampton
Min Goals For: 16

The Team That Conceded the Most Goals#

# Scan the league table for the team with the largest 'goals_against' value.
# The comparison is strict, so on a tie the first team encountered is kept.
max_goals_against, max_goals_against_team = 0, ''

for team_row in ep:
    conceded = int(team_row['goals_against'])
    if conceded <= max_goals_against:
        continue
    max_goals_against, max_goals_against_team = conceded, team_row['team']

print(f'Team             : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')
Team             : Southampton
Max Goals Against: 53

Top Scorer Player#

# Find the player with the most goals among the teams' top scorers.
# Each 'top_scorer' value has the form 'Name - goals'. The string is split on
# the LAST hyphen for both the name and the goal count, so a hyphenated
# player name cannot corrupt the parsed number or truncate the name.
top_score = 0
top_score_player = ''

for t in ep:
    # rpartition returns (text before last '-', '-', text after last '-').
    name, sep, goals = t['top_scorer'].rpartition('-')
    if int(goals) > top_score:
        top_score = int(goals)
        top_score_player = name

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
Top Scorer : Mohamed Salah 
Top Score  : 19

Teams with Joint Top Scorers#

# Print the 'top_scorer' entry of every team where it lists more than one
# player; several names in one entry are separated by commas.
for t in ep:
    ts = t['top_scorer']
    if ',' in ts:
        print(ts)
Tomáš Souček, Jarrod Bowen - 5
Adam Armstrong, Jan Bednarek... - 2

List of Teams and Points#

# Parallel lists: team names and their points (converted to int), in table order.
teams = [row['team'] for row in ep]
points = [int(row['points']) for row in ep]

Bar graph of points#

# Vertical bar chart: one bar per team, bar height = points.
plt.figure(figsize=(20,5))
plt.bar(teams, points)
# Rotate the team names by 90 degrees on the x-axis.
plt.xticks(rotation=90);
_images/0b48e26e3113c1c47750bbd5bbeac6815fafc4dd1592f35589ff7bce94508ea7.png
# Horizontal bar chart of the same data: team names on the y-axis, points as bar length.
plt.figure(figsize=(5,10))
plt.barh( teams, points);
_images/711b067b18db52c43ea4cf62aa24c7f826502f43391ce1ad23e532316436471c.png

Check Matches Played#

  • There is an issue with the data.

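  • The number of draws is incorrect: in the output below, the `draws` value is identical to the `wins` value for every team, so wins + draws + losses does not equal the matches played.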

# Sanity check: matches played should equal wins + draws + losses.
# Print every team whose reported numbers do not add up.
for row in ep:
    played = int(row['matches_played'])
    won = int(row['wins'])
    drawn = int(row['draws'])
    lost = int(row['losses'])
    if played == won + drawn + lost:
        continue
    print(row['team'], played, won, drawn, lost)
Liverpool 23 16 16 1
Arsenal 23 13 13 2
Nott'ham Forest 23 13 13 5
Manchester City 23 12 12 6
Newcastle Utd 23 12 12 6
Chelsea 23 11 11 5
Bournemouth 23 11 11 5
Aston Villa 22 10 10 6
Brighton 23 8 8 5
Fulham 23 8 8 6
Brentford 23 9 9 10
Manchester Utd 22 8 8 10
Crystal Palace 23 6 6 8
West Ham 23 7 7 10
Tottenham 23 7 7 13
Everton 23 5 5 9
Leicester City 23 4 4 14
Ipswich Town 23 3 3 13
Southampton 23 1 1 19

All Leagues#

The following is the list of all the leagues available in the package.

# Fetch every league table by calling each league method of the API object
# once, in a fixed order; `leagues` is a list with one entry per league.
league_fetchers = (
    soccer_data.english_premier,
    soccer_data.la_liga,
    soccer_data.ligue_1,
    soccer_data.bundesliga,
    soccer_data.serie_a,
    soccer_data.eredivisie,
    soccer_data.russian_premier,
    soccer_data.english_championship,
)
leagues = [fetch() for fetch in league_fetchers]

Each element of `leagues` is one league's table, i.e. a list of team dictionaries. For example, the element at index 1 is La Liga:

leagues[1]
[{'team': 'Real Madrid',
  'pos': '1',
  'points': '49',
  'matches_played': '21',
  'wins': '15',
  'draws': '15',
  'losses': '2',
  'goals_for': '50',
  'goals_against': '20',
  'goal_diff': '+30',
  'top_scorer': 'Kylian Mbappé - 15'},
 {'team': 'Atlético Madrid',
  'pos': '2',
  'points': '45',
  'matches_played': '21',
  'wins': '13',
  'draws': '13',
  'losses': '2',
  'goals_for': '35',
  'goals_against': '14',
  'goal_diff': '+21',
  'top_scorer': 'Alexander Sørloth - 8'},
 {'team': 'Barcelona',
  'pos': '3',
  'points': '42',
  'matches_played': '21',
  'wins': '13',
  'draws': '13',
  'losses': '5',
  'goals_for': '59',
  'goals_against': '24',
  'goal_diff': '+35',
  'top_scorer': 'Robert Lewandowski - 17'},
 {'team': 'Athletic Club',
  'pos': '4',
  'points': '40',
  'matches_played': '21',
  'wins': '11',
  'draws': '11',
  'losses': '3',
  'goals_for': '31',
  'goals_against': '18',
  'goal_diff': '+13',
  'top_scorer': 'Oihan Sancet - 7'},
 {'team': 'Villarreal',
  'pos': '5',
  'points': '34',
  'matches_played': '21',
  'wins': '9',
  'draws': '9',
  'losses': '5',
  'goals_for': '39',
  'goals_against': '32',
  'goal_diff': '+7',
  'top_scorer': 'Ayoze Pérez - 8'},
 {'team': 'Mallorca',
  'pos': '6',
  'points': '30',
  'matches_played': '21',
  'wins': '9',
  'draws': '9',
  'losses': '9',
  'goals_for': '19',
  'goals_against': '26',
  'goal_diff': '-7',
  'top_scorer': 'Cyle Larin - 5'},
 {'team': 'Rayo Vallecano',
  'pos': '7',
  'points': '29',
  'matches_played': '21',
  'wins': '7',
  'draws': '7',
  'losses': '6',
  'goals_for': '25',
  'goals_against': '24',
  'goal_diff': '+1',
  'top_scorer': 'Jorge de Frutos - 4'},
 {'team': 'Girona',
  'pos': '8',
  'points': '28',
  'matches_played': '21',
  'wins': '8',
  'draws': '8',
  'losses': '9',
  'goals_for': '29',
  'goals_against': '29',
  'goal_diff': '0',
  'top_scorer': 'Cristhian Stuani - 4'},
 {'team': 'Real Sociedad',
  'pos': '9',
  'points': '28',
  'matches_played': '21',
  'wins': '8',
  'draws': '8',
  'losses': '9',
  'goals_for': '17',
  'goals_against': '17',
  'goal_diff': '0',
  'top_scorer': 'Mikel Oyarzabal, Takefusa Kubo - 4'},
 {'team': 'Betis',
  'pos': '10',
  'points': '28',
  'matches_played': '21',
  'wins': '7',
  'draws': '7',
  'losses': '7',
  'goals_for': '23',
  'goals_against': '26',
  'goal_diff': '-3',
  'top_scorer': 'Giovani Lo Celso - 7'},
 {'team': 'Osasuna',
  'pos': '11',
  'points': '27',
  'matches_played': '21',
  'wins': '6',
  'draws': '6',
  'losses': '6',
  'goals_for': '25',
  'goals_against': '30',
  'goal_diff': '-5',
  'top_scorer': 'Ante Budimir - 10'},
 {'team': 'Sevilla',
  'pos': '12',
  'points': '27',
  'matches_played': '21',
  'wins': '7',
  'draws': '7',
  'losses': '8',
  'goals_for': '24',
  'goals_against': '30',
  'goal_diff': '-6',
  'top_scorer': 'Dodi Lukebakio - 9'},
 {'team': 'Celta Vigo',
  'pos': '13',
  'points': '25',
  'matches_played': '21',
  'wins': '7',
  'draws': '7',
  'losses': '10',
  'goals_for': '30',
  'goals_against': '33',
  'goal_diff': '-3',
  'top_scorer': 'Iago Aspas - 6'},
 {'team': 'Getafe',
  'pos': '14',
  'points': '23',
  'matches_played': '21',
  'wins': '5',
  'draws': '5',
  'losses': '8',
  'goals_for': '17',
  'goals_against': '17',
  'goal_diff': '0',
  'top_scorer': 'Mauro Arambarri - 5'},
 {'team': 'Las Palmas',
  'pos': '15',
  'points': '23',
  'matches_played': '21',
  'wins': '6',
  'draws': '6',
  'losses': '10',
  'goals_for': '26',
  'goals_against': '34',
  'goal_diff': '-8',
  'top_scorer': 'Sandro Ramírez - 7'},
 {'team': 'Leganés',
  'pos': '16',
  'points': '23',
  'matches_played': '21',
  'wins': '5',
  'draws': '5',
  'losses': '8',
  'goals_for': '19',
  'goals_against': '29',
  'goal_diff': '-10',
  'top_scorer': 'Juan Cruz - 4'},
 {'team': 'Alavés',
  'pos': '17',
  'points': '21',
  'matches_played': '21',
  'wins': '5',
  'draws': '5',
  'losses': '10',
  'goals_for': '25',
  'goals_against': '33',
  'goal_diff': '-8',
  'top_scorer': 'Kiké - 9'},
 {'team': 'Espanyol',
  'pos': '18',
  'points': '20',
  'matches_played': '21',
  'wins': '5',
  'draws': '5',
  'losses': '11',
  'goals_for': '20',
  'goals_against': '33',
  'goal_diff': '-13',
  'top_scorer': 'Javi Puado - 7'},
 {'team': 'Valencia',
  'pos': '19',
  'points': '16',
  'matches_played': '21',
  'wins': '3',
  'draws': '3',
  'losses': '11',
  'goals_for': '20',
  'goals_against': '36',
  'goal_diff': '-16',
  'top_scorer': 'Hugo Duro - 7'},
 {'team': 'Valladolid',
  'pos': '20',
  'points': '15',
  'matches_played': '21',
  'wins': '4',
  'draws': '4',
  'losses': '14',
  'goals_for': '14',
  'goals_against': '42',
  'goal_diff': '-28',
  'top_scorer': 'Raúl Moro - 3'}]

Top Scorer Player#

# Find the player with the most goals among the top scorers of all leagues.
# Each 'top_scorer' value has the form 'Name - goals'. Both parts are taken
# from a single split on the LAST hyphen, so a hyphenated player name is
# kept intact and always matches the parsed goal count.
top_score = 0
top_score_player = ''

for league in leagues:
    for t in league:
        # rpartition returns (text before last '-', '-', text after last '-').
        name, sep, goals = t['top_scorer'].rpartition('-')
        score = int(goals)
        # Entries with an empty name part are ignored.
        if score > top_score and name:
            top_score = score
            top_score_player = name

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
Top Scorer : Mohamed Salah 
Top Score  : 19

The Team with Highest and Lowest Goal Differences#

# Track the largest and smallest goal difference over every team of every
# league. int() accepts the signed strings used by 'goal_diff' (e.g. '+30').
highest_goal_diff, highest_goal_diff_team = 0, ''
lowest_goal_diff, lowest_goal_diff_team = np.inf, ''

for league in leagues:
    for row in league:
        diff = int(row['goal_diff'])
        if diff > highest_goal_diff:
            highest_goal_diff, highest_goal_diff_team = diff, row['team']
        if diff < lowest_goal_diff:
            lowest_goal_diff, lowest_goal_diff_team = diff, row['team']

print(f'Highest Goal Diff : {highest_goal_diff}  ---  Team: {highest_goal_diff_team }')
print(f'Lowest  Goal Diff : {lowest_goal_diff}   ---  Team: {lowest_goal_diff_team }')
Highest Goal Diff : 45  ---  Team: PSV Eindhoven
Lowest  Goal Diff : -37   ---  Team: Southampton

The League with the Most Goals Scored#

# Map each league name to its table. The names follow the order in which the
# `leagues` list was built (english_premier first, english_championship last).
league_names = [
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
]
leagues_dict = dict(zip(league_names, leagues))

# Find the league whose teams scored the most goals in total.
max_total_goal = 0
max_total_goal_league = ''

for name, league in leagues_dict.items():
    total = sum(int(t['goals_for']) for t in league)
    if total > max_total_goal:
        max_total_goal = total
        max_total_goal_league = name

print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal      : {max_total_goal}')
Most Goal Scored League : english_premier
Maximum Total Goal      : 691

The Team with Highest Points Per Game#

# Find the team with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''

for league in leagues:
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # No games played yet: points per game is undefined, so skip the
            # team instead of raising ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

print(f'Max Point Per Game : {max_point_pg}')
print(f'Team               : {max_point_pg_team}')
Max Point Per Game : 2.5789473684210527
Team               : PSV Eindhoven

The Teams with Highest Points Per Game for Each League#

# For every league, record the team with the highest points-per-game ratio,
# then plot one bar per league leader.
max_point_pg_list = []
max_point_pg_team_list = []


for league in leagues:
    # Reset the running best for each league.
    max_point_pg = 0
    max_point_pg_team = ''
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # No games played yet: points per game is undefined, so skip the
            # team instead of raising ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

    max_point_pg_list.append(max_point_pg)
    max_point_pg_team_list.append(max_point_pg_team)

plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
plt.xticks(rotation=90);
_images/a66963eba9195bbeeb80f388a60103a43816b4d877c6aa9fc52a9f4d77497a09.png

Future work#

  • Store all goal differences as integers in a list (pay attention to the +/- signs).

  • Store the number of draws for each team in a list.

  • Store the names of all top scorers in a list.

  • Store the number of goals scored by each top scorer in a list.

  • Identify the team with the largest point difference from the second-highest team.