Project: Soccer#

import matplotlib.pyplot as plt
import numpy as np

Import Data#

In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/

Available leagues:

  • English Premier League

  • Spanish La Liga

  • French Ligue 1

  • German Bundesliga

  • Italian Serie A

  • Dutch Eredivisie

  • Russian Premier League

  • English Championship.

To install the package, use !pip install soccer-data-api.

# Uncomment the next line to install the package from inside the notebook.
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
soccer_data = SoccerDataAPI()
# Show the public attributes of the API object. dir() returns names in
# alphabetical order and underscore-prefixed names sort before lowercase ones,
# so the last nine entries are the non-underscore names.
dir(soccer_data)[-9:]
['bundesliga',
 'english_championship',
 'english_premier',
 'eredivisie',
 'get_data',
 'la_liga',
 'ligue_1',
 'russian_premier',
 'serie_a']

English Premier League#

Each league is represented as a list.

# Fetch the English Premier League table and inspect its container type.
ep = soccer_data.english_premier()
type(ep)
list
# Number of entries in the league table (one per team).
len(ep)
20
  • The list elements consist of dictionaries for each team, ordered by their ranking.

# Display the whole table.
ep
[{'team': 'Manchester City',
  'pos': '1',
  'points': '9',
  'matches_played': '3',
  'wins': '3',
  'draws': '3',
  'losses': '0',
  'goals_for': '9',
  'goals_against': '2',
  'goal_diff': '+7',
  'top_scorer': 'Erling Haaland - 7'},
 {'team': 'Liverpool',
  'pos': '2',
  'points': '9',
  'matches_played': '3',
  'wins': '3',
  'draws': '3',
  'losses': '0',
  'goals_for': '7',
  'goals_against': '0',
  'goal_diff': '+7',
  'top_scorer': 'Mohamed Salah, Luis Díaz - 3'},
 {'team': 'Brighton',
  'pos': '3',
  'points': '7',
  'matches_played': '3',
  'wins': '2',
  'draws': '2',
  'losses': '0',
  'goals_for': '6',
  'goals_against': '2',
  'goal_diff': '+4',
  'top_scorer': 'Danny Welbeck, João Pedro - 2'},
 {'team': 'Arsenal',
  'pos': '4',
  'points': '7',
  'matches_played': '3',
  'wins': '2',
  'draws': '2',
  'losses': '0',
  'goals_for': '5',
  'goals_against': '1',
  'goal_diff': '+4',
  'top_scorer': 'Kai Havertz - 2'},
 {'team': 'Newcastle Utd',
  'pos': '5',
  'points': '7',
  'matches_played': '3',
  'wins': '2',
  'draws': '2',
  'losses': '0',
  'goals_for': '4',
  'goals_against': '2',
  'goal_diff': '+2',
  'top_scorer': 'Joelinton, Alexander Isak... - 1'},
 {'team': 'Brentford',
  'pos': '6',
  'points': '6',
  'matches_played': '3',
  'wins': '2',
  'draws': '2',
  'losses': '1',
  'goals_for': '5',
  'goals_against': '4',
  'goal_diff': '+1',
  'top_scorer': 'Bryan Mbeumo - 3'},
 {'team': 'Aston Villa',
  'pos': '7',
  'points': '6',
  'matches_played': '3',
  'wins': '2',
  'draws': '2',
  'losses': '1',
  'goals_for': '4',
  'goals_against': '4',
  'goal_diff': '0',
  'top_scorer': 'Amadou Onana, Jhon Durán - 2'},
 {'team': 'Bournemouth',
  'pos': '8',
  'points': '5',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '0',
  'goals_for': '5',
  'goals_against': '4',
  'goal_diff': '+1',
  'top_scorer': 'Antoine Semenyo - 2'},
 {'team': "Nott'ham Forest",
  'pos': '9',
  'points': '5',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '0',
  'goals_for': '3',
  'goals_against': '2',
  'goal_diff': '+1',
  'top_scorer': 'Chris Wood - 2'},
 {'team': 'Tottenham',
  'pos': '10',
  'points': '4',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '1',
  'goals_for': '6',
  'goals_against': '3',
  'goal_diff': '+3',
  'top_scorer': 'Son Heung-min - 2'},
 {'team': 'Chelsea',
  'pos': '11',
  'points': '4',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '1',
  'goals_for': '7',
  'goals_against': '5',
  'goal_diff': '+2',
  'top_scorer': 'Noni Madueke - 3'},
 {'team': 'Fulham',
  'pos': '12',
  'points': '4',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '1',
  'goals_for': '3',
  'goals_against': '3',
  'goal_diff': '0',
  'top_scorer': 'Adama Traoré, Alex Iwobi... - 1'},
 {'team': 'West Ham',
  'pos': '13',
  'points': '3',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '2',
  'goals_for': '4',
  'goals_against': '5',
  'goal_diff': '-1',
  'top_scorer': 'Tomáš Souček, Lucas Paquetá... - 1'},
 {'team': 'Manchester Utd',
  'pos': '14',
  'points': '3',
  'matches_played': '3',
  'wins': '1',
  'draws': '1',
  'losses': '2',
  'goals_for': '2',
  'goals_against': '5',
  'goal_diff': '-3',
  'top_scorer': 'Joshua Zirkzee, Amad Diallo - 1'},
 {'team': 'Leicester City',
  'pos': '15',
  'points': '1',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '2',
  'goals_for': '3',
  'goals_against': '5',
  'goal_diff': '-2',
  'top_scorer': 'Jamie Vardy, Wout Faes... - 1'},
 {'team': 'Crystal Palace',
  'pos': '16',
  'points': '1',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '2',
  'goals_for': '2',
  'goals_against': '5',
  'goal_diff': '-3',
  'top_scorer': 'Eberechi Eze - 1'},
 {'team': 'Ipswich Town',
  'pos': '17',
  'points': '1',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '2',
  'goals_for': '2',
  'goals_against': '7',
  'goal_diff': '-5',
  'top_scorer': 'Sammie Szmodics, Liam Delap - 1'},
 {'team': 'Wolves',
  'pos': '18',
  'points': '1',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '2',
  'goals_for': '3',
  'goals_against': '9',
  'goal_diff': '-6',
  'top_scorer': 'Jean-Ricner Bellegarde, Jørgen Strand Larsen... - 1'},
 {'team': 'Southampton',
  'pos': '19',
  'points': '0',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '3',
  'goals_for': '1',
  'goals_against': '5',
  'goal_diff': '-4',
  'top_scorer': 'Yukinari Sugawara - 1'},
 {'team': 'Everton',
  'pos': '20',
  'points': '0',
  'matches_played': '3',
  'wins': '0',
  'draws': '0',
  'losses': '3',
  'goals_for': '2',
  'goals_against': '10',
  'goal_diff': '-8',
  'top_scorer': 'Michael Keane, Dominic Calvert-Lewin - 1'}]
# First entry of the list, i.e. the team at position 1.
ep[0]
{'team': 'Manchester City',
 'pos': '1',
 'points': '9',
 'matches_played': '3',
 'wins': '3',
 'draws': '3',
 'losses': '0',
 'goals_for': '9',
 'goals_against': '2',
 'goal_diff': '+7',
 'top_scorer': 'Erling Haaland - 7'}
  • For each team, the following information is provided.

# Field names stored for a team.
ep[0].keys()
dict_keys(['team', 'pos', 'points', 'matches_played', 'wins', 'draws', 'losses', 'goals_for', 'goals_against', 'goal_diff', 'top_scorer'])

Highest-Scoring Team#

# Scan the league table for the club with the largest 'goals_for' value.
# Every statistic is stored as a string, hence the int() conversion.
max_goals_for_team, max_goals_for = '', 0

for club in ep:
    scored = int(club['goals_for'])
    # Strict comparison: on a tie the club seen first in the table is kept.
    if scored <= max_goals_for:
        continue
    max_goals_for, max_goals_for_team = scored, club['team']

print(f'Team         : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')
Team         : Manchester City
Max Goals For: 9

Lowest-Scoring Team#

import numpy as np

# Scan the league table for the club with the smallest 'goals_for' value.
# np.inf is the starting value, so the first club always replaces it.
min_goals_for_team, min_goals_for = '', np.inf

for club in ep:
    scored = int(club['goals_for'])
    # Strict comparison: on a tie the club seen first in the table is kept.
    if scored >= min_goals_for:
        continue
    min_goals_for, min_goals_for_team = scored, club['team']

print(f'Team         : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')
Team         : Southampton
Min Goals For: 1

Team with the Most Goals Conceded#

# Scan the league table for the club with the largest 'goals_against' value.
max_goals_against_team, max_goals_against = '', 0

for club in ep:
    conceded = int(club['goals_against'])
    # Strict comparison: on a tie the club seen first in the table is kept.
    if conceded <= max_goals_against:
        continue
    max_goals_against, max_goals_against_team = conceded, club['team']

print(f'Team             : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')
Team             : Everton
Max Goals Against: 10

Top Scorer Player#

# Find the player with the highest goal tally among the clubs' top scorers.
# Each 'top_scorer' value looks like '<name(s)> - <goals>', for example
# 'Erling Haaland - 7' or 'Mohamed Salah, Luis Díaz - 3'.
top_score = 0
top_score_player = ''

for t in ep:
    # Split on the LAST hyphen: names can contain hyphens themselves
    # ('Son Heung-min', 'Jean-Ricner Bellegarde'), so the first hyphen is not
    # a reliable separator between the name and the goal count.
    player, sep, goals = t['top_scorer'].rpartition('-')
    goals = goals.strip()
    if not sep or not goals.isdigit():
        # No parsable '<name(s)> - <goals>' pair for this club; skip it.
        continue
    if int(goals) > top_score:
        top_score = int(goals)
        # strip() drops the space that precedes the separator.
        top_score_player = player.strip()

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
Top Scorer : Erling Haaland 
Top Score  : 7

Teams with Joint Top Scorers#

# Reset the top-scorer variables (the loop below does not use them).
top_score, top_score_player = 0, ''

# A comma in 'top_scorer' means the entry names more than one player,
# i.e. the club's best tally is shared.
for club in ep:
    scorers = club['top_scorer']
    if ',' in scorers:
        print(scorers)
Mohamed Salah, Luis Díaz - 3
Danny Welbeck, João Pedro - 2
Joelinton, Alexander Isak... - 1
Amadou Onana, Jhon Durán - 2
Adama Traoré, Alex Iwobi... - 1
Tomáš Souček, Lucas Paquetá... - 1
Joshua Zirkzee, Amad Diallo - 1
Jamie Vardy, Wout Faes... - 1
Sammie Szmodics, Liam Delap - 1
Jean-Ricner Bellegarde, Jørgen Strand Larsen... - 1
Michael Keane, Dominic Calvert-Lewin - 1

List of Teams and Points#

# Parallel lists in league-table order: club names and their points as ints.
teams = [club['team'] for club in ep]
points = [int(club['points']) for club in ep]

Bar graph of points#

# Vertical bar chart of points per club on a wide figure; the x tick labels
# (club names) are rotated by 90 degrees.
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(teams, points)
ax.tick_params(axis='x', labelrotation=90);
_images/06506c22b65e0b1e86998e319262462f10aea076108c6f922a9c732a3da672a6.png
# Horizontal bar chart of the same data on a tall figure.
fig, ax = plt.subplots(figsize=(5, 10))
ax.barh(teams, points);
_images/3c2e569562cbbf2d7542e9e14ec459dba6cff9ee248d0f67eb7713b9088e01e0.png

Check Matches Played#

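  • There is an issue with the data.

  • The number of draws is incorrect: the 'draws' field simply repeats the 'wins' value, so for many teams wins + draws + losses does not equal matches_played.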

# Consistency check: print every club whose wins + draws + losses does not
# add up to its matches_played, followed by those four numbers.
for club in ep:
    played, won, drawn, lost = (
        int(club[field])
        for field in ('matches_played', 'wins', 'draws', 'losses')
    )
    if won + drawn + lost != played:
        print(club['team'], played, won, drawn, lost)
Manchester City 3 3 3 0
Liverpool 3 3 3 0
Brighton 3 2 2 0
Arsenal 3 2 2 0
Newcastle Utd 3 2 2 0
Brentford 3 2 2 1
Aston Villa 3 2 2 1
Bournemouth 3 1 1 0
Nott'ham Forest 3 1 1 0
West Ham 3 1 1 2
Manchester Utd 3 1 1 2
Leicester City 3 0 0 2
Crystal Palace 3 0 0 2
Ipswich Town 3 0 0 2
Wolves 3 0 0 2

All Leagues#

The following is the list of all the leagues available in the package.

# Download the table of every supported league, in a fixed order.
# Each call issues an HTTP request, so this cell can take a while to finish.
league_fetchers = (
    soccer_data.english_premier,
    soccer_data.la_liga,
    soccer_data.ligue_1,
    soccer_data.bundesliga,
    soccer_data.serie_a,
    soccer_data.eredivisie,
    soccer_data.russian_premier,
    soccer_data.english_championship,
)
leagues = [fetch() for fetch in league_fetchers]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[19], line 6
      1 leagues = [soccer_data.english_premier(),
      2 soccer_data.la_liga(),
      3 soccer_data.ligue_1(),
      4 soccer_data.bundesliga(),
      5 soccer_data.serie_a(),
----> 6 soccer_data.eredivisie(),
      7 soccer_data.russian_premier(),
      8 soccer_data.english_championship()]

File ~/anaconda3/lib/python3.11/site-packages/soccer_data_api/soccer_api.py:73, in SoccerDataAPI.eredivisie(self)
     72 def eredivisie(self):
---> 73     self.get_data = GetData(CONF['leagues']['eredivisie'])
     74     response = json_response(self.get_data.get_club_name(), self.get_data.get_position(),
     75                              self.get_data.get_points(),
     76                              self.get_data.get_matches_played(), self.get_data.get_wins(),
     77                              self.get_data.get_draws(), self.get_data.get_losses(),
     78                              self.get_data.get_goals_for(), self.get_data.get_goals_against(),
     79                              self.get_data.get_goal_diff(), self.get_data.get_top_scorer())
     81     return response

File ~/anaconda3/lib/python3.11/site-packages/soccer_data_api/get_soccer_data.py:41, in GetData.__init__(self, league)
     40 def __init__(self, league):
---> 41     page = requests.get(CONF['url']+league)
     42     self.soup = BeautifulSoup(page.content, features="html.parser")
     43     self.league = ""

File ~/anaconda3/lib/python3.11/site-packages/requests/api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64 
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File ~/anaconda3/lib/python3.11/site-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File ~/anaconda3/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File ~/anaconda3/lib/python3.11/site-packages/requests/sessions.py:747, in Session.send(self, request, **kwargs)
    744         pass
    746 if not stream:
--> 747     r.content
    749 return r

File ~/anaconda3/lib/python3.11/site-packages/requests/models.py:899, in Response.content(self)
    897         self._content = None
    898     else:
--> 899         self._content = b"".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b""
    901 self._content_consumed = True
    902 # don't need to release the connection; that's been handled by urllib3
    903 # since we exhausted the data.

File ~/anaconda3/lib/python3.11/site-packages/requests/models.py:816, in Response.iter_content.<locals>.generate()
    814 if hasattr(self.raw, "stream"):
    815     try:
--> 816         yield from self.raw.stream(chunk_size, decode_content=True)
    817     except ProtocolError as e:
    818         raise ChunkedEncodingError(e)

File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:624, in HTTPResponse.stream(self, amt, decode_content)
    608 """
    609 A generator wrapper for the read() method. A call will block until
    610 ``amt`` bytes have been read from the connection or until the
   (...)
    621     'content-encoding' header.
    622 """
    623 if self.chunked and self.supports_chunked_reads():
--> 624     for line in self.read_chunked(amt, decode_content=decode_content):
    625         yield line
    626 else:

File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:831, in HTTPResponse.read_chunked(self, amt, decode_content)
    829 if self.chunk_left == 0:
    830     break
--> 831 chunk = self._handle_chunk(amt)
    832 decoded = self._decode(
    833     chunk, decode_content=decode_content, flush_decoder=False
    834 )
    835 if decoded:

File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:775, in HTTPResponse._handle_chunk(self, amt)
    773     self.chunk_left = None
    774 elif amt < self.chunk_left:
--> 775     value = self._fp._safe_read(amt)
    776     self.chunk_left = self.chunk_left - amt
    777     returned_chunk = value

File ~/anaconda3/lib/python3.11/http/client.py:638, in HTTPResponse._safe_read(self, amt)
    631 def _safe_read(self, amt):
    632     """Read the number of bytes requested.
    633 
    634     This function should be used when <amt> bytes "should" be present for
    635     reading. If the bytes are truly not available (due to EOF), then the
    636     IncompleteRead exception can be used to detect the problem.
    637     """
--> 638     data = self.fp.read(amt)
    639     if len(data) < amt:
    640         raise IncompleteRead(data, amt-len(data))

File ~/anaconda3/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
    704 while True:
    705     try:
--> 706         return self._sock.recv_into(b)
    707     except timeout:
    708         self._timeout_occurred = True

File ~/anaconda3/lib/python3.11/ssl.py:1315, in SSLSocket.recv_into(self, buffer, nbytes, flags)
   1311     if flags != 0:
   1312         raise ValueError(
   1313           "non-zero flags not allowed in calls to recv_into() on %s" %
   1314           self.__class__)
-> 1315     return self.read(nbytes, buffer)
   1316 else:
   1317     return super().recv_into(buffer, nbytes, flags)

File ~/anaconda3/lib/python3.11/ssl.py:1167, in SSLSocket.read(self, len, buffer)
   1165 try:
   1166     if buffer is not None:
-> 1167         return self._sslobj.read(len, buffer)
   1168     else:
   1169         return self._sslobj.read(len)

KeyboardInterrupt: 

The following is the dictionary of all the leagues available in the package.

# Map each league's method name to the table returned by that method.
# The leagues are fetched in the order listed here.
league_names = (
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
)
leagues_dict = {name: getattr(soccer_data, name)() for name in league_names}

Top Scorer Player#

# Find the player with the highest goal tally across all leagues.
# Each 'top_scorer' value looks like '<name(s)> - <goals>', for example
# 'Erling Haaland - 7' or 'Mohamed Salah, Luis Díaz - 3'.
top_score = 0
top_score_player = ''

for league in leagues:
    for t in league:
        # Split on the LAST hyphen: names can contain hyphens themselves
        # ('Son Heung-min', 'Jean-Ricner Bellegarde'), so the first hyphen is
        # not a reliable separator between the name and the goal count.
        player, sep, goals = t['top_scorer'].rpartition('-')
        goals = goals.strip()
        if not sep or not goals.isdigit():
            # No parsable '<name(s)> - <goals>' pair for this club; skip it.
            continue
        if int(goals) > top_score:
            top_score = int(goals)
            # strip() drops the space that precedes the separator.
            top_score_player = player.strip()

print(f'Top Scorer : {top_score_player}')
print(f'Top Score  : {top_score}')
Top Scorer : Harry Kane 
Top Score  : 36

The Teams with the Highest and Lowest Goal Differences#

# Track the largest and smallest goal difference over every club in every
# league. 'goal_diff' is a signed string such as '+7' or '-3', which int()
# parses directly.
highest_goal_diff, highest_goal_diff_team = 0, ''
lowest_goal_diff, lowest_goal_diff_team = np.inf, ''

for table in leagues:
    for club in table:
        diff = int(club['goal_diff'])
        if diff > highest_goal_diff:
            highest_goal_diff, highest_goal_diff_team = diff, club['team']
        if diff < lowest_goal_diff:
            lowest_goal_diff, lowest_goal_diff_team = diff, club['team']

print(f'Highest Goal Diff : {highest_goal_diff}  ---  Team: {highest_goal_diff_team}')
print(f'Lowest  Goal Diff : {lowest_goal_diff}   ---  Team: {lowest_goal_diff_team}')
Highest Goal Diff : 90  ---  Team: PSV Eindhoven
Lowest  Goal Diff : -69   ---  Team: Sheffield Utd

The League with the Most Goals Scored#

# Add up 'goals_for' over all clubs of each league and keep the league with
# the largest total (the first one wins a tie).
max_total_goal, max_total_goal_league = 0, ''

for name, league in leagues_dict.items():
    total = sum(int(club['goals_for']) for club in league)
    if total > max_total_goal:
        max_total_goal, max_total_goal_league = total, name

print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal      : {max_total_goal}')
Most Goal Scored League : english_championship
Maximum Total Goal      : 1480

The Team with Highest Points Per Game#

# Find the club with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''

for league in leagues:
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # Points per game is undefined before a club's first match;
            # skipping it avoids a ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        # Strict comparison: on a tie the club seen first is kept.
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

print(f'Max Point Per Game : {max_point_pg}')
print(f'Team               : {max_point_pg_team}')
Max Point Per Game : 2.676470588235294
Team               : PSV Eindhoven

The Teams with Highest Points Per Game for Each League#

# For each league, find the club with the highest points-per-game ratio and
# plot the league leaders side by side.
max_point_pg_list = []
max_point_pg_team_list = []

for league in leagues:
    # Best ratio and club found so far within the current league.
    max_point_pg = 0
    max_point_pg_team = ''
    for t in league:
        played = int(t['matches_played'])
        if played == 0:
            # Points per game is undefined before a club's first match;
            # skipping it avoids a ZeroDivisionError.
            continue
        ppg = int(t['points']) / played
        # Strict comparison: on a tie the club seen first is kept.
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = t['team']

    max_point_pg_list.append(max_point_pg)
    max_point_pg_team_list.append(max_point_pg_team)

# One bar per league, labelled with that league's best club.
plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
plt.xticks(rotation=90);
_images/b1a6dbfff9cbd513d544d5f73157719063e30007fbf89eea2bcbbf28cdba00b0.png