Project: Soccer#
import matplotlib.pyplot as plt
import numpy as np
Import Data#
In this project, the soccer-data-api package is used to import soccer data from https://www.sports-reference.com/
Available leagues:
English Premier League
Spanish La Liga
French Ligue 1
German Bundesliga
Italian Serie A
Dutch Eredivisie
Russian Premier League
English Championship.
To install the package, run `!pip install soccer-data-api`.
# !pip install soccer-data-api
from soccer_data_api import SoccerDataAPI
soccer_data = SoccerDataAPI()
# List the public attributes of the API object (the league methods plus
# 'get_data'). Filtering on the leading underscore replaces the hard-coded
# slice dir(...)[-9:], which silently shows the wrong names as soon as the
# package adds or removes a public attribute.
[name for name in dir(soccer_data) if not name.startswith('_')]
['bundesliga',
'english_championship',
'english_premier',
'eredivisie',
'get_data',
'la_liga',
'ligue_1',
'russian_premier',
'serie_a']
English Premier League#
Each league is represented as a list.
# Fetch the English Premier League table.
# NOTE(review): presumably this performs an HTTP request on every call,
# as the eredivisie() traceback in this notebook suggests; confirm.
ep = soccer_data.english_premier()
# Inspect the container type of the returned data.
type(ep)
list
# Number of entries in the league table (one per team).
len(ep)
20
The list elements consist of dictionaries for each team, ordered by their ranking.
# Display the full league table.
ep
[{'team': 'Manchester City',
'pos': '1',
'points': '9',
'matches_played': '3',
'wins': '3',
'draws': '3',
'losses': '0',
'goals_for': '9',
'goals_against': '2',
'goal_diff': '+7',
'top_scorer': 'Erling Haaland - 7'},
{'team': 'Liverpool',
'pos': '2',
'points': '9',
'matches_played': '3',
'wins': '3',
'draws': '3',
'losses': '0',
'goals_for': '7',
'goals_against': '0',
'goal_diff': '+7',
'top_scorer': 'Mohamed Salah, Luis Díaz - 3'},
{'team': 'Brighton',
'pos': '3',
'points': '7',
'matches_played': '3',
'wins': '2',
'draws': '2',
'losses': '0',
'goals_for': '6',
'goals_against': '2',
'goal_diff': '+4',
'top_scorer': 'Danny Welbeck, João Pedro - 2'},
{'team': 'Arsenal',
'pos': '4',
'points': '7',
'matches_played': '3',
'wins': '2',
'draws': '2',
'losses': '0',
'goals_for': '5',
'goals_against': '1',
'goal_diff': '+4',
'top_scorer': 'Kai Havertz - 2'},
{'team': 'Newcastle Utd',
'pos': '5',
'points': '7',
'matches_played': '3',
'wins': '2',
'draws': '2',
'losses': '0',
'goals_for': '4',
'goals_against': '2',
'goal_diff': '+2',
'top_scorer': 'Joelinton, Alexander Isak... - 1'},
{'team': 'Brentford',
'pos': '6',
'points': '6',
'matches_played': '3',
'wins': '2',
'draws': '2',
'losses': '1',
'goals_for': '5',
'goals_against': '4',
'goal_diff': '+1',
'top_scorer': 'Bryan Mbeumo - 3'},
{'team': 'Aston Villa',
'pos': '7',
'points': '6',
'matches_played': '3',
'wins': '2',
'draws': '2',
'losses': '1',
'goals_for': '4',
'goals_against': '4',
'goal_diff': '0',
'top_scorer': 'Amadou Onana, Jhon Durán - 2'},
{'team': 'Bournemouth',
'pos': '8',
'points': '5',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '0',
'goals_for': '5',
'goals_against': '4',
'goal_diff': '+1',
'top_scorer': 'Antoine Semenyo - 2'},
{'team': "Nott'ham Forest",
'pos': '9',
'points': '5',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '0',
'goals_for': '3',
'goals_against': '2',
'goal_diff': '+1',
'top_scorer': 'Chris Wood - 2'},
{'team': 'Tottenham',
'pos': '10',
'points': '4',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '1',
'goals_for': '6',
'goals_against': '3',
'goal_diff': '+3',
'top_scorer': 'Son Heung-min - 2'},
{'team': 'Chelsea',
'pos': '11',
'points': '4',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '1',
'goals_for': '7',
'goals_against': '5',
'goal_diff': '+2',
'top_scorer': 'Noni Madueke - 3'},
{'team': 'Fulham',
'pos': '12',
'points': '4',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '1',
'goals_for': '3',
'goals_against': '3',
'goal_diff': '0',
'top_scorer': 'Adama Traoré, Alex Iwobi... - 1'},
{'team': 'West Ham',
'pos': '13',
'points': '3',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '2',
'goals_for': '4',
'goals_against': '5',
'goal_diff': '-1',
'top_scorer': 'Tomáš Souček, Lucas Paquetá... - 1'},
{'team': 'Manchester Utd',
'pos': '14',
'points': '3',
'matches_played': '3',
'wins': '1',
'draws': '1',
'losses': '2',
'goals_for': '2',
'goals_against': '5',
'goal_diff': '-3',
'top_scorer': 'Joshua Zirkzee, Amad Diallo - 1'},
{'team': 'Leicester City',
'pos': '15',
'points': '1',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '2',
'goals_for': '3',
'goals_against': '5',
'goal_diff': '-2',
'top_scorer': 'Jamie Vardy, Wout Faes... - 1'},
{'team': 'Crystal Palace',
'pos': '16',
'points': '1',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '2',
'goals_for': '2',
'goals_against': '5',
'goal_diff': '-3',
'top_scorer': 'Eberechi Eze - 1'},
{'team': 'Ipswich Town',
'pos': '17',
'points': '1',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '2',
'goals_for': '2',
'goals_against': '7',
'goal_diff': '-5',
'top_scorer': 'Sammie Szmodics, Liam Delap - 1'},
{'team': 'Wolves',
'pos': '18',
'points': '1',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '2',
'goals_for': '3',
'goals_against': '9',
'goal_diff': '-6',
'top_scorer': 'Jean-Ricner Bellegarde, Jørgen Strand Larsen... - 1'},
{'team': 'Southampton',
'pos': '19',
'points': '0',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '3',
'goals_for': '1',
'goals_against': '5',
'goal_diff': '-4',
'top_scorer': 'Yukinari Sugawara - 1'},
{'team': 'Everton',
'pos': '20',
'points': '0',
'matches_played': '3',
'wins': '0',
'draws': '0',
'losses': '3',
'goals_for': '2',
'goals_against': '10',
'goal_diff': '-8',
'top_scorer': 'Michael Keane, Dominic Calvert-Lewin - 1'}]
# First entry of the table, i.e. the team at position 1.
ep[0]
{'team': 'Manchester City',
'pos': '1',
'points': '9',
'matches_played': '3',
'wins': '3',
'draws': '3',
'losses': '0',
'goals_for': '9',
'goals_against': '2',
'goal_diff': '+7',
'top_scorer': 'Erling Haaland - 7'}
For each team, the following information is provided.
# Field names available for each team record.
ep[0].keys()
dict_keys(['team', 'pos', 'points', 'matches_played', 'wins', 'draws', 'losses', 'goals_for', 'goals_against', 'goal_diff', 'top_scorer'])
Highest-Scoring Team#
# Scan the table for the team with the most goals scored. On ties, the
# team appearing first in the table is kept (strict comparison).
max_goals_for, max_goals_for_team = 0, ''
for club in ep:
    scored = int(club['goals_for'])
    if scored <= max_goals_for:
        continue
    max_goals_for, max_goals_for_team = scored, club['team']
print(f'Team : {max_goals_for_team}')
print(f'Max Goals For: {max_goals_for}')
Team : Manchester City
Max Goals For: 9
Lowest-Scoring Team#
import numpy as np
# Scan the table for the team with the fewest goals scored. Starting from
# +infinity means the first team always becomes the initial candidate; on
# ties, the team appearing first in the table is kept.
min_goals_for, min_goals_for_team = np.inf, ''
for club in ep:
    scored = int(club['goals_for'])
    if scored >= min_goals_for:
        continue
    min_goals_for, min_goals_for_team = scored, club['team']
print(f'Team : {min_goals_for_team}')
print(f'Min Goals For: {min_goals_for}')
Team : Southampton
Min Goals For: 1
Team with the Most Goals Conceded#
# Scan the table for the team that has conceded the most goals. On ties,
# the team appearing first in the table is kept (strict comparison).
max_goals_against, max_goals_against_team = 0, ''
for club in ep:
    conceded = int(club['goals_against'])
    if conceded <= max_goals_against:
        continue
    max_goals_against, max_goals_against_team = conceded, club['team']
print(f'Team : {max_goals_against_team}')
print(f'Max Goals Against: {max_goals_against}')
Team : Everton
Max Goals Against: 10
Top Scorer Player#
# Find the player(s) with the highest goal tally in the league.
# Each 'top_scorer' entry looks like 'Name - 7' or 'Name A, Name B... - 1'.
top_score = 0
top_score_player = ''
for team in ep:
    # Split on the LAST hyphen: player names may themselves contain hyphens
    # (e.g. 'Son Heung-min - 2'), so splitting on the first one would cut
    # the name short and make the goal count unparsable.
    players, _, goals = team['top_scorer'].rpartition('-')
    goals = goals.strip()
    if not goals.isdecimal():
        # No parsable goal count for this team (e.g. empty field); skip it.
        continue
    if int(goals) > top_score:
        top_score = int(goals)
        # Drop the space that precedes the ' - ' separator.
        top_score_player = players.strip()
print(f'Top Scorer : {top_score_player}')
print(f'Top Score : {top_score}')
Top Scorer : Erling Haaland
Top Score : 7
Teams with Joint Top Scorers#
# Print the 'top_scorer' entry of every team whose top spot is shared.
# Shared entries list several names separated by commas.
# (The original reset top_score / top_score_player here without using them,
# which only clobbered the results computed in the previous cell.)
for team in ep:
    scorers = team['top_scorer']
    if ',' in scorers:
        print(scorers)
Mohamed Salah, Luis Díaz - 3
Danny Welbeck, João Pedro - 2
Joelinton, Alexander Isak... - 1
Amadou Onana, Jhon Durán - 2
Adama Traoré, Alex Iwobi... - 1
Tomáš Souček, Lucas Paquetá... - 1
Joshua Zirkzee, Amad Diallo - 1
Jamie Vardy, Wout Faes... - 1
Sammie Szmodics, Liam Delap - 1
Jean-Ricner Bellegarde, Jørgen Strand Larsen... - 1
Michael Keane, Dominic Calvert-Lewin - 1
List of Teams and Points#
# Parallel lists in table order: team names and their points as integers.
teams = [club['team'] for club in ep]
points = [int(club['points']) for club in ep]
Bar graph of points#
# Vertical bar chart of points per team on a wide figure.
plt.figure(figsize=(20,5))
plt.bar(teams, points)
# Rotate the team names by 90 degrees. The trailing ';' is presumably there
# to suppress the notebook's echo of the call's return value.
plt.xticks(rotation=90);
# The same data as a horizontal bar chart on a separate, taller figure.
plt.figure(figsize=(5,10))
plt.barh( teams, points);
Check Matches Played#
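There is an issue with the data: the reported number of draws is incorrect.
In every record the 'draws' value duplicates the 'wins' value, so wins + draws + losses does not always equal the number of matches played.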
# Report every team whose wins + draws + losses does not add up to the
# number of matches played, printing: team, played, wins, draws, losses.
for club in ep:
    played, won, drawn, lost = (
        int(club[field])
        for field in ('matches_played', 'wins', 'draws', 'losses')
    )
    if played != won + drawn + lost:
        print(club['team'], played, won, drawn, lost)
Manchester City 3 3 3 0
Liverpool 3 3 3 0
Brighton 3 2 2 0
Arsenal 3 2 2 0
Newcastle Utd 3 2 2 0
Brentford 3 2 2 1
Aston Villa 3 2 2 1
Bournemouth 3 1 1 0
Nott'ham Forest 3 1 1 0
West Ham 3 1 1 2
Manchester Utd 3 1 1 2
Leicester City 3 0 0 2
Crystal Palace 3 0 0 2
Ipswich Town 3 0 0 2
Wolves 3 0 0 2
All Leagues#
The following is the list of all the leagues available in the package.
# Fetch every league table, in the same order as the package lists them here.
# Each entry of `leagues` is the value returned by the matching API method.
league_methods = (
    'english_premier',
    'la_liga',
    'ligue_1',
    'bundesliga',
    'serie_a',
    'eredivisie',
    'russian_premier',
    'english_championship',
)
leagues = [getattr(soccer_data, method)() for method in league_methods]
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[19], line 6
1 leagues = [soccer_data.english_premier(),
2 soccer_data.la_liga(),
3 soccer_data.ligue_1(),
4 soccer_data.bundesliga(),
5 soccer_data.serie_a(),
----> 6 soccer_data.eredivisie(),
7 soccer_data.russian_premier(),
8 soccer_data.english_championship()]
File ~/anaconda3/lib/python3.11/site-packages/soccer_data_api/soccer_api.py:73, in SoccerDataAPI.eredivisie(self)
72 def eredivisie(self):
---> 73 self.get_data = GetData(CONF['leagues']['eredivisie'])
74 response = json_response(self.get_data.get_club_name(), self.get_data.get_position(),
75 self.get_data.get_points(),
76 self.get_data.get_matches_played(), self.get_data.get_wins(),
77 self.get_data.get_draws(), self.get_data.get_losses(),
78 self.get_data.get_goals_for(), self.get_data.get_goals_against(),
79 self.get_data.get_goal_diff(), self.get_data.get_top_scorer())
81 return response
File ~/anaconda3/lib/python3.11/site-packages/soccer_data_api/get_soccer_data.py:41, in GetData.__init__(self, league)
40 def __init__(self, league):
---> 41 page = requests.get(CONF['url']+league)
42 self.soup = BeautifulSoup(page.content, features="html.parser")
43 self.league = ""
File ~/anaconda3/lib/python3.11/site-packages/requests/api.py:73, in get(url, params, **kwargs)
62 def get(url, params=None, **kwargs):
63 r"""Sends a GET request.
64
65 :param url: URL for the new :class:`Request` object.
(...)
70 :rtype: requests.Response
71 """
---> 73 return request("get", url, params=params, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/requests/api.py:59, in request(method, url, **kwargs)
55 # By using the 'with' statement we are sure the session is closed, thus we
56 # avoid leaving sockets open which can trigger a ResourceWarning in some
57 # cases, and look like a memory leak in others.
58 with sessions.Session() as session:
---> 59 return session.request(method=method, url=url, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File ~/anaconda3/lib/python3.11/site-packages/requests/sessions.py:747, in Session.send(self, request, **kwargs)
744 pass
746 if not stream:
--> 747 r.content
749 return r
File ~/anaconda3/lib/python3.11/site-packages/requests/models.py:899, in Response.content(self)
897 self._content = None
898 else:
--> 899 self._content = b"".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b""
901 self._content_consumed = True
902 # don't need to release the connection; that's been handled by urllib3
903 # since we exhausted the data.
File ~/anaconda3/lib/python3.11/site-packages/requests/models.py:816, in Response.iter_content.<locals>.generate()
814 if hasattr(self.raw, "stream"):
815 try:
--> 816 yield from self.raw.stream(chunk_size, decode_content=True)
817 except ProtocolError as e:
818 raise ChunkedEncodingError(e)
File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:624, in HTTPResponse.stream(self, amt, decode_content)
608 """
609 A generator wrapper for the read() method. A call will block until
610 ``amt`` bytes have been read from the connection or until the
(...)
621 'content-encoding' header.
622 """
623 if self.chunked and self.supports_chunked_reads():
--> 624 for line in self.read_chunked(amt, decode_content=decode_content):
625 yield line
626 else:
File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:831, in HTTPResponse.read_chunked(self, amt, decode_content)
829 if self.chunk_left == 0:
830 break
--> 831 chunk = self._handle_chunk(amt)
832 decoded = self._decode(
833 chunk, decode_content=decode_content, flush_decoder=False
834 )
835 if decoded:
File ~/anaconda3/lib/python3.11/site-packages/urllib3/response.py:775, in HTTPResponse._handle_chunk(self, amt)
773 self.chunk_left = None
774 elif amt < self.chunk_left:
--> 775 value = self._fp._safe_read(amt)
776 self.chunk_left = self.chunk_left - amt
777 returned_chunk = value
File ~/anaconda3/lib/python3.11/http/client.py:638, in HTTPResponse._safe_read(self, amt)
631 def _safe_read(self, amt):
632 """Read the number of bytes requested.
633
634 This function should be used when <amt> bytes "should" be present for
635 reading. If the bytes are truly not available (due to EOF), then the
636 IncompleteRead exception can be used to detect the problem.
637 """
--> 638 data = self.fp.read(amt)
639 if len(data) < amt:
640 raise IncompleteRead(data, amt-len(data))
File ~/anaconda3/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
704 while True:
705 try:
--> 706 return self._sock.recv_into(b)
707 except timeout:
708 self._timeout_occurred = True
File ~/anaconda3/lib/python3.11/ssl.py:1315, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1311 if flags != 0:
1312 raise ValueError(
1313 "non-zero flags not allowed in calls to recv_into() on %s" %
1314 self.__class__)
-> 1315 return self.read(nbytes, buffer)
1316 else:
1317 return super().recv_into(buffer, nbytes, flags)
File ~/anaconda3/lib/python3.11/ssl.py:1167, in SSLSocket.read(self, len, buffer)
1165 try:
1166 if buffer is not None:
-> 1167 return self._sslobj.read(len, buffer)
1168 else:
1169 return self._sslobj.read(len)
KeyboardInterrupt:
The following is the dictionary of all the leagues available in the package.
# Map each league name to its table.
# The tables were already downloaded into `leagues` (same order as the names
# below), so reuse them instead of calling every API method a second time:
# each call is a slow network request, and reusing the data also guarantees
# that `leagues` and `leagues_dict` describe the same snapshot.
leagues_dict = dict(zip(
    (
        'english_premier',
        'la_liga',
        'ligue_1',
        'bundesliga',
        'serie_a',
        'eredivisie',
        'russian_premier',
        'english_championship',
    ),
    leagues,
))
Top Scorer Player#
# Find the player(s) with the highest goal tally across all leagues.
# Each 'top_scorer' entry looks like 'Name - 7' or 'Name A, Name B... - 1'.
top_score = 0
top_score_player = ''
for league in leagues:
    for team in league:
        # Split on the LAST hyphen: player names may themselves contain
        # hyphens, so splitting on the first one would cut the name short
        # and make the goal count unparsable.
        players, _, goals = team['top_scorer'].rpartition('-')
        goals = goals.strip()
        if not goals.isdecimal():
            # No parsable goal count for this team (e.g. empty field).
            continue
        if int(goals) > top_score:
            top_score = int(goals)
            # Drop the space that precedes the ' - ' separator.
            top_score_player = players.strip()
print(f'Top Scorer : {top_score_player}')
print(f'Top Score : {top_score}')
Top Scorer : Harry Kane
Top Score : 36
The Team with Highest and Lowest Goal Differences#
# Find the teams with the highest and the lowest goal difference across all
# leagues. 'goal_diff' is a signed string such as '+7', '0' or '-3', which
# int() parses directly.
# Seed the maximum with -infinity (mirroring the +infinity seed of the
# minimum) so the result is still correct when every goal difference is
# negative; a seed of 0 would then report no team at all.
highest_goal_diff = -np.inf
highest_goal_diff_team = ''
lowest_goal_diff = np.inf
lowest_goal_diff_team = ''
for league in leagues:
    for team in league:
        goal_diff = int(team['goal_diff'])
        if goal_diff > highest_goal_diff:
            highest_goal_diff = goal_diff
            highest_goal_diff_team = team['team']
        # Independent check: a single team can be both extremes.
        if goal_diff < lowest_goal_diff:
            lowest_goal_diff = goal_diff
            lowest_goal_diff_team = team['team']
print(f'Highest Goal Diff : {highest_goal_diff} --- Team: {highest_goal_diff_team }')
print(f'Lowest Goal Diff : {lowest_goal_diff} --- Team: {lowest_goal_diff_team }')
Highest Goal Diff : 90 --- Team: PSV Eindhoven
Lowest Goal Diff : -69 --- Team: Sheffield Utd
The League with the Most Goals Scored#
# Sum the goals scored by all teams of each league and keep the league with
# the largest total. On ties, the league listed first is kept.
max_total_goal, max_total_goal_league = 0, ''
for league_name, table in leagues_dict.items():
    league_goals = sum(int(club['goals_for']) for club in table)
    if league_goals <= max_total_goal:
        continue
    max_total_goal, max_total_goal_league = league_goals, league_name
print(f'Most Goal Scored League : {max_total_goal_league}')
print(f'Maximum Total Goal : {max_total_goal}')
Most Goal Scored League : english_championship
Maximum Total Goal : 1480
The Team with Highest Points Per Game#
# Find the team with the highest points-per-game ratio across all leagues.
max_point_pg = 0
max_point_pg_team = ''
for league in leagues:
    for team in league:
        played = int(team['matches_played'])
        if played == 0:
            # Points per game is undefined before a team has played (e.g.
            # at the start of a season); skip instead of dividing by zero.
            continue
        ppg = int(team['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = team['team']
print(f'Max Point Per Game : {max_point_pg}')
print(f'Team : {max_point_pg_team}')
Max Point Per Game : 2.676470588235294
Team : PSV Eindhoven
The Teams with Highest Points Per Game for Each League#
# For each league, find the team with the highest points-per-game ratio.
# The two result lists are parallel and hold one entry per league, in the
# order of `leagues`.
max_point_pg_list = []
max_point_pg_team_list = []
for league in leagues:
    max_point_pg = 0
    max_point_pg_team = ''
    for team in league:
        played = int(team['matches_played'])
        if played == 0:
            # Points per game is undefined before a team has played (e.g.
            # at the start of a season); skip instead of dividing by zero.
            continue
        ppg = int(team['points']) / played
        if ppg > max_point_pg:
            max_point_pg = ppg
            max_point_pg_team = team['team']
    # Record this league's best team once all of its teams have been seen.
    max_point_pg_list.append(max_point_pg)
    max_point_pg_team_list.append(max_point_pg_team)
# Bar chart of the best points-per-game value in each league, labelled with
# the corresponding team name.
plt.figure(figsize=(10,5))
plt.bar(max_point_pg_team_list, max_point_pg_list)
# Rotate the team names by 90 degrees. The trailing ';' is presumably there
# to suppress the notebook's echo of the call's return value.
plt.xticks(rotation=90);