DataFrames Visualization#

Section Title: DataFrames Visualization

pandas DataFrames offer numerous plotting methods, most of them available through the plot() method and its kind parameter. Throughout this section, we’ll use the following two DataFrames.

import pandas as pd
df_grades = pd.read_csv('https://raw.githubusercontent.com/datasmp/datasets/main/grades.csv')
df_grades.head()
Name ID Grade Gender HW Test-1 Test-2 Test-3 Test-4 Final
0 mqtvy 37047871 10 M 30 91 69 93 17 50
1 jbbsx 35439616 11 F 6 18 93 9 98 91
2 mrvab 35543247 11 M 78 92 60 43 34 26
3 bjyve 61282135 9 M 60 8 10 99 80 87
4 rlpsr 53448034 10 M 3 38 45 43 79 69
df_stock = pd.read_excel('https://raw.githubusercontent.com/datasmp/datasets/main/stock.xlsx')
df_stock.head()
Date APPLE TESLA AMAZON VISA SP500
0 2020-01-02 74.33 86.05 1898.01 189.66 3257.85
1 2020-01-03 73.61 88.60 1874.97 188.15 3234.85
2 2020-01-06 74.20 90.31 1902.88 187.74 3246.28
3 2020-01-07 73.85 93.81 1906.86 187.24 3237.18
4 2020-01-08 75.04 98.43 1891.97 190.45 3253.05

Plot Types#

Line Plot#

  • The default value of the kind parameter is ‘line’, so calling plot() without kind draws a line plot of the numeric columns against the index.

# all columns
df_stock.plot();     # kind = 'line'
_images/2d12cafe34984ef955baef86687974766b35d938a81307de7893e3b3be589941.png
df_stock.plot( y='AMAZON');
_images/e40d50f41100994541d9c36ef5699c6ff8ca1a29134623d13e120e5ab515a309.png
df_stock.plot( y=['TESLA','APPLE']);     # two columns
_images/def9b9e17299e4fecdfdc5daa4e619bab6af271842084c41b2dc91ee88bdf5dc.png

Scatter Plot#

  • Both the x and y columns must be provided for a scatter plot.

df_stock.plot(x='APPLE', y='AMAZON', kind='scatter');
_images/79581fb3c1df84042cd125d39657cb3a8e798fa676419ca6de47ecc2380fbb4e.png

Histogram#

df_stock.plot(y='AMAZON', kind='hist');
_images/a7ba4decd890db3172adb862773204029e826d5a26fd0d21fb4e458534f1b43d.png
# horizontal
df_stock.plot(y='AMAZON', kind='hist', orientation='horizontal');
_images/820b2ea61fc4cac1d24cb9edaaf4e2d5f5feceba5700fcb59c5a68676bc24a52.png
df_stock.hist(layout=(1,5), figsize=(10,4));
_images/efd53d7a644e18299742ebbcd816d6cb838d648a6b38c24ea21ac1f2cfdbf055.png

Bar Plot#

df_stock.head().plot(kind='bar');
_images/de2a6a1b089cfbb878c2abddd3bb8437b86af85b76b6269f70eb092aa91f4ef9.png
df_stock.head().plot(kind='barh');
_images/b6a52291af0617e24cebbefab372384ccac02b7ab8839b79fc2adad9077d0a0b.png

Stacked Bar#

df_stock.head().plot(kind='bar', stacked=True);
_images/8f89a242d46af48f4e281d2fdbf9197f82bafdd56f96140916863f0dda446332.png
df_stock.head().plot(kind='barh', stacked=True);
_images/0aac60dca0ab2f41b70a753b3e4420ae83c571cc6c69c887d6015f40a58f3a42.png

Pie Chart#

df_grades.Gender.value_counts()
Gender
M    54
F    46
Name: count, dtype: int64
df_grades.Gender.value_counts().plot(kind='pie', autopct='%.0f%%' );
_images/880f69b44fd9478276a175f604b05f12d28424a81bacbd9c544aaa3398e5a799.png

KDE Plot#

df_stock.plot(kind='kde');
_images/fccfac1edabe9e46b3cd3e2c6e6392d870a047f1c8d62611d2540fc50774b3c5.png
df_stock.plot(y='VISA', kind='kde');
_images/bbcbd03c955ad2cb7a37e39657b38c100edab0672352a4151383ce7ee86e81f8.png

Box Plot#

df_stock.plot(kind='box');
_images/1d6b6714dff8091077275a3eedcdf9de65881b18dcdb1e2220234de218071298.png
df_stock.plot(y='VISA', kind='box');
_images/2344cbbfb306cdc181a9d9b2642476652336f7a3e20c7c41450766fb3e299323.png
# horizontal
df_stock.plot(y='VISA', kind='box', vert=False);
_images/2d4a49bfc5cc73c86e5b96230d2acc34fbf7e3713db183b042da7ec44522e6d2.png
df_stock.boxplot();
_images/1c25bb7ccae950da27de9b021cad015ea70afcd78ad117e7ef87f0408464e694.png

Area#

df_stock.plot(kind='area');
_images/9653cb8a5d317f0a2600e9961cd31a380a892a1436b02e250c2a69fce010e896.png

Plotting Parameters#

Color#

df_stock.plot( y='AMAZON', c='red'); 
_images/5d803a28a01e4d7200bc1eccdd4f9f3f64e510c1f3165b3ed46caa4b5587154f.png

Figsize#

df_stock.plot( y='AMAZON', figsize=(20,4)); 
_images/e66c92bc8f3729155f387d14c7a73bc4ce815ae66f0efcec6a404b2a125e9a2b.png

Title#

df_stock.plot( y='AMAZON', title='Stock Prices'); 
_images/b5f25cd0103036e383712475e4fe0a97669cab92cbf321c2b69e88ef3858fa39.png

Axis Labels#

df_stock.plot( y='AMAZON', xlabel='Day', ylabel='Stock Values'); 
_images/27ca6ea47af134e4f794805b856b72104a714f019129acc780d69480b69c148c.png

Linewidth#

df_stock.plot( y='AMAZON', linewidth=5); 
_images/b5bfe4738a8caa2ebd4cc827cd877d10c1a6c3b45438f3c641e65ad9c4efbce7.png

Linestyle#

df_stock.plot( y='AMAZON', linestyle='dashed'); 
_images/9464f3e898d24cb7e6871a07bfcf766add7f9ef816086da161a0cf8987ffb244.png
df_stock.plot( y='AMAZON', linestyle='dotted'); 
_images/0dcada2d9ceaa99f111bbc381e59349b0b18168cfa1814ca24e4b97f55128330.png

Size#

df_stock.plot(x='APPLE', y='AMAZON', kind='scatter', s=20);
_images/79581fb3c1df84042cd125d39657cb3a8e798fa676419ca6de47ecc2380fbb4e.png

Matplotlib and Dataframes#

import matplotlib.pyplot as plt

In the following code:

  • The index_col=0 argument sets the first column (Date) as the index of the DataFrame.

  • The parse_dates=True argument parses the index values as dates, converting them into Timestamps so that they are treated as dates rather than plain strings.

df_stock = pd.read_excel('https://raw.githubusercontent.com/datasmp/datasets/main/stock.xlsx', index_col=0, parse_dates=True)
df_stock.head()
APPLE TESLA AMAZON VISA SP500
Date
2020-01-02 74.33 86.05 1898.01 189.66 3257.85
2020-01-03 73.61 88.60 1874.97 188.15 3234.85
2020-01-06 74.20 90.31 1902.88 187.74 3246.28
2020-01-07 73.85 93.81 1906.86 187.24 3237.18
2020-01-08 75.04 98.43 1891.97 190.45 3253.05

Scatter Plot#

  • Both x- and y-coordinates must be provided to plt.scatter; the index is not used as x automatically.

plt.figure(figsize=(20,4))
plt.scatter(df_stock.index, df_stock['APPLE'], c='r');
_images/778ebfea5c12d27ce2545b5bacecd0f0df12a9849006fd6e32ee5db8e85e60ef.png
plt.figure(figsize=(10,4))
plt.scatter(df_stock['VISA'], df_stock['APPLE'], c='navy', marker='*');
_images/b52e1afaf426ddc6673bf21c22a21e6f8f9885d91702885e27b36e440b9b043b.png

Line Plot#

  • When only a single column (Series) is passed to plt.plot, its index values (here, the dates) are used as the default x values.

plt.figure(figsize=(20,4))
plt.plot(df_stock['APPLE'], c='r');
_images/58afdf9b359c755ad230cf55ffb189400c61a9a44fa726543e72dfa3e9ab6908.png
plt.figure(figsize=(20,4))
plt.plot(df_stock['VISA'], c='g', label='VISA')
plt.plot(df_stock['APPLE'], c='r', label='APPLE', linestyle='dotted')
plt.legend();
_images/f7955a74e1076200e7a56504f4133e7a36a103f90fd52269a133d09f93b05994.png
plt.figure(figsize=(20,5))
plt.axes().set_facecolor('black');
plt.plot(df_stock['VISA'], color='orange')
plt.grid(visible=False);
_images/79c1794a99a903629634817d6c33570bd6d174f6ea57bb40d600834e00eaa4a0.png

Histogram#

plt.hist(df_stock['APPLE']);
_images/1a1a7c3d2872fedf07394e6db35925c44fe86f43ae4ad029aefc69fee76a904a.png