Box plots: Show distribution of data through quartiles
Violin plots: Combine box plot with kernel density
Code
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Four samples of 100 normal draws each, with standard deviations 1 to 4.
data = [np.random.normal(0, std, 100) for std in range(1, 5)]

plt.figure(figsize=(12, 3))

# Left panel: box plot of the four samples.
plt.subplot(121)
plt.boxplot(data)
plt.title('Box Plot')

# Right panel: violin plot of the same samples.
plt.subplot(122)
sns.violinplot(data)
plt.title('Violin Plot')

plt.tight_layout()
plt.show()
Network Graphs and Trees
Network graphs: Show relationships between entities
Tree diagrams: Display hierarchical structures
Code
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# Create figure
plt.figure(figsize=(12, 3))

# Network graph (left subplot)
plt.subplot(121)
G = nx.random_geometric_graph(15, 0.3)  # Reduced nodes for clarity
pos = nx.spring_layout(G, k=1, seed=42)  # Fixed seed keeps the layout reproducible
nx.draw_networkx_nodes(G, pos, node_color='lightblue', node_size=500,
                       edgecolors='navy', linewidths=1)
nx.draw_networkx_edges(G, pos, edge_color='gray', width=1, alpha=0.5)
nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')
plt.title('Network Graph', pad=10)

# Tree diagram (right subplot)
plt.subplot(122)
T = nx.balanced_tree(2, 2)  # Balanced tree with 2 children per node, depth 2
pos_tree = nx.spring_layout(T, k=1.5, seed=42)
nx.draw_networkx_nodes(T, pos_tree, node_color='lightgreen', node_size=500,
                       edgecolors='darkgreen', linewidths=1)
nx.draw_networkx_edges(T, pos_tree, edge_color='forestgreen', width=1)
nx.draw_networkx_labels(T, pos_tree, font_size=8, font_weight='bold')
plt.title('Tree', pad=10)

plt.tight_layout()
plt.show()
Ridgeline Plots
Ridgeline plots: Show distribution of data across categories
Code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joypy

# Create realistic temperature distributions
np.random.seed(42)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
data = []

# Temperature parameters for Helsinki, Finland (as an example)
mean_temps = [
    -3.5,  # Jan
    -4.5,  # Feb
    -1.0,  # Mar
    4.5,   # Apr
    10.8,  # May
    15.5,  # Jun
    18.0,  # Jul
    16.3,  # Aug
    11.5,  # Sep
    6.6,   # Oct
    1.6,   # Nov
    -2.0,  # Dec
]

# Winter months have more spread than summer months.
# NOTE: these values are passed as `scale` below, i.e. they act as
# standard deviations despite the variable name.
variances = [
    2.5,  # Jan
    2.5,  # Feb
    2.2,  # Mar
    2.0,  # Apr
    1.8,  # May
    1.5,  # Jun
    1.2,  # Jul
    1.5,  # Aug
    1.8,  # Sep
    2.0,  # Oct
    2.2,  # Nov
    2.5,  # Dec
]

for month, mean_temp, variance in zip(months, mean_temps, variances):
    # Draw 1000 samples around the monthly mean
    distribution = np.random.normal(loc=mean_temp, scale=variance, size=1000)

    # Add slight skewness to cold months (more extreme cold than warm days)
    if mean_temp < 5:
        distribution = distribution - 0.3 * np.abs(distribution)

    data.append(pd.DataFrame({
        'temperature': distribution,
        'month': month,
    }))

df = pd.concat(data, ignore_index=True)

# Grouping by a plain string column sorts the months alphabetically
# (Apr, Aug, Dec, ...), which would not match the calendar-ordered `labels`
# passed below. An ordered categorical keeps the groups in calendar order.
df['month'] = pd.Categorical(df['month'], categories=months, ordered=True)

# Create the ridgeline plot
joypy.joyplot(
    data=df,
    by="month",
    column="temperature",
    colormap=plt.cm.viridis,
    title="Monthly Temperature Distributions",
    labels=months,
    range_style='all',
    tails=0.2,
    overlap=0.7,
    grid=True,
    figsize=(12, 4),
)
plt.xlabel("Temperature (°C)")
plt.show()
How to Plot in Python
Python Plotting Libraries
There are many libraries for data visualization in Python
Matplotlib: The foundation for most Python plotting libraries
Seaborn: Interface for statistical data visualization
Plotly: Interactive and customizable plotting library
Bokeh: Interactive and complex plots
Joypy: Easy ridgeline plots
Matplotlib Module
Matplotlib is the foundation for most Python plotting libraries
Customizable and suitable for high-quality figures
Provides easy-to-use functions for plotting
Works well with Pandas DataFrames
Basic Matplotlib Example
import matplotlib.pyplot as plt  # .pyplot is the main module in the package

plt.plot([1, 2, 3, 4], [10, 20, 25, 30])  # first is x-axis, second is y-axis
plt.show()
Task: Create two line plots of the following data:
# Make sure to label the plots! Color and marker are optional.
import numpy as np

x = np.linspace(0, 10, 100)  # 100 points between 0 and 10
y1 = np.sin(x)  # sine function
y2 = np.cos(x)  # cosine function
Let’s load the temperature dataset from our last tutorial.
import pandas as pd

# Load the temperature anomaly dataset and preview the first rows.
df = pd.read_excel('data/temp_anomaly_data.xlsx')
print(df.head())
Year Month Anomaly
0 1880 Jan -0.20
1 1881 Jan -0.20
2 1882 Jan 0.16
3 1883 Jan -0.29
4 1884 Jan -0.13
Example of a bad plot
Code
import pandas as pd
import matplotlib.pyplot as plt

# Convert Month to numeric for proper ordering
month_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
             'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}
df['Month_num'] = df['Month'].map(month_map)

# Sort by Year and Month
df = df.sort_values(['Year', 'Month_num'])

# Create the plot
plt.figure(figsize=(12, 4))

# Plot each year as a separate line
for year in df['Year'].unique():
    year_data = df[df['Year'] == year]
    plt.plot(year_data['Month_num'], year_data['Anomaly'],
             label=str(year), marker='o')

# Customize the plot
plt.title('Temperature Anomalies by Month for Each Year')
plt.xlabel('Month')
plt.ylabel('Temperature Anomaly (°C)')
plt.grid(True, linestyle='--', alpha=0.7)

# Set x-axis ticks to show month names
plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

# Adjust layout (no legend is drawn, although every line carries a label)
plt.tight_layout()
plt.show()
An okay plot
Code
import pandas as pd
import matplotlib.pyplot as plt

# Define correct month order
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Pivot the data (rows = years, columns = months) and reorder the columns
pivot_df = df.pivot(index='Year', columns='Month', values='Anomaly')
pivot_df = pivot_df[month_order]  # Reorder columns according to month_order

# Create the plot with a blue-to-red gradient for winter-to-summer
fig, ax = plt.subplots(figsize=(12, 4))  # Create figure and axes objects

# Create color gradient: blue in January, red in June/July, blue in December
colors = []
for i in range(12):
    if i <= 5:  # January to June
        r = i / 5
        b = 1 - (i / 5)
    else:  # July to December
        r = 1 - ((i - 6) / 5)
        b = (i - 6) / 5
    colors.append((r, 0, b))

# One line per month (column), plotted against the Year index
pivot_df.plot(ax=ax, marker='x', linewidth=1, alpha=0.5, color=colors)

plt.title('Temperature Anomalies by Month for Each Year')
# The pivot puts years on the x-axis and months in the legend, so the axis
# label and legend title must say so.
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly (°C)')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', title='Month')

plt.tight_layout()
plt.show()
A better plot
Code
# Calculate the yearly mean and standard deviation of the anomalies
yearly_means = df.groupby('Year')['Anomaly'].agg(['mean', 'std'])

# Create the plot
plt.figure(figsize=(12, 4))

# Plot mean values as a line
plt.plot(yearly_means.index, yearly_means['mean'],
         color='navy', linewidth=2, marker='o',
         label='Mean Temperature Anomaly')

# Add shaded area for standard deviation
plt.fill_between(yearly_means.index,
                 yearly_means['mean'] - yearly_means['std'],
                 yearly_means['mean'] + yearly_means['std'],
                 color='lightblue', alpha=0.3,
                 label='±1 Standard Deviation')

# Customize the plot.
# The shaded band is ±1 standard deviation, not a confidence interval,
# so the title names it accordingly.
plt.title('Yearly Average Temperature Anomalies with ±1 Standard Deviation')
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly (°C)')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()

# Add zero reference line
plt.axhline(y=0, color='red', linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()
A good plot
Code
# Read the data
df = pd.read_excel('data/temp_anomaly_data.xlsx')

# Aggregate to yearly averages
yearly_df = df.groupby('Year')['Anomaly'].mean().reset_index()

# Create the figure and axes
fig, ax = plt.subplots(figsize=(12, 4))

# Create the main line plot
plt.plot(yearly_df['Year'], yearly_df['Anomaly'],
         color='#FF5733', linewidth=1.5, alpha=0.7)

# Calculate rolling mean on yearly data (centered 10-year window; at the
# edges at least 5 years are required)
rolling_mean = yearly_df['Anomaly'].rolling(
    window=10, center=True, min_periods=5).mean()
plt.plot(yearly_df['Year'], rolling_mean,
         color='#C70039', linewidth=2.5, label='10-year Moving Average')

# Fill between the line and zero: warm colour above, cool colour below
plt.fill_between(yearly_df['Year'], yearly_df['Anomaly'], 0,
                 where=(yearly_df['Anomaly'] >= 0),
                 color='#FF5733', alpha=0.3, label='Positive Anomaly')
plt.fill_between(yearly_df['Year'], yearly_df['Anomaly'], 0,
                 where=(yearly_df['Anomaly'] < 0),
                 color='#3498DB', alpha=0.3, label='Negative Anomaly')

# Customize the plot
plt.title('Global Temperature Anomalies (1880-2023)', fontsize=14, pad=15)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Temperature Anomaly (°C)', fontsize=12)
plt.grid(True, alpha=0.3)
plt.legend()

# Add a horizontal line at y=0
plt.axhline(y=0, color='black', linestyle='--', alpha=0.3)

# Add text annotation for context
plt.text(1890, 1.15, 'Temperature anomalies relative to\n1951-1980 average',
         fontsize=10, alpha=0.7)

plt.tight_layout()
plt.show()
How to build such a plot?
Think: about what you want to build
Describe: what you want to build in detail
Use AI: to build the plot for you
Use Libraries: documentation to fine-tune the plot
. . .
Tip
As usual, the best way to learn is by doing! AI makes it very easy to get started.
Good Plotting in Action
Task: Create a plot of your own for the data.
. . .
# TODO: Load the data from the `temp_anomaly_data.xlsx` file that you saved in the last lecture and plot the temperature anomaly data. Find a way to make the plot meaningful and attractive so that it tells a story to the reader.
# YOUR CODE HERE