pystreamgraph demo¶
This notebook demonstrates the features of pystreamgraph.
First I'll need to set up some stuff to access the package locally.
%load_ext autoreload
%autoreload 2
import pathlib
import sys

# Put the repository's ``src`` directory at the front of the import path
# (once) so the package can be imported from the local checkout.
repo_root = pathlib.Path("..").resolve()
src_path = repo_root / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))
Then we do some imports, and set up the aesthetics (cmap, opinionated styling, etc.).
%%capture
# Imports
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
# Styling and aesthetics
import opinionated # Opinionated matplotlib styling
plt.style.use('opinionated_rc')
import colormaps as colormaps
from pystreamgraph import plot_streamgraph
# Default colormap for this notebook
DEFAULT_CMAP = colormaps.lipari.cut(0.25, 'right')
We load some data:
# Read the pickled sample dataset from the repository's data directory.
# NOTE: only unpickle files you trust -- unpickling can execute arbitrary code.
DATA_PATH = os.path.join("..", "data", "sample_streamgraph_data.pkl")
with open(DATA_PATH, "rb") as pkl_file:
    data = pickle.load(pkl_file)
Data Format¶
The pystreamgraph library expects data in the following format:
- X: 1D array of shape (n,) representing the x-axis values (e.g., time points)
- Y: 2D array of shape (k, n), where:
  - k is the number of streams/layers
  - n is the number of time points
  - Each row represents one stream's values over time
  - All values should be non-negative
- labels: Optional list of k strings, one for each stream
The data is stacked vertically to create the streamgraph visualization, with each stream's thickness at any point determined by its corresponding Y value.
# Pull the arrays out of the loaded dict, skipping the first x position
# in both X and Y so their lengths stay aligned.
X = data['X_stream'][1:]
Y = data['Y_stream'][:, 1:]
labels = data['cluster_labels_to_plot']

# Preview the inputs as DataFrames.
# X: (n,), Y: (k, n) -- Y is transposed so every stream becomes a column.
display(pd.DataFrame({"X": X}).head())
df = pd.DataFrame(Y.T, columns=labels)
display(df.head())

k = Y.shape[0]
n = len(X)
print(f"n={n}, k={k}")
X | |
---|---|
0 | 2017.0 |
1 | 2018.0 |
2 | 2019.0 |
3 | 2020.0 |
4 | 2021.0 |
Psychological Science and Methods | Social and political theory | Metaphilosophy and Computing | Game Theory and Collective Action | Philosophy of History | Agent-Based Computational Social Science | Cognitive science and NLP | AI Ethics and Interpretability | High-dimensional data visualization | Statistical network analysis | Philosophy and Sociology of Science | Social Epistemology of Science | Complex systems modeling | Scientometrics and bibliometrics | NLP for Digital Humanities | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 87 | 72 | 32 | 41 | 50 | 1 | 47 | 1 | 1 | 7 | 7 | 7 | 14 | 2 | 2 |
1 | 45 | 33 | 67 | 9 | 12 | 1 | 93 | 5 | 10 | 24 | 13 | 9 | 11 | 35 | 3 |
2 | 63 | 38 | 46 | 19 | 11 | 11 | 22 | 10 | 44 | 32 | 22 | 13 | 16 | 51 | 31 |
3 | 14 | 26 | 31 | 16 | 60 | 20 | 16 | 11 | 45 | 60 | 36 | 36 | 60 | 52 | 37 |
4 | 13 | 24 | 22 | 245 | 2 | 83 | 12 | 15 | 14 | 23 | 38 | 29 | 19 | 15 | 33 |
n=9, k=15
Helper function to save images:
# Image saving helper: all demo figures are written below ../images
IMAGES_DIR = os.path.join("..", "images")
os.makedirs(IMAGES_DIR, exist_ok=True)


def save_fig(name: str, dpi: int = 250) -> None:
    """Save the current matplotlib figure as ``<IMAGES_DIR>/<name>.png``.

    An existing file with the same name is overwritten, so every figure
    in the notebook should be saved under a unique ``name``.

    Args:
        name: Base file name without the ``.png`` extension.
        dpi: Resolution passed to ``plt.savefig``. Defaults to 250, the
            value previously hard-coded here.
    """
    path = os.path.join(IMAGES_DIR, f"{name}.png")
    plt.savefig(path, bbox_inches="tight", dpi=dpi)
Basic streamgraph¶
A minimal example using the library defaults.
# Minimal example: only the colormap (notebook DEFAULT_CMAP) and target axes
# are passed; add labels=labels to the call to annotate the streams.
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(X, Y, cmap=DEFAULT_CMAP, ax=ax)
ax.set_title("Basic streamgraph")
save_fig("basic")
plt.show()
Sorting streams¶
Use sorted_streams=True to order streams by their overall size, which is another common streamgraph layout. Whether this makes sense depends on your data.
# Same data as the basic plot, but with sorted_streams switched on
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    sorted_streams=True,
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("sorted_streams=True")
save_fig("sorting_sorted_true")
plt.show()
Baseline shifts¶
# Baseline variant: wiggle_reduction='none'
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    wiggle_reduction="none",
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("wiggle_reduction='none'")
# Save under its own name; "basic" would overwrite the basic example image.
save_fig("baseline_none")
plt.show()
# Baseline variant: wiggle_reduction='unweighted'
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    wiggle_reduction="unweighted",
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("wiggle_reduction='unweighted'")
# Save under its own name; "basic" would overwrite the basic example image.
save_fig("baseline_unweighted")
plt.show()
# Baseline variant: wiggle_reduction='weighted' (the library default)
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    wiggle_reduction="weighted",
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("wiggle_reduction='weighted' (default)")
# Save under its own name; "basic" would overwrite the basic example image.
save_fig("baseline_weighted")
plt.show()
# Baseline variant: wiggle_reduction='l1_weighted'
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    wiggle_reduction="l1_weighted",
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("wiggle_reduction='l1_weighted'")
# Save under its own name; "basic" would overwrite the basic example image.
save_fig("baseline_l1_weighted")
plt.show()
Ordering streams¶
We can also order the streams, following the ideas in Bartolomeo and Hu 2016.
from pystreamgraph import order_bestfirst, order_twoopt

# Compute a stream ordering with order_twoopt and hand it to the plot
# through global_order.
order = order_twoopt(X, Y, repeats=12, scans=4)
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    global_order=order,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("global_order=order")
# Save under its own name; "basic" would overwrite the basic example image.
save_fig("ordering_twoopt")
plt.show()
Peak vs max_width label position¶
peak
places labels at each stream's local maximum. max_width
finds where the stream is widest (an alias for max_width
).
# Labels at label_position='peak', drawn in black with a white outline
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='peak',
    label_color='black',
    label_kwargs={
        'path_effects': [
            path_effects.withStroke(linewidth=3, foreground='white'),
        ],
    },
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='peak'")
save_fig("labels_peak")
plt.show()
Balanced label placement¶
label_position='balanced'
uses an annealed placement to reduce overlaps across the chart. Tweak curve_samples
and font size to suit your layout.
# Labels at label_position='max_width'
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='max_width',
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='max_width'")
save_fig("labels_max_width")
plt.show()
# Balanced placement with a fixed label font size of 14
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='balanced',
    label_balanced_inset_frac=0.05,
    label_kwargs={"fontsize": 14},
    curve_samples=24,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='balanced' (fontsize=14)")
save_fig("labels_balanced_f14")
plt.show()
# Balanced label placement with extra styling for readability
fig, ax = plt.subplots(figsize=(10, 8))

# White stroke around the label text for contrast against the streams
stroke_effect = path_effects.withStroke(linewidth=2, foreground='white')
# Fully transparent box (alpha=0, no edge): it is only there to add
# padding between the labels.
bbox_style = {
    "boxstyle": "round,pad=0.3",
    "alpha": 0.,
    "edgecolor": 'none',
}

ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    cmap=DEFAULT_CMAP,
    ax=ax,
    sorted_streams=False,            # keep the original stream order
    curve_samples=24,                # smooth curve interpolation
    label_position='balanced',       # annealed placement to avoid overlaps
    label_balanced_inset_frac=0.02,  # keep labels slightly inside the streams
    label_balanced_progress=True,    # show the annealing progress bar
    label_anchor='middle_center',
    label_color='black',
    label_fontsize_min=7,            # smallest font, used for small streams
    label_fontsize_max=18,           # largest font, used for large streams
    label_kwargs={
        'path_effects': [stroke_effect],
        'bbox': bbox_style,
    },
    # label_balanced_debug_segments=True can be added for debugging
)
ax.set_title("label_position='balanced' (annealed de-overlap)")
save_fig("labels_balanced")
plt.show()
Balancing labels: 0%| | 0/138 [00:00<?, ?it/s]
Balancing labels: 0%| | 0/138 [00:00<?, ?it/s]
# max_width placement with the anchor points drawn, middle_left anchor.
# The title and file name describe what the call actually does
# (label_position='max_width', label_anchor='middle_left'); the previous
# "balanced ... (top_right)" wording did not match the arguments, and the old
# file name collided with the balanced-anchors figure saved further below.
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X, Y, labels=labels, cmap=DEFAULT_CMAP,
    label_position='max_width',
    curve_samples=24,
    label_color='black',
    label_kwargs={"fontsize": 14},
    label_anchor='middle_left',
    label_plot_anchors=True,
    label_point_kwargs={"markersize": 6, "color": "#111"},
    ax=ax,
)
ax.set_title("max_width with anchor points (middle_left)")
save_fig("labels_max_width_with_anchors")
plt.show()
Envelopes from the layout¶
You can access the raw bottom/top outlines via streamgraph_envelopes
to build custom visuals.
Margins between streams¶
margin_frac
controls the vertical spacing between neighboring streams. Higher values separate the layers more, which can improve readability when streams have strong overlaps.
# margin_frac = 0.0 (no gaps)
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_color='black',
    margin_frac=0.0,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("margin_frac=0.0")
save_fig("margins_0_00")
plt.show()

# margin_frac = 0.05 (small gaps)
fig, ax = plt.subplots(figsize=(11, 5))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    margin_frac=0.05,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("margin_frac=0.05")
save_fig("margins_0_05")
plt.show()

# margin_frac = 0.5 (large gaps)
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    margin_frac=0.5,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("margin_frac=0.5")
save_fig("margins_0_50")
plt.show()
Smoothing (moving average)¶
smooth_window applies a simple moving average along the x-axis. Larger windows can reduce noise and emphasize broader trends. But be careful: windows that are too large (e.g. w=7 below) can hide or misrepresent information. Below we compare a few settings individually.
# smooth_window = 1 (no smoothing)
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    smooth_window=1,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("smooth_window=1")
save_fig("smoothing_w1")
plt.show()

# smooth_window = 2
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    smooth_window=2,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("smooth_window=2")
save_fig("smoothing_w2")
plt.show()

# smooth_window = 7 (more smoothing)
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    smooth_window=7,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("smooth_window=7")
save_fig("smoothing_w7")
plt.show()
Boundary curve smoothing¶
curve_samples
controls how smoothly the stream boundaries are interpolated. Higher values increase smoothness.
# curve_samples = 1 (least smooth)
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    curve_samples=1,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("curve_samples=1")
save_fig("curves_s1")
plt.show()
# curve_samples = 8 (smoother)
# The call previously passed curve_samples=3 while the comment, the title and
# the file name all say 8; the argument now matches them.
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    curve_samples=8,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("curve_samples=8")
save_fig("curves_s8")
plt.show()
# curve_samples = 200 (very smooth); this variant is drawn without labels
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    curve_samples=200,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("curve_samples=200")
save_fig("curves_s200")
plt.show()
We have two smoothing modes: PCHIP smoothing (default), which respects the values well, and Catmull-Rom, which can look nicer but might occasionally 'overshoot' while interpolating.
# curve_method='catmull_rom' at curve_samples=200
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    curve_method='catmull_rom',
    curve_samples=200,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("curve_method='catmull_rom'")
# Save under its own name; "curves_s200" would overwrite the figure saved
# for the default curve method with curve_samples=200.
save_fig("curves_catmull_rom")
plt.show()
Label placement modes¶
The label_position option places labels by different heuristics:
- peak: at each stream's local maximum
- start: at the left edge
- end: at the right edge
- max_width: placed on top of the widest part of the plot.
- sliding_window: Implements Bartolomeo and Hu's 2016 placement algorithm.
- balanced: Optimized layout across the plot that minimizes overlap among labels and with the margins, and tries to put labels in an even spread on the widest parts of each stream.
# label_position = 'peak', black labels with a white outline
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='peak',
    label_color='black',
    label_kwargs={
        'path_effects': [
            path_effects.withStroke(linewidth=3, foreground='white'),
        ],
    },
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='peak'")
save_fig("labels_position_peak")
plt.show()
# label_position = 'start'
fig, ax = plt.subplots(figsize=(11, 7))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='start',
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
# Remove the y ticks and y-axis label for this variant
ax.set_yticks([])
ax.set_ylabel('')
ax.set_title("label_position='start'")
save_fig("labels_position_start")
plt.show()
# label_position = 'end'
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='end',
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='end'")
save_fig("labels_position_end")
plt.show()
# label_position = 'max_width'
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='max_width',
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='max_width'")
save_fig("labels_max_width")
plt.show()
# label_position = 'sliding_window'
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_position='sliding_window',
    label_color='black',
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("label_position='sliding_window'")
# Save under its own name; "labels_max_width" would overwrite the max_width
# figure.
save_fig("labels_sliding_window")
plt.show()
# Balanced label placement with extra styling for readability
fig, ax = plt.subplots(figsize=(10, 8))

# White stroke around the label text for contrast against the streams
stroke_effect = path_effects.withStroke(linewidth=2, foreground='white')
# Fully transparent box (alpha=0, no edge): it is only there to add
# padding between the labels.
bbox_style = {
    "boxstyle": "round,pad=0.3",
    "alpha": 0.,
    "edgecolor": 'none',
}

ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    cmap=DEFAULT_CMAP,
    ax=ax,
    sorted_streams=False,            # keep the original stream order
    curve_samples=24,                # smooth curve interpolation
    label_position='balanced',       # annealed placement to avoid overlaps
    label_balanced_inset_frac=0.02,  # keep labels slightly inside the streams
    label_balanced_progress=True,    # show the annealing progress bar
    label_anchor='middle_center',
    label_color='black',
    label_fontsize_min=7,            # smallest font, used for small streams
    label_fontsize_max=18,           # largest font, used for large streams
    label_kwargs={
        'path_effects': [stroke_effect],
        'bbox': bbox_style,
    },
    # label_balanced_debug_segments=True can be added for debugging
)
ax.set_title("label_position='balanced' (annealed de-overlap)")
save_fig("labels_balanced")
plt.show()
Balancing labels: 0%| | 0/138 [00:00<?, ?it/s]
Balancing labels: 0%| | 0/138 [00:00<?, ?it/s]
# Balanced placement with the anchor points drawn, middle_left anchor.
# Reuses stroke_effect and bbox_style defined in an earlier cell.
fig, ax = plt.subplots(figsize=(10, 8))
ax = plot_streamgraph(
    X, Y, labels=labels, cmap=DEFAULT_CMAP,
    label_position='balanced',
    label_balanced_inset_frac=0.01,  # Keep labels slightly inside stream boundaries
    label_anchor='middle_left',
    label_plot_anchors=True,
    label_point_kwargs={"markersize": 6, "color": "#111"},
    label_color='black',
    label_kwargs={
        'path_effects': [stroke_effect],  # White outline for better contrast
        'bbox': bbox_style,               # Transparent box, adds padding only
    },
    ax=ax,
)
# The title names the anchor that is actually passed (it used to say top_right).
ax.set_title("balanced with anchor points (middle_left)")
save_fig("labels_balanced_with_anchors")
plt.show()
Color options¶
There are various ways to set streamgraph-colors. Above we have always simply used a colormap directly. But you can also provide a name of a matplotlib-cmap:
# Colors from a matplotlib colormap referenced by name
fig, ax = plt.subplots(figsize=(11, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_color='black',
    cmap='tab20',
    ax=ax,
)
ax.set_title("cmap='tab20'")
save_fig("colormaps_named_tab20")
plt.show()
...or an explicit list of colors:
# Colors from an explicit sequence (cycled); five colors for more streams
fig, ax = plt.subplots(figsize=(11, 8))
seq = [
    "#4E79A7",
    "#F28E2B",
    "#E15759",
    "#76B7B2",
    "#59A14F",
]
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    label_color='black',
    cmap=seq,
    ax=ax,
)
ax.set_title("explicit color sequence")
save_fig("colormaps_sequence")
plt.show()
... or a dictionary of labels to colors, for even more fine-grained control. Note that you can also pass a list of colors to label_color!
# Highlight the first two streams; every remaining stream is grey
fig, ax = plt.subplots(figsize=(10, 8))
map_colors = {labels[0]: "#8c564b", labels[1]: "#e15759"}
map_colors.update(dict.fromkeys(labels[2:], "#808080"))

# Label text uses the same color as its stream, with a white outline
white_outline = path_effects.withStroke(linewidth=3, foreground='white')
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    cmap=map_colors,
    label_position='max_width',
    label_anchor='middle_left',
    label_color=[map_colors[name] for name in labels],
    label_kwargs={"fontsize": 9, 'path_effects': [white_outline]},
    ax=ax,
)
ax.set_title("Two highlights (brown/red)")
save_fig("colormaps_mapping_grey")
plt.show()
Further appearance tweaks¶
Adjust the stream outlines and transparency with linewidth and alpha, and control padding between stacks with pad_frac.
# Thin outlines, nearly opaque streams: linewidth=0.5, alpha=0.9
fig, ax = plt.subplots(figsize=(9, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    linewidth=0.5,
    alpha=0.9,
    cmap=DEFAULT_CMAP,
    ax=ax,
)
ax.set_title("linewidth=0.5, alpha=0.9")
save_fig("appearance_line_alpha_1")
plt.show()
# linewidth=1.5, alpha=0.6 (also sorted, with wide margins and smoothing)
# The call previously passed linewidth=2 while the comment and the title both
# say 1.5; the argument now matches them.
fig, ax = plt.subplots(figsize=(9, 8))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    sorted_streams=True,
    linewidth=1.5,
    alpha=0.6,
    cmap=DEFAULT_CMAP,
    ax=ax,
    label_color='black',
    margin_frac=0.7,
    smooth_window=3,
)
ax.set_title("linewidth=1.5, alpha=0.6")
save_fig("appearance_line_alpha_2")
plt.show()
# Labels at the end, spaced apart and joined to their streams by connectors
fig, ax = plt.subplots(figsize=(11, 9))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    cmap=DEFAULT_CMAP,
    ax=ax,
    label_position='end',
    label_color='black',
    label_min_gap_frac=0.015,
    label_edge_offset_frac=0.01,
    label_connectors=True,
    label_connector_alpha=0.3,
    label_connector_linewidth=1.0,
)
ax.set_title("End labels with spacing + connectors")
save_fig("labels_end_spacing_connectors")
plt.show()
# Labels at the start, spaced apart, with connectors enabled
fig, ax = plt.subplots(figsize=(11, 9))
ax = plot_streamgraph(
    X,
    Y,
    labels=labels,
    cmap=DEFAULT_CMAP,
    ax=ax,
    label_position='start',
    label_color='black',
    label_min_gap_frac=0.02,
    label_edge_offset_frac=0.03,
    label_connectors=True,
    label_connector_linewidth=1.0,
)
# Remove the y ticks and y-axis label for this variant
ax.set_yticks([])
ax.set_ylabel('')
ax.set_title("Start labels with spacing")
save_fig("labels_start_spacing")
plt.show()
Base example¶
Basic self contained example for the Quick-start.
# Self-contained quick-start example: every package used by the plotting code
# is imported here. (The numpy import used to be commented out and colormaps
# was not imported, so the snippet only ran after the earlier notebook cells.)
import numpy as np
import matplotlib.pyplot as plt
import colormaps
from pystreamgraph import plot_streamgraph

# Synthetic data: k noisy, phase-shifted sine streams over n x positions.
# The fixed seed keeps the figure reproducible.
rng = np.random.default_rng(7)
n, k = 40, 5
X_ = np.arange(n)
base = np.linspace(0, 2 * np.pi, n)
Y_ = []
for i in range(k):
    phase = rng.uniform(0, 2 * np.pi)
    amp = rng.uniform(0.6, 1.3)
    y = amp * (np.sin(base + phase) + 1.2) + rng.normal(0, 0.08, size=n) + 0.15
    # Clip at zero so every stream value stays non-negative
    y = np.clip(y, 0, None)
    Y_.append(y)
Y_ = np.vstack(Y_)

# NOTE(review): label_placement is not passed anywhere else in this notebook --
# confirm it is a valid plot_streamgraph option.
ax = plot_streamgraph(
    X_,
    Y_,
    labels=[f"Label {letter}" for letter in "ABCDE"],
    sorted_streams=False,
    margin_frac=0.10,
    smooth_window=3,
    cmap=colormaps.brwnyl,
    curve_samples=40,
    alpha=0.9,
    label_color='lightgrey',
    label_placement=True,
    label_position='balanced',
)
ax.set_title("Streamgraph")
# save_fig is the notebook's image helper; drop this line when copying the
# example outside the notebook.
save_fig("streamgraph_base")
plt.show()