import polars as pl
from pyobsplot import Plot, d3, Math, js
# Penguin dataset shared by the examples that reference `penguins`.
penguins = pl.read_csv("data/penguins.csv")

# Bars filled by sex and faceted horizontally by species; the group
# transform reduces y with the "proportion-facet" reducer.
sex_share_bars = Plot.barY(
    penguins,
    Plot.groupZ({"y": "proportion-facet"}, {"fill": "sex", "fx": "species"}),
)

Plot.plot(
    {
        "y": {"grid": True, "percent": True},
        "marks": [
            sex_share_bars,
            # Horizontal rules at y = 0 and y = 1.
            Plot.ruleY([0, 1]),
        ],
    }
)
Transforms
Transforms provide ways to manipulate data and compute derived values while plotting.
Group transform
The Group transform computes summary values before plotting them. It is often used to make bar charts.
Dodge transform
The Dodge transform packs marks so that they do not overlap. The following chart shows data about IPO offerings in the US.
from datetime import datetime
ipos = pl.read_csv("data/ipos.csv", try_parse_dates=True).filter(
pl.col("date") > datetime(1991, 1, 1)
)
Plot.plot(
{
"insetRight": 10,
"height": 600,
"width": 600,
"marks": [
Plot.dot(
ipos, Plot.dodgeY({"x": "date", "r": "rMVOP", "fill": "currentColor"})
),
Plot.text(
ipos,
Plot.dodgeY(
{
"filter": js("(d) => d.rMVOP > 5e3"),
"x": "date",
"r": "rMVOP",
"text": js("d => (d.rMVOP / 1e3).toFixed()"),
"fill": "white",
"fontWeight": "bold",
}
),
),
],
}
)Hexbin transform
The Hexbin transform aggregates two-dimensional points into hexagonal bins.
Plot.plot(
{
"inset": 10,
"color": {"scheme": "ylgnbu", "legend": True, "label": "Count"},
"marks": [
Plot.hexagon(
penguins,
Plot.hexbin(
{"r": "count", "fill": "count"},
{
"x": "culmen_length_mm",
"y": "flipper_length_mm",
"binWidth": 50,
},
),
),
Plot.text(
penguins,
Plot.hexbin(
{"text": "count"},
{
"x": "culmen_length_mm",
"y": "flipper_length_mm",
"fill": "black",
"stroke": "white",
"binWidth": 50,
},
),
),
],
}
)Map transform
The Map transform family can be used to normalize values.
stocks = pl.read_csv("data/stocks.csv", try_parse_dates=True)
Plot.plot(
{
"marginRight": 40,
"y": {
"type": "log",
"grid": True,
"label": "↑ Change in price (%)",
"tickFormat": js('(f => x => f((x - 1) * 100))(d3.format("+d"))'),
},
"marks": [
Plot.ruleY([1]),
Plot.line(
stocks, Plot.normalizeY({"x": "Date", "y": "Close", "stroke": "Symbol"})
),
Plot.text(
stocks,
Plot.selectLast(
Plot.normalizeY(
{
"x": "Date",
"y": "Close",
"z": "Symbol",
"text": "Symbol",
"textAnchor": "start",
"dx": 3,
}
)
),
),
],
}
)Transforms composition
Several group and map transforms can be applied and composed to create complex representations, such as this distribution of age groups by US state, adapted from the Map transform notebook.
# Load and prepare data
stateage = (
pl.read_csv("data/us-population-state-age.csv")
.unpivot(index="name", variable_name="age", value_name="population")
.rename({"name": "state"})
)
# TODO: remove when Bigint error fixed upstream in Observable Plot
stateage = stateage.with_columns(pl.col("population").cast(pl.Int32))
# Compute list of age groups
ages = stateage.get_column("age").unique(maintain_order=True).to_list()
# Compute list of states sorted by the proportion of age ≥80
states = (
stateage.with_columns(
(pl.col("population") / pl.col("population").sum().over("state")).alias(
"percent"
)
)
.filter(pl.col("age") == "≥80")
.sort(pl.col("percent"), descending=True)
.get_column("state")
.to_list()
)
# Plot.normalizeX default arguments
xy = {"basis": "sum", "z": "state", "x": "population", "y": "state"}
# Plot specification
Plot.plot(
{
"height": 660,
"grid": True,
"x": {"axis": "top", "label": "Percent (%) →", "transform": js("d => d * 100")},
"y": {
"domain": states,
"axis": None,
},
"color": {
"scheme": "spectral",
"domain": ages,
"legend": True,
},
"marks": [
Plot.ruleX([0]),
Plot.ruleY(
stateage,
Plot.groupY({"x1": "min", "x2": "max"}, Plot.normalizeX(xy)),
),
Plot.dot(
stateage,
Plot.normalizeX({**xy, "fill": "age"}),
),
Plot.text(
stateage,
Plot.selectMinX(
Plot.normalizeX(
{
**xy,
"textAnchor": "end",
"dx": -6,
"text": "state",
}
)
),
),
],
}
)<1010-1920-2930-3940-4950-5960-6970-79≥80