Transforms

Transforms provide methods to manipulate data and compute derived values as part of plotting.

Group transform

The Group transform computes summary values before plotting them. It is often used to make bar charts.

import polars as pl
from pyobsplot import Plot, d3, Math, js

# Penguin dataset, also used by later examples in this document.
penguins = pl.read_csv("data/penguins.csv")

# Group transform: y is reduced with "proportion-facet", while the "sex"
# column drives fill and the "species" column drives the horizontal facet (fx).
sex_share_by_species = Plot.groupZ(
    {"y": "proportion-facet"},
    {"fill": "sex", "fx": "species"},
)

# Bar chart of the grouped values, plus horizontal rules at y = 0 and y = 1.
Plot.plot(
    {
        "y": {"grid": True, "percent": True},
        "marks": [
            Plot.barY(penguins, sex_share_by_species),
            Plot.ruleY([0, 1]),
        ],
    }
)

Dodge transform

The Dodge transform packs marks so that they do not overlap. The following chart shows data about IPO offerings in the US.

from datetime import datetime

# Load the IPO data and keep only rows dated after 1 January 1991.
ipo_cutoff = datetime(1991, 1, 1)
ipos = pl.read_csv("data/ipos.csv", try_parse_dates=True)
ipos = ipos.filter(pl.col("date") > ipo_cutoff)

# One dot per row: x is the date, radius is rMVOP, dodged along y.
ipo_dots = Plot.dot(
    ipos,
    Plot.dodgeY({"x": "date", "r": "rMVOP", "fill": "currentColor"}),
)

# Text labels only for rows where rMVOP > 5e3; the label is rMVOP / 1e3
# formatted with toFixed(). Same x / r channels as the dots.
ipo_label_options = {
    "filter": js("(d) => d.rMVOP > 5e3"),
    "x": "date",
    "r": "rMVOP",
    "text": js("d => (d.rMVOP / 1e3).toFixed()"),
    "fill": "white",
    "fontWeight": "bold",
}
ipo_labels = Plot.text(ipos, Plot.dodgeY(ipo_label_options))

Plot.plot(
    {
        "insetRight": 10,
        "height": 600,
        "width": 600,
        "marks": [ipo_dots, ipo_labels],
    }
)

Hexbin transform

The Hexbin transform aggregates two-dimensional points into hexagonal bins.

# Position channels and bin width shared by the hexagon and text marks.
hex_position = {"x": "culmen_length_mm", "y": "flipper_length_mm"}
hex_bin_width = 50

# Hexagons whose radius and fill both come from the per-bin count.
hex_cells = Plot.hexagon(
    penguins,
    Plot.hexbin(
        {"r": "count", "fill": "count"},
        {**hex_position, "binWidth": hex_bin_width},
    ),
)

# Per-bin count written as black text with a white stroke.
hex_counts = Plot.text(
    penguins,
    Plot.hexbin(
        {"text": "count"},
        {
            **hex_position,
            "fill": "black",
            "stroke": "white",
            "binWidth": hex_bin_width,
        },
    ),
)

Plot.plot(
    {
        "inset": 10,
        "color": {"scheme": "ylgnbu", "legend": True, "label": "Count"},
        "marks": [hex_cells, hex_counts],
    }
)

Map transform

The Map transform family can be used to normalize values.

# Stock price data; date-like columns are parsed on load.
stocks = pl.read_csv("data/stocks.csv", try_parse_dates=True)

# Log-scaled y axis; ticks display (value - 1) * 100 using d3's "+d" format.
price_change_axis = {
    "type": "log",
    "grid": True,
    "label": "↑ Change in price (%)",
    "tickFormat": js('(f => x => f((x - 1) * 100))(d3.format("+d"))'),
}

# Channels common to the line and its label.
date_close = {"x": "Date", "y": "Close"}

# One line per Symbol, with Close passed through normalizeY.
price_lines = Plot.line(
    stocks,
    Plot.normalizeY({**date_close, "stroke": "Symbol"}),
)

# Symbol name as text, taken from selectLast over the normalized series.
symbol_labels = Plot.text(
    stocks,
    Plot.selectLast(
        Plot.normalizeY(
            {
                **date_close,
                "z": "Symbol",
                "text": "Symbol",
                "textAnchor": "start",
                "dx": 3,
            }
        )
    ),
)

Plot.plot(
    {
        "marginRight": 40,
        "y": price_change_axis,
        "marks": [Plot.ruleY([1]), price_lines, symbol_labels],
    }
)

Transforms composition

Several group and map transforms can be applied and composed to create complex representations, such as this distribution of age groups by US state, adapted from the Map transform notebook.

# Load the wide table, reshape it to one row per (name, age) pair with a
# "population" value column, then rename "name" to "state".
stateage_wide = pl.read_csv("data/us-population-state-age.csv")
stateage = stateage_wide.unpivot(
    index="name", variable_name="age", value_name="population"
).rename({"name": "state"})
# TODO: remove when Bigint error fixed upstream in Observable Plot
stateage = stateage.with_columns(pl.col("population").cast(pl.Int32))

# Age groups in their order of first appearance.
ages = stateage.get_column("age").unique(maintain_order=True).to_list()

# States ordered by decreasing share of the "≥80" age group, where the share
# is each row's population divided by the total population of its state.
population_share = pl.col("population") / pl.col("population").sum().over("state")
oldest_group = (
    stateage.with_columns(population_share.alias("percent"))
    .filter(pl.col("age") == "≥80")
    .sort(pl.col("percent"), descending=True)
)
states = oldest_group.get_column("state").to_list()

# Options shared by every Plot.normalizeX call below.
xy = {"basis": "sum", "z": "state", "x": "population", "y": "state"}

# Scale definitions: x values are multiplied by 100 for display, y follows
# the precomputed state order with no axis, color follows the age groups.
x_scale = {"axis": "top", "label": "Percent (%) →", "transform": js("d => d * 100")}
y_scale = {
    "domain": states,
    "axis": None,
}
color_scale = {
    "scheme": "spectral",
    "domain": ages,
    "legend": True,
}

# Per-state rule from the min to the max of the normalized x values.
state_range = Plot.ruleY(
    stateage,
    Plot.groupY({"x1": "min", "x2": "max"}, Plot.normalizeX(xy)),
)

# One dot per (state, age) row, filled by age group.
age_dots = Plot.dot(stateage, Plot.normalizeX({**xy, "fill": "age"}))

# State name as text, chosen with selectMinX, end-anchored with dx = -6.
state_label_options = {
    **xy,
    "textAnchor": "end",
    "dx": -6,
    "text": "state",
}
state_labels = Plot.text(
    stateage,
    Plot.selectMinX(Plot.normalizeX(state_label_options)),
)

# Plot specification
Plot.plot(
    {
        "height": 660,
        "grid": True,
        "x": x_scale,
        "y": y_scale,
        "color": color_scale,
        "marks": [Plot.ruleX([0]), state_range, age_dots, state_labels],
    }
)

<10 · 10-19 · 20-29 · 30-39 · 40-49 · 50-59 · 60-69 · 70-79 · ≥80