Transforms

Transforms provide methods to manipulate data and compute derived values while plotting.

Group transform

The Group transform computes summary values before plotting them. It is often used to make bar charts.

import polars as pl
from pyobsplot import Plot, d3, Math, js

# Penguins dataset, read from the local CSV file.
penguins = pl.read_csv("data/penguins.csv")

# Group transform: y is reduced with "proportion-facet"; bars are filled by
# sex and faceted by species (fx).
sex_share = Plot.groupZ({"y": "proportion-facet"}, {"fill": "sex", "fx": "species"})

Plot.plot(
    dict(
        y=dict(grid=True, percent=True),
        marks=[
            Plot.barY(penguins, sex_share),
            Plot.ruleY([0, 1]),
        ],
    )
)
Adelie Chinstrap Gentoo species 0 10 20 30 40 50 60 70 80 90 100 ↑ Frequency (%)

Dodge transform

The Dodge transform packs marks so that they do not overlap. The following chart shows data about IPOs in the US.

from datetime import datetime

# Keep only the rows whose "date" is later than 1991-01-01.
ipos_after_cutoff = pl.col("date") > datetime(1991, 1, 1)
ipos = pl.read_csv("data/ipos.csv", try_parse_dates=True).filter(ipos_after_cutoff)


# Channels used by both the dots and their text labels.
bubble_channels = {"x": "date", "r": "rMVOP"}

# One dot per row, stacked along y by the dodge transform.
dots = Plot.dot(ipos, Plot.dodgeY({**bubble_channels, "fill": "currentColor"}))

# Text labels only for rows with rMVOP > 5e3; the label is rMVOP / 1e3
# formatted with toFixed().
labels = Plot.text(
    ipos,
    Plot.dodgeY(
        {
            "filter": js("(d) => d.rMVOP > 5e3"),
            **bubble_channels,
            "text": js("d => (d.rMVOP / 1e3).toFixed()"),
            "fill": "white",
            "fontWeight": "bold",
        }
    ),
)

Plot.plot({"insetRight": 10, "height": 600, "width": 600, "marks": [dots, labels]})
19921994199619982000200220042006200820102012104281613131298776

Hexbin transform

The Hexbin transform aggregates two-dimensional points into hexagonal bins.

# Position channels shared by the hexagon layer and the text layer.
hex_position = {"x": "culmen_length_mm", "y": "flipper_length_mm"}

# Hexagons whose radius and fill both encode the bin count.
hexagons = Plot.hexagon(
    penguins,
    Plot.hexbin({"r": "count", "fill": "count"}, {**hex_position, "binWidth": 50}),
)

# The bin count written as text, black with a white stroke.
bin_counts = Plot.text(
    penguins,
    Plot.hexbin(
        {"text": "count"},
        {**hex_position, "fill": "black", "stroke": "white", "binWidth": 50},
    ),
)

Plot.plot(
    {
        "inset": 10,
        "color": {"scheme": "ylgnbu", "legend": True, "label": "Count"},
        "marks": [hexagons, bin_counts],
    }
)
1020Count175180185190195200205210215220225230↑ flipper_length_mm3540455055culmen_length_mm →142611209111275101039322657181533115143211641752810189221734221

Map transform

The Map transform family can be used to normalize values.

stocks = pl.read_csv("data/stocks.csv", try_parse_dates=True)

# Tick formatter: a tick value x is displayed as (x - 1) * 100 using the d3
# "+d" format (signed integer).
percent_change_ticks = js('(f => x => f((x - 1) * 100))(d3.format("+d"))')

y_scale = {
    "type": "log",
    "grid": True,
    "label": "↑ Change in price (%)",
    "tickFormat": percent_change_ticks,
}

# Horizontal reference rule at y = 1.
baseline = Plot.ruleY([1])

# One normalized line per stock symbol.
price_lines = Plot.line(
    stocks, Plot.normalizeY({"x": "Date", "y": "Close", "stroke": "Symbol"})
)

# Symbol name as text on the last point selected in each series (z = Symbol).
label_channels = {
    "x": "Date",
    "y": "Close",
    "z": "Symbol",
    "text": "Symbol",
    "textAnchor": "start",
    "dx": 3,
}
end_labels = Plot.text(stocks, Plot.selectLast(Plot.normalizeY(label_channels)))

Plot.plot(
    {
        "marginRight": 40,
        "y": y_scale,
        "marks": [baseline, price_lines, end_labels],
    }
)
−40 −30 −20 −10 +0 +100 +200 +300 +400 +500 ↑ Change in price (%) 2014 2015 2016 2017 2018 AAPL AMZN GOOG IBM

Transforms composition

Several group and map transforms can be applied and composed to create complex representations, such as this distribution of age groups by US state, adapted from the Map transform notebook.

# Load the data and reshape it to long form: every column other than "name"
# becomes an ("age", "population") pair of rows.
wide = pl.read_csv("data/us-population-state-age.csv")
long = wide.unpivot(index="name", variable_name="age", value_name="population")

# TODO: remove when Bigint error fixed upstream in Observable Plot
population_i32 = pl.col("population").cast(pl.Int32)

# Rename "name" to "state", then apply the Int32 cast.
stateage = long.rename({"name": "state"}).with_columns(population_i32)

# Distinct age groups, in order of first appearance
ages = stateage.get_column("age").unique(maintain_order=True).to_list()

# Share of each row's population within its state's total population
population = pl.col("population")
share_in_state = (population / population.sum().over("state")).alias("percent")

# States ordered by decreasing share of the "≥80" age group
oldest_group = (
    stateage.with_columns(share_in_state)
    .filter(pl.col("age") == "≥80")
    .sort("percent", descending=True)
)
states = oldest_group.get_column("state").to_list()

# Options passed to every Plot.normalizeX call below
xy = dict(basis="sum", z="state", x="population", y="state")

# One rule per state, from the minimum to the maximum normalized x value
state_ranges = Plot.ruleY(
    stateage, Plot.groupY({"x1": "min", "x2": "max"}, Plot.normalizeX(xy))
)

# One dot per (state, age) row, filled by age group
age_dots = Plot.dot(stateage, Plot.normalizeX(dict(xy, fill="age")))

# State name as text, anchored at its end and shifted 6px left of the
# point picked by selectMinX
state_labels = Plot.text(
    stateage,
    Plot.selectMinX(Plot.normalizeX(dict(xy, textAnchor="end", dx=-6, text="state"))),
)

# Plot specification
Plot.plot(
    {
        "height": 660,
        "grid": True,
        "x": {"axis": "top", "label": "Percent (%) →", "transform": js("d => d * 100")},
        "y": {"domain": states, "axis": None},
        "color": {"scheme": "spectral", "domain": ages, "legend": True},
        "marks": [Plot.ruleX([0]), state_ranges, age_dots, state_labels],
    }
)
<10 10-19 20-29 30-39 40-49 50-59 60-69 70-79 ≥80
0 2 4 6 8 10 12 14 16 18 20 Percent (%) → AL AK AZ AR CA CO CT DE DC FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA WA WV WI WY PR