Back to Smile

Declarative Data Visualization

studio/src/universal/notebooks/plot-vega.ipynb

6.1.0 · 18.3 KB
Original Source

Declarative Data Visualization

A picture is worth a thousand words. In machine learning, we usually handle high-dimensional data, which cannot be drawn on a display directly. However, a variety of statistical plots are tremendously valuable for grasping the characteristics of many data points. Smile provides data visualization tools such as plots and maps to help researchers understand information more easily and quickly.

With the smile.plot.vega package, we can create a specification that describes visualizations as mappings from data to properties of graphical marks (e.g., points or bars). The specification is based on Vega-Lite. The Vega-Lite compiler automatically produces visualization components including axes, legends, and scales. It then determines properties of these components based on a set of carefully designed rules.

This approach allows specifications to be succinct and expressive while still providing user control. As Vega-Lite is designed for analysis, it supports data transformations such as aggregation, binning, filtering, and sorting, as well as visual transformations including stacking and faceting. Moreover, Vega-Lite specifications can be composed into layered and multi-view displays, and made interactive with selections.

java
import smile.plot.vega.*;
import static smile.plot.vega.Predicate.*;

Simple Bar Chart

A bar chart encodes quantitative values as the extent of rectangular bars.

java
// Bar chart over nine inline records: field "a" is the category, field "b" the value.
var bar = new View("Simple Bar Plot")
        .description("A simple bar chart with embedded data.")
        .widthStep(30);

// The data is embedded in the specification as a JSON array.
bar.data().values("""
                [
                  {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43},
                  {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53},
                  {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}
                ]""");

bar.mark("bar");

// x: ordinal category axis with its label angle set to 0.
bar.encode("x", "a").type("ordinal").axis().labelAngle(0);
// y: quantitative value that sets the extent of each bar.
bar.encode("y", "b").type("quantitative");

bar.show();

Aggregate Bar Chart

A bar chart showing the US population distribution of age groups in 2000.

java
// Horizontal bars: summed population per age group, restricted to the year 2000.
var bar = new View("Aggregate Bar Plot")
        .description("A bar chart showing the US population distribution of age groups in 2000.")
        .heightStep(20);

bar.mark("bar");
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Only rows whose year equals 2000 are kept.
bar.transform().filter("datum.year == 2000");

// x: sum of "people", shown under the title "population".
Field population = bar.encode("x", "people").type("quantitative");
population.aggregate("sum").title("population");

// y: one bar per age group.
bar.encode("y", "age").type("ordinal");

bar.show();

Aggregate Bar Chart (Sorted)

A bar chart that sorts the y-values by the x-values.

java
// Same aggregate bar chart as the unsorted variant, with the y axis sorted by the x channel.
var bar = new View("Sorted Aggregate Bar Plot")
        .description("A bar chart that sorts the y-values by the x-values.")
        .heightStep(20);

bar.mark("bar");
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");
bar.transform().filter("datum.year == 2000");

// x: sum of "people", shown under the title "population".
bar.encode("x", "people")
   .type("quantitative")
   .aggregate("sum")
   .title("population");

// y: age groups ordered by the "-x" sort key.
Field ageAxis = bar.encode("y", "age").type("ordinal");
ageAxis.sort("-x");

bar.show();

Grouped Bar Chart

java
// Grouped bars: one column facet per age group, with one bar per gender inside each facet.
var bar = new View("Group Bar Plot").widthStep(12);
bar.mark("bar");

// View configuration: transparent stroke and an axis domain width of 1.
var viewConfig = bar.viewConfig().stroke("transparent");
viewConfig.axis().domainWidth(1);

bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Keep year 2000 only, then derive a "gender" label from the numeric "sex" code.
var year2000 = bar.transform().filter("datum.year == 2000");
year2000.calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

// x: gender within each facet; the axis title is set to null.
bar.encode("x", "gender").type("nominal").title(null);

// y: summed population, with the axis titled "population" and its grid disabled.
var population = bar.encode("y", "people").type("quantitative").aggregate("sum");
population.axis().title("population").grid(false);

// color: a fixed two-color range for the gender values.
bar.encode("color", "gender").type("nominal").range("#675193", "#ca8861");

// column: facet by age group with a spacing of 10.
bar.encode("column", "age").type("ordinal").spacing(10);

bar.show();

Stacked Bar Chart

java
// Stacked bars: record count per month, split by weather type.
var bar = new View("Stacked Bar Plot");
bar.mark("bar");
bar.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

// x: the "date" field with a "month" time unit.
Field month = bar.encode("x", "date").type("ordinal");
month.timeUnit("month").title("Month of the year");

// y: count aggregate; no field name is given (null).
bar.encode("y", null).type("quantitative").aggregate("count");

// color: explicit domain and range lists for the weather values, plus a legend title.
Field weather = bar.encode("color", "weather").type("nominal");
weather.domain("sun", "fog", "drizzle", "rain", "snow")
       .range("#e7ba52", "#c7c7c7", "#aec7e8", "#1f77b4", "#9467bd")
       .legend().title("Weather type");

bar.show();

Stacked Bar Chart with Rounded Corners

java
// Stacked monthly weather counts; the bar mark gets a corner radius of 3 on both top corners.
var bar = new View("Stacked Bar with Rounded Corner");

var roundedBar = bar.mark("bar");
roundedBar.cornerRadiusTopLeft(3).cornerRadiusTopRight(3);

bar.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

// x: the "date" field with a "month" time unit.
bar.encode("x", "date")
   .type("ordinal")
   .timeUnit("month")
   .title("Month of the year");
// y: count aggregate; no field name is given (null).
bar.encode("y", null)
   .type("quantitative")
   .aggregate("count");
// color: one color per weather value.
bar.encode("color", "weather").type("nominal");

bar.show();

Horizontal Stacked Bar Chart

java
// Horizontal stacked bars: summed barley yield per variety, colored by site.
var bar = new View("Horizontal Stacked Bar");
bar.mark("bar");

var barley = bar.data();
barley.url("https://vega.github.io/vega-lite/examples/data/barley.json");

// x carries the quantitative sum, so the bars extend horizontally.
Field totalYield = bar.encode("x", "yield").type("quantitative");
totalYield.aggregate("sum");

// y: one row per variety; color: one segment per site.
bar.encode("y", "variety").type("nominal");
bar.encode("color", "site").type("nominal");

bar.show();

Normalized (Percentage) Stacked Bar Chart

java
// Stacked bars per age group with the y channel stacked in "normalize" mode.
var bar = new View("Normalized (Percentage) Stacked Bar").widthStep(17);
bar.mark("bar");

// Background opacity of 0.7.
var background = bar.background();
background.opacity(0.7);

bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Keep year 2000 only, then derive a "gender" label from the numeric "sex" code.
var year2000 = bar.transform().filter("datum.year == 2000");
year2000.calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

bar.encode("x", "age").type("ordinal");

// y: summed population, titled "population" and stacked with "normalize".
Field population = bar.encode("y", "people").type("quantitative");
population.aggregate("sum").title("population").stack("normalize");

// color: a fixed two-color range for the gender values.
Field gender = bar.encode("color", "gender").type("nominal");
gender.range("#675193", "#ca8861");

bar.show();

Layered Bar Chart

java
// Bars per age group and gender with the y channel's stack set to null.
var bar = new View("Layered Bar").widthStep(17);
bar.mark("bar");
bar.background().opacity(0.7);
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Transform pipeline: filter to year 2000, then compute the "gender" label.
var pipeline = bar.transform();
pipeline.filter("datum.year == 2000")
        .calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

// x: age group.
bar.encode("x", "age").type("ordinal");

// y: summed population titled "population"; stack is explicitly null.
bar.encode("y", "people")
   .type("quantitative")
   .aggregate("sum")
   .title("population")
   .stack(null);

// color: a fixed two-color range for the gender values.
bar.encode("color", "gender")
   .type("nominal")
   .range("#675193", "#ca8861");

bar.show();

Gantt Chart (Ranged Bar Marks)

java
// Gantt chart: each task is a bar spanning from its "start" value (x) to its "end" value (x2).
var gantt = new View("Gantt Chart");

// Three inline tasks with numeric start and end values.
var tasks = gantt.data();
tasks.values("""
                [
                  {"task": "A", "start": 1, "end": 3},
                  {"task": "B", "start": 3, "end": 8},
                  {"task": "C", "start": 8, "end": 10}
                ]""");

gantt.mark("bar");

// x and x2 give the two ends of each bar.
gantt.encode("x", "start").type("quantitative");
gantt.encode("x2", "end").type("quantitative");

// y: one row per task.
gantt.encode("y", "task").type("ordinal");

gantt.show();

A Bar Chart Encoding Color Names in the Data

java
// Bars whose color channel reads the "color" field of the data, with the color scale set to null.
var bar = new View("Bar Chart Encoding Color Names in the Data");

// Inline records: a color name and a value.
var records = bar.data();
records.values("""
                [
                  {"color": "red", "b": 28},
                  {"color": "green", "b": 55},
                  {"color": "blue", "b": 43}
                ]""");

bar.mark("bar");
bar.encode("x", "color").type("nominal");
bar.encode("y", "b").type("quantitative");

// scale(null) on the color channel.
Field fill = bar.encode("color", "color").type("nominal");
fill.scale(null);

bar.show();

Histogram

java
// Histogram of IMDB ratings: the rating is binned on x and records are counted per bin on y.
var histogram = new View("Histogram");
histogram.mark("bar");
histogram.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");

// x: bin(true) bins the rating.
histogram.encode("x", "IMDB Rating").type("quantitative").bin(true);
// y: count aggregate; no field name is given (null).
histogram.encode("y", null).type("quantitative").aggregate("count");
// No color channel is encoded: the movies data has no "gender" field to color by.

histogram.show(true);

Relative Frequency Histogram

java
// Relative frequency histogram of car horsepower, with bins and shares computed by explicit transforms.
var histogram = new View("Relative Frequency Histogram");

var bars = histogram.mark("bar");
bars.tooltip(true);

histogram.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");

// x/x2 read the bin start and end fields produced by the transform pipeline, hence bin("binned").
Field binStart = histogram.encode("x", "bin_Horsepwoer").type("quantitative");
binStart.bin("binned").title("Horsepower");
histogram.encode("x2", "bin_Horsepwoer_end").type("quantitative");

// y: the computed share, with the axis format ".1~%".
Field share = histogram.encode("y", "PercentOfTotal").type("quantitative");
share.title("Relative Frequency").axis().format(".1~%");

// Pipeline: bin Horsepower, aggregate a "Count", join-aggregate its sum as "TotalCount",
// then calculate Count / TotalCount as "PercentOfTotal".
var binned = histogram.transform().bin("Horsepower", "bin_Horsepwoer");
binned.aggregate("count", null, "Count", "bin_Horsepwoer", "bin_Horsepwoer_end")
      .joinAggregate("sum", "Count", "TotalCount")
      .calculate("datum.Count/datum.TotalCount", "PercentOfTotal");

histogram.show();

2D Histogram Heatmap

java
// 2D histogram: rect marks colored by the record count in each (IMDB, Rotten Tomatoes) bin.
var heatmap = new View("2D Histogram Heatmap").width(300).height(200);
heatmap.mark("rect");
heatmap.viewConfig().stroke("transparent");
heatmap.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");

// x: IMDB rating with maxBins(60).
Field imdb = heatmap.encode("x", "IMDB Rating").type("quantitative");
imdb.title("IMDB Rating").bin(new BinParams().maxBins(60));

// y: Rotten Tomatoes rating with maxBins(40).
Field tomatoes = heatmap.encode("y", "Rotten Tomatoes Rating").type("quantitative");
tomatoes.bin(new BinParams().maxBins(40));

// color: count aggregate; no field name is given (null).
heatmap.encode("color", null).type("quantitative").aggregate("count");

// Filter on the conjunction of valid(...) for both rating fields.
var bothRatingsValid = and(valid("IMDB Rating"), valid("Rotten Tomatoes Rating"));
heatmap.transform().filter(bothRatingsValid);

heatmap.show();

Density Plot

java
// Density plot of IMDB ratings drawn as an area.
var density = new View("Density Plot").width(400).height(100);
density.mark("area");
density.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");

// NOTE(review): "value" and "density" are presumably the density transform's default
// output field names; confirm against the Vega-Lite density transform documentation.
density.encode("x", "value").type("quantitative").title("IMDB Rating");
density.encode("y", "density").type("quantitative");

// Density transform over "IMDB Rating" with a bandwidth of 0.3.
density.transform().density("IMDB Rating").bandwidth(0.3);

// Terminated with a semicolon like every other cell, so the snippet is also valid Java source.
density.show(true);

Cumulative Frequency Distribution

java
// Cumulative frequency distribution of IMDB ratings drawn as an area.
var cdf = new View("Cumulative Frequency Distribution");
cdf.mark("area");
cdf.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");

// x: the rating; y: the windowed sum produced by the pipeline below.
cdf.encode("x", "IMDB Rating").type("quantitative");
cdf.encode("y", "Cumulative Count").type("quantitative");

// Window field: "sum" over "count", written to "Cumulative Count".
var runningTotal = new WindowTransformField("sum", "count", 0, "Cumulative Count");

// Pipeline: aggregate a "count" keyed on "IMDB Rating", then apply the window
// sorted by rating with frame (null, 0).
var counts = cdf.transform().aggregate("count", "*", "count", "IMDB Rating");
counts.window(runningTotal)
      .sort("IMDB Rating")
      .frame(null, 0);

cdf.show();

Scatterplot

java
// Scatter plot of horsepower against miles per gallon; Origin drives both color and shape.
var scatter = new View("Scatter Plot");
scatter.mark("point");

var cars = scatter.data();
cars.url("https://vega.github.io/vega-lite/examples/data/cars.json");

// Position channels.
scatter.encode("x", "Horsepower").type("quantitative");
scatter.encode("y", "Miles_per_Gallon").type("quantitative");

// The same nominal field is mapped to two channels.
scatter.encode("color", "Origin").type("nominal");
scatter.encode("shape", "Origin").type("nominal");

scatter.show();

Bubble Plot

java
// Bubble plot: horsepower against miles per gallon, with Acceleration on the size channel.
var bubble = new View("Bubble Plot");
bubble.mark("point");

var cars = bubble.data();
cars.url("https://vega.github.io/vega-lite/examples/data/cars.json");

// Position channels.
bubble.encode("x", "Horsepower").type("quantitative");
bubble.encode("y", "Miles_per_Gallon").type("quantitative");

// size: a third quantitative field.
Field acceleration = bubble.encode("size", "Acceleration");
acceleration.type("quantitative");

bubble.show();

Natural Disasters

java
// Circles per (Year, Entity) from the disasters data, with Deaths on the size channel.
var disaster = new View("Natural Disasters").width(600).height(400);

// Circle mark: opacity 0.8, black stroke, stroke width 1.
var circles = disaster.mark("circle");
circles.opacity(0.8).stroke("black").strokeWidth(1);

disaster.data().url("https://vega.github.io/vega-lite/examples/data/disasters.csv");

// Drop the rows whose Entity is 'All natural disasters'.
disaster.transform().filter("datum.Entity !== 'All natural disasters'");

// x: years, with a label angle of 90 and "greedy" label overlap handling.
Field year = disaster.encode("x", "Year").type("ordinal");
year.axis().labelAngle(90).labelOverlap("greedy");

// y: entity, with the title set to null.
disaster.encode("y", "Entity").type("nominal").title(null);

// color: entity, with the legend removed.
disaster.encode("color", "Entity").type("nominal").removeLegend();

// size: range (0, 5000), with a titled legend and a clip height of 30.
Field deaths = disaster.encode("size", "Deaths").type("quantitative");
deaths.range(0, 5000)
      .legend().title("Annual Global Deaths").clipHeight(30);

disaster.show();

Text Plot

java
// Text marks at (Horsepower, Miles_per_Gallon); both the text and the color come from "Brand".
var textPlot = new View("Text Plot");
textPlot.mark("text");
textPlot.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");

// "Brand" is calculated as the first space-separated token of the car name.
var derived = textPlot.transform();
derived.calculate("split(datum.Name, ' ')[0]", "Brand");

// Position channels.
textPlot.encode("x", "Horsepower").type("quantitative");
textPlot.encode("y", "Miles_per_Gallon").type("quantitative");

// The calculated field is mapped to two channels.
textPlot.encode("color", "Brand").type("nominal");
textPlot.encode("text", "Brand").type("nominal");

textPlot.show();

Line Chart

java
// Line chart of price over date for the rows whose symbol is 'GOOG'.
var line = new View("Line Chart");
line.mark("line");

var stocks = line.data();
stocks.url("https://vega.github.io/vega-lite/examples/data/stocks.csv");

// Keep a single symbol.
var onlyGoog = line.transform();
onlyGoog.filter("datum.symbol==='GOOG'");

// x: date; y: price.
line.encode("x", "date").type("temporal");
line.encode("y", "price").type("quantitative");

line.show();

Line Chart with Point Markers

java
// One line per stock symbol showing the mean price per year, with point(true) on the line mark.
var pointLine = new View("Line Chart with Point Mark");

var lineMark = pointLine.mark("line");
lineMark.point(true);

pointLine.data().url("https://vega.github.io/vega-lite/examples/data/stocks.csv");

// x: the date with a "year" time unit.
Field year = pointLine.encode("x", "date").type("temporal");
year.timeUnit("year");

// y: mean of price.
Field meanPrice = pointLine.encode("y", "price").type("quantitative");
meanPrice.aggregate("mean");

// color: one series per symbol.
pointLine.encode("color", "symbol").type("nominal");

pointLine.show();

Line Chart with Confidence Interval Band

java
// Layer 1: line of the mean Miles_per_Gallon per year.
var line = new View();
line.mark("line");
Field lineYear = line.encode("x", "Year").type("temporal");
lineYear.timeUnit("year");
Field lineMean = line.encode("y", "Miles_per_Gallon").type("quantitative");
lineMean.aggregate("mean");

// Layer 2: error band with extent "ci" over the same x and y fields.
var band = new View();
var errorBand = band.mark("errorband");
errorBand.extent("ci");
Field bandYear = band.encode("x", "Year").type("temporal");
bandYear.timeUnit("year");
Field bandValue = band.encode("y", "Miles_per_Gallon").type("quantitative");
bandValue.title("Mean of Miles per Gallon (95% CIs)");

// Both layers are combined, and the data URL is set on the combined layer.
var confidenceInterval = new Layer(line, band).title("Line Chart with Confidence Interval Band");
var cars = confidenceInterval.data();
cars.url("https://vega.github.io/vega-lite/examples/data/cars.json");
confidenceInterval.show();

Rolling Averages over Raw Values

java
// Layer 1: red line of size 3 over the "rolling_mean" field.
var line = new View();
var lineMark = line.mark("line");
lineMark.color("red").size(3);
line.encode("x", "date").type("temporal");
line.encode("y", "rolling_mean").type("quantitative");

// Layer 2: raw temp_max points at opacity 0.3.
var point = new View();
var pointMark = point.mark("point");
pointMark.opacity(0.3);
Field date = point.encode("x", "date").type("temporal");
date.title("Date");
Field maxTemp = point.encode("y", "temp_max").type("quantitative");
maxTemp.title("Max Temperature");

// Both layers are combined; data and transform are set on the combined layer.
var rollingAverages = new Layer(line, point).title("Rolling Averages over Raw Values").width(400).height(300);
var csv = rollingAverages.data().format("csv");
csv.url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

// Window field: "mean" over "temp_max", written to "rolling_mean", with frame (-15, 15).
var smoothing = new WindowTransformField("mean", "temp_max", 0, "rolling_mean");
rollingAverages.transform().window(smoothing).frame(-15, 15);

rollingAverages.show();

Area Chart with Overlaying Lines and Point Markers

java
// Area chart of the 'GOOG' price over date, with line(true) and point(true) on the area mark.
var area = new View("Area Chart with Overlaying Lines and Point Markers");

var stocks = area.data();
stocks.url("https://vega.github.io/vega-lite/examples/data/stocks.csv");

// Keep a single symbol.
area.transform().filter("datum.symbol==='GOOG'");

// Area mark with the line and point options switched on.
var areaMark = area.mark("area");
areaMark.line(true).point(true);

// x: date; y: price.
area.encode("x", "date").type("temporal");
area.encode("y", "price").type("quantitative");

area.show();

Annual Weather Heatmap

java
// Calendar-style heatmap: "date" time unit on x, "month" time unit on y, max temp_max as color.
var heatmap = new View("2010 Daily Max Temperature (F) in Seattle, WA");
heatmap.mark("rect");
heatmap.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

// x: titled "Day", with a label angle of 0 and the axis format "%e".
Field day = heatmap.encode("x", "date").type("ordinal");
day.timeUnit("date").title("Day")
   .axis().labelAngle(0).format("%e");

// y: the same field with a "month" time unit.
Field month = heatmap.encode("y", "date").type("ordinal");
month.timeUnit("month");

// color: max of temp_max, with the legend title set to null.
Field hottest = heatmap.encode("color", "temp_max").type("quantitative");
hottest.aggregate("max").legend().title(null);

// Configuration: axis domain off; view stroke width 0 and step 13.
var axisConfig = heatmap.config().axis();
axisConfig.domain(false);
var viewConfig = heatmap.viewConfig();
viewConfig.strokeWidth(0).step(13);

heatmap.show();

Donut Chart

java
// Donut chart: arc marks with an inner radius of 50, sized by "value" and colored by "category".
var donut = new View("Donut Chart");

// Six inline category/value records.
var slices = donut.data();
slices.values("""
                [
                  {"category": 1, "value": 4},
                  {"category": 2, "value": 6},
                  {"category": 3, "value": 10},
                  {"category": 4, "value": 3},
                  {"category": 5, "value": 7},
                  {"category": 6, "value": 8}
                ]""");

// Arc mark with the inner radius set.
var arcMark = donut.mark("arc");
arcMark.innerRadius(50);

// theta: the value; color: the category.
donut.encode("theta", "value").type("quantitative");
donut.encode("color", "category").type("nominal");

// View stroke set to null.
donut.viewConfig().stroke(null);

donut.show();

Radial Plot

This radial plot uses both angular and radial extent to convey multiple dimensions of data. However, this approach is not perceptually effective, as viewers will most likely be drawn to the total area of the shape, conflating the two dimensions. This example also demonstrates a way to add labels to circular plots.

java
// Layer 1: arcs with an inner radius of 20 and a "#fff" stroke.
var arc = new View();
var arcMark = arc.mark("arc");
arcMark.innerRadius(20).stroke("#fff");

// Layer 2: text labels showing the data value, with a radius offset of 10.
var text = new View();
var label = text.mark("text");
label.radiusOffset(10);
text.encode("text", "data").type("quantitative");

// Combined chart; the encodings below are set on the combined layer.
var radial = new Layer(arc, text).title("Radial Chart");
radial.background().stroke(null);
radial.data().values("[12, 23, 47, 6, 52, 19]");

// theta: stacked from "zero".
var angle = radial.encode("theta", "data").type("quantitative");
angle.stack("zero");

// radius: "sqrt" scale with zero(true) and range (20, 100).
var radius = radial.encode("radius", "data").type("quantitative");
radius.scale("sqrt").zero(true).range(20, 100);

// color: one color per value, with the legend removed.
var fill = radial.encode("color", "data").type("nominal");
fill.removeLegend();

radial.show();

Box Plot

java
// Box plot of "people" per age group, with the boxplot extent set to "min-max".
var boxplot = new View("Box Plot");

// Background stroke set to null.
var background = boxplot.background();
background.stroke(null);

// Boxplot mark with its extent option.
var box = boxplot.mark("boxplot");
box.extent("min-max");

// View stroke set to null.
boxplot.viewConfig().stroke(null);

boxplot.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// x: age group; y: people, titled "population".
boxplot.encode("x", "age").type("ordinal");
Field population = boxplot.encode("y", "people").type("quantitative");
population.title("population");

boxplot.show();

Vertical Concatenation

java
// Combines the donut and boxplot views built in the earlier cells via Concat.vertical.
var concat = Concat
        .vertical(donut, boxplot)
        .title("Vertical Concatenation");

concat.show();

Scatterplot Matrix (SPLOM)

java
// Template cell: a 150x150 point plot whose x and y fields come from the repeat column and row.
var plot = new View().width(150).height(150);
plot.mark("point");

// zero(false) on both position channels.
Field horizontal = plot.encode("x", "repeat:column").type("quantitative");
horizontal.zero(false);
Field vertical = plot.encode("y", "repeat:row").type("quantitative");
vertical.zero(false);

// color: species.
plot.encode("color", "species").type("nominal");

// Field lists for the rows and columns; the row list is the column list in reverse order.
String[] row = {"petalWidth", "petalLength", "sepalWidth", "sepalLength"};
String[] column = {"sepalLength", "sepalWidth", "petalLength", "petalWidth"};

// The template is repeated over the row and column field lists.
var splom = new Repeat(plot, row, column).title("Scatter Plot Matrix");
var iris = splom.data();
iris.url("https://raw.githubusercontent.com/domoritz/maps/master/data/iris.json");

splom.show(true);

Choropleth of Unemployment Rate per County

java
// Choropleth: county shapes from a TopoJSON file, colored by the "rate" field.
var geo = new View("Choropleth of Unemployment Rate per County").width(500).height(300);
geo.mark("geoshape");

// Shapes: the "counties" object of the TopoJSON file, read as "feature".
geo.data().topojson("https://vega.github.io/vega-lite/examples/data/us-10m.json", "feature", "counties");

// color: the looked-up rate.
geo.encode("color", "rate").type("quantitative");
geo.projection("albersUsa");

// Lookup: take the "rate" field from the unemployment table, keyed on "id" on both sides.
var transform = geo.transform();
var lookupData = transform.lookupData("id").fields("rate");
lookupData.data().url("https://vega.github.io/vega-lite/examples/data/unemployment.tsv");
transform.lookup("id", lookupData);

geo.show();