studio/src/universal/notebooks/plot-vega.ipynb
A picture is worth a thousand words. In machine learning, we usually handle high-dimensional data, which is impossible to draw on a display directly. Nevertheless, a variety of statistical plots are tremendously valuable for grasping the characteristics of many data points. Smile provides data visualization tools such as plots and maps to help researchers understand information more easily and quickly.
With the smile.plot.vega package, we can create a specification that describes visualizations as mappings from data to properties of graphical marks (e.g., points or bars). The specification is based on Vega-Lite. The Vega-Lite compiler automatically produces visualization components including axes, legends, and scales. It then determines properties of these components based on a set of carefully designed rules.
This approach allows specifications to be succinct and expressive while still providing user control. As Vega-Lite is designed for analysis, it supports data transformations such as aggregation, binning, filtering, sorting, and visual transformations including stacking and faceting. Moreover, Vega-Lite specifications can be composed into layered and multi-view displays, and made interactive with selections.
import smile.plot.vega.*;
import static smile.plot.vega.Predicate.*;
A bar chart encodes quantitative values as the extent of rectangular bars.
// Simple bar chart: one bar per category "a", bar extent taken from "b".
var bar = new View("Simple Bar Plot")
    .description("A simple bar chart with embedded data.")
    .widthStep(30);

// The nine records are embedded inline in the specification.
bar.data().values("""
[
{"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43},
{"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53},
{"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}
]""");
bar.mark("bar");

// y channel: the quantitative value.
var yChannel = bar.encode("y", "b");
yChannel.type("quantitative");

// x channel: the ordinal category, with the axis label angle set to 0.
Field x = bar.encode("x", "a").type("ordinal");
x.axis().labelAngle(0);

bar.show();
A bar chart showing the US population distribution of age groups in 2000.
// Aggregate bar chart: summed "people" per "age" group for the year 2000.
var bar = new View("Aggregate Bar Plot")
    .description("A bar chart showing the US population distribution of age groups in 2000.")
    .heightStep(20);

// Data source and row filter.
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");
bar.transform().filter("datum.year == 2000");
bar.mark("bar");

// Age groups on y; sum of "people" on x, titled "population".
bar.encode("y", "age").type("ordinal");
var totalPeople = bar.encode("x", "people").type("quantitative").aggregate("sum");
totalPeople.title("population");

bar.show();
A bar chart that sorts the y-values by the x-values.
// Same aggregate bar chart as above, but with the y categories sorted by "-x".
var bar = new View("Sorted Aggregate Bar Plot")
    .description("A bar chart that sorts the y-values by the x-values.")
    .heightStep(20);

// Data source and row filter.
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");
bar.transform().filter("datum.year == 2000");
bar.mark("bar");

// Sum of "people" on x, titled "population".
var totalPeople = bar.encode("x", "people").type("quantitative").aggregate("sum");
totalPeople.title("population");

// Age groups on y, ordered by the x channel.
var ageAxis = bar.encode("y", "age").type("ordinal");
ageAxis.sort("-x");

bar.show();
// Grouped bar chart: one column per age group, one bar per gender.
var bar = new View("Group Bar Plot").widthStep(12);
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Keep year 2000 only, then derive a "gender" label from the numeric sex code.
var year2000 = bar.transform().filter("datum.year == 2000");
year2000.calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

bar.mark("bar");
bar.viewConfig().stroke("transparent").axis().domainWidth(1);

// Column facet by age group.
bar.encode("column", "age").type("ordinal").spacing(10);
// Gender on x (title set to null) and as color with an explicit two-color range.
bar.encode("x", "gender").type("nominal").title(null);
bar.encode("color", "gender").type("nominal").range("#675193", "#ca8861");
// Sum of "people" on y; axis titled "population", grid disabled.
var totalPeople = bar.encode("y", "people").type("quantitative").aggregate("sum");
totalPeople.axis().title("population").grid(false);

bar.show();
// Stacked bar chart: record count per month, colored by weather type.
var bar = new View("Stacked Bar Plot");
bar.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");
bar.mark("bar");

// Month of the date on x; record count on y.
bar.encode("x", "date").type("ordinal").timeUnit("month").title("Month of the year");
bar.encode("y", null).type("quantitative").aggregate("count");

// Explicit domain/range pairing for the weather colors, plus a legend title.
var weatherColor = bar.encode("color", "weather").type("nominal")
    .domain("sun", "fog", "drizzle", "rain", "snow")
    .range("#e7ba52", "#c7c7c7", "#aec7e8", "#1f77b4", "#9467bd");
weatherColor.legend().title("Weather type");

bar.show();
// Stacked bar chart whose bar marks have a corner radius of 3 on the top-left and top-right.
var bar = new View("Stacked Bar with Rounded Corner");
bar.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

var barMark = bar.mark("bar");
barMark.cornerRadiusTopLeft(3).cornerRadiusTopRight(3);

// Month of the date on x, record count on y, weather as color.
bar.encode("x", "date").type("ordinal").timeUnit("month").title("Month of the year");
bar.encode("y", null).type("quantitative").aggregate("count");
bar.encode("color", "weather").type("nominal");

bar.show();
// Horizontal stacked bar chart: summed yield per variety, colored by site.
var bar = new View("Horizontal Stacked Bar");
bar.data().url("https://vega.github.io/vega-lite/examples/data/barley.json");
bar.mark("bar");

// Variety on y, site as color.
bar.encode("y", "variety").type("nominal");
bar.encode("color", "site").type("nominal");
// Sum of "yield" on x.
var totalYield = bar.encode("x", "yield").type("quantitative");
totalYield.aggregate("sum");

bar.show();
// Normalized stacked bar chart: population per age group, stacked with "normalize".
var bar = new View("Normalized (Percentage) Stacked Bar").widthStep(17);
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Keep year 2000 only, then derive a "gender" label from the numeric sex code.
var year2000 = bar.transform().filter("datum.year == 2000");
year2000.calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

bar.mark("bar");
bar.background().opacity(0.7);

// Age on x; gender as color with an explicit two-color range.
bar.encode("x", "age").type("ordinal");
bar.encode("color", "gender").type("nominal").range("#675193", "#ca8861");
// Sum of "people" on y, titled "population", with "normalize" stacking.
var totalPeople = bar.encode("y", "people").type("quantitative").aggregate("sum").title("population");
totalPeople.stack("normalize");

bar.show();
// Layered bar chart: same data as the normalized chart, but the y stack is set to null.
var bar = new View("Layered Bar").widthStep(17);
bar.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Keep year 2000 only, then derive a "gender" label from the numeric sex code.
var year2000 = bar.transform().filter("datum.year == 2000");
year2000.calculate("datum.sex == 2 ? 'Female' : 'Male'", "gender");

bar.mark("bar");
bar.background().opacity(0.7);

// Age on x; gender as color with an explicit two-color range.
bar.encode("x", "age").type("ordinal");
bar.encode("color", "gender").type("nominal").range("#675193", "#ca8861");
// Sum of "people" on y, titled "population", with stacking set to null.
var totalPeople = bar.encode("y", "people").type("quantitative").aggregate("sum").title("population");
totalPeople.stack(null);

bar.show();
// Gantt chart: each task is a bar spanning from "start" (x) to "end" (x2).
var gantt = new View("Gantt Chart");
gantt.mark("bar");

// Three inline task records.
gantt.data().values("""
[
{"task": "A", "start": 1, "end": 3},
{"task": "B", "start": 3, "end": 8},
{"task": "C", "start": 8, "end": 10}
]""");

// Task on y; bar start and end on x and x2.
gantt.encode("y", "task").type("ordinal");
gantt.encode("x", "start").type("quantitative");
gantt.encode("x2", "end").type("quantitative");

gantt.show();
// Bar chart whose "color" data field is used both as the x category and as the color channel.
var bar = new View("Bar Chart Encoding Color Names in the Data");
bar.mark("bar");

// Three inline records.
bar.data().values("""
[
{"color": "red", "b": 28},
{"color": "green", "b": 55},
{"color": "blue", "b": 43}
]""");

bar.encode("x", "color").type("nominal");
bar.encode("y", "b").type("quantitative");
// The color channel's scale is explicitly set to null.
var colorChannel = bar.encode("color", "color").type("nominal");
colorChannel.scale(null);

bar.show();
// Histogram of IMDB ratings: binned rating on x, record count on y.
var histogram = new View("Histogram");
histogram.mark("bar");
histogram.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");
// Bin the rating on x.
histogram.encode("x", "IMDB Rating").type("quantitative").bin(true);
// A null field with the "count" aggregate counts the records in each bin.
histogram.encode("y", null).type("quantitative").aggregate("count");
// No color encoding: the movies data set has no "gender" field to color by.
histogram.show(true);
// Relative frequency histogram: per-bin share of the total count of Horsepower records.
var histogram = new View("Relative Frequency Histogram");
histogram.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");

var barMark = histogram.mark("bar");
barMark.tooltip(true);

// Pipeline: bin Horsepower -> count per bin -> total count -> count / total.
var binned = histogram.transform().bin("Horsepower", "bin_Horsepwoer");
var counted = binned.aggregate("count", null, "Count", "bin_Horsepwoer", "bin_Horsepwoer_end");
var totalled = counted.joinAggregate("sum", "Count", "TotalCount");
totalled.calculate("datum.Count/datum.TotalCount", "PercentOfTotal");

// x/x2 use the fields produced by the bin step, so x is marked as "binned".
histogram.encode("x", "bin_Horsepwoer").type("quantitative").bin("binned").title("Horsepower");
histogram.encode("x2", "bin_Horsepwoer_end").type("quantitative");
// y shows the computed fraction with the ".1~%" axis format.
var share = histogram.encode("y", "PercentOfTotal").type("quantitative").title("Relative Frequency");
share.axis().format(".1~%");

histogram.show();
// 2D histogram heatmap: record count per (IMDB rating bin, Rotten Tomatoes rating bin) cell.
var heatmap = new View("2D Histogram Heatmap").width(300).height(200);
heatmap.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");

// Only rows for which both rating fields satisfy the valid(...) predicate are kept.
var bothRatingsValid = and(valid("IMDB Rating"), valid("Rotten Tomatoes Rating"));
heatmap.transform().filter(bothRatingsValid);

heatmap.mark("rect");
heatmap.viewConfig().stroke("transparent");

// Bin limits: at most 60 bins on x and 40 on y.
var xBins = new BinParams().maxBins(60);
var yBins = new BinParams().maxBins(40);
heatmap.encode("x", "IMDB Rating").type("quantitative").title("IMDB Rating").bin(xBins);
heatmap.encode("y", "Rotten Tomatoes Rating").type("quantitative").bin(yBins);
// Color carries the record count of each cell.
heatmap.encode("color", null).type("quantitative").aggregate("count");

heatmap.show();
// Density plot of IMDB ratings drawn as an area.
var density = new View("Density Plot").width(400).height(100);
density.mark("area");
density.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");
// The encodings read the "value" and "density" fields; these are presumably the
// outputs of the density transform configured below (confirm against the API).
density.encode("x", "value").type("quantitative").title("IMDB Rating");
density.encode("y", "density").type("quantitative");
// Density estimate of "IMDB Rating" with a bandwidth of 0.3.
density.transform().density("IMDB Rating").bandwidth(0.3);
// Terminated with a semicolon like every other cell, so it stays a valid statement
// if the cell is extended or exported into a regular Java source file.
density.show(true);
// Cumulative frequency distribution of IMDB ratings, drawn as an area.
var cdf = new View("Cumulative Frequency Distribution");
cdf.data().url("https://vega.github.io/vega-lite/examples/data/movies.json");
cdf.mark("area");

// Step 1: count records per "IMDB Rating" into the field "count".
var perRating = cdf.transform().aggregate("count", "*", "count", "IMDB Rating");
// Step 2: window sum of "count" into "Cumulative Count", sorted by rating,
// with frame (null, 0).
var runningTotal = new WindowTransformField("sum", "count", 0, "Cumulative Count");
perRating.window(runningTotal)
    .sort("IMDB Rating")
    .frame(null, 0);

cdf.encode("x", "IMDB Rating").type("quantitative");
cdf.encode("y", "Cumulative Count").type("quantitative");

cdf.show();
// Scatter plot of Horsepower vs. Miles_per_Gallon; Origin drives both color and shape.
var scatter = new View("Scatter Plot");
scatter.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");
scatter.mark("point");

// Positional channels.
scatter.encode("x", "Horsepower").type("quantitative");
scatter.encode("y", "Miles_per_Gallon").type("quantitative");
// Origin is mapped to two channels: shape and color.
scatter.encode("shape", "Origin").type("nominal");
scatter.encode("color", "Origin").type("nominal");

scatter.show();
// Bubble plot: Horsepower vs. Miles_per_Gallon, with point size given by Acceleration.
var bubble = new View("Bubble Plot");
bubble.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");
bubble.mark("point");

// Size channel.
var bubbleSize = bubble.encode("size", "Acceleration");
bubbleSize.type("quantitative");
// Positional channels.
bubble.encode("x", "Horsepower").type("quantitative");
bubble.encode("y", "Miles_per_Gallon").type("quantitative");

bubble.show();
// Natural disasters: one circle per (Year, Entity), sized by Deaths.
var disaster = new View("Natural Disasters").width(600).height(400);
disaster.data().url("https://vega.github.io/vega-lite/examples/data/disasters.csv");
// Drop the rows whose Entity is 'All natural disasters'.
disaster.transform().filter("datum.Entity !== 'All natural disasters'");

var circles = disaster.mark("circle");
circles.opacity(0.8).stroke("black").strokeWidth(1);

// Year on x with labels at 90 degrees and "greedy" label overlap handling.
var yearAxis = disaster.encode("x", "Year").type("ordinal").axis();
yearAxis.labelAngle(90).labelOverlap("greedy");
// Entity on y (title set to null) and as color (legend removed).
disaster.encode("y", "Entity").type("nominal").title(null);
disaster.encode("color", "Entity").type("nominal").removeLegend();
// Size range 0..5000 with a titled legend and a clip height of 30.
var deathsSize = disaster.encode("size", "Deaths").type("quantitative").range(0, 5000);
deathsSize.legend().title("Annual Global Deaths").clipHeight(30);

disaster.show();
// Text plot: each car is drawn as its brand name at (Horsepower, Miles_per_Gallon).
var textPlot = new View("Text Plot");
textPlot.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");
// "Brand" is the first space-separated token of the car's Name.
textPlot.transform().calculate("split(datum.Name, ' ')[0]", "Brand");
textPlot.mark("text");

// Brand supplies both the text and the color.
textPlot.encode("text", "Brand").type("nominal");
textPlot.encode("color", "Brand").type("nominal");
// Positional channels.
textPlot.encode("x", "Horsepower").type("quantitative");
textPlot.encode("y", "Miles_per_Gallon").type("quantitative");

textPlot.show();
// Line chart of price over date for the rows whose symbol is 'GOOG'.
var line = new View("Line Chart");
line.data().url("https://vega.github.io/vega-lite/examples/data/stocks.csv");
line.transform().filter("datum.symbol==='GOOG'");
line.mark("line");

// Price on y, date on x.
var priceAxis = line.encode("y", "price");
priceAxis.type("quantitative");
var dateAxis = line.encode("x", "date");
dateAxis.type("temporal");

line.show();
// Line chart with point markers: mean price per year, one colored line per symbol.
var pointLine = new View("Line Chart with Point Mark");
pointLine.data().url("https://vega.github.io/vega-lite/examples/data/stocks.csv");

// Line mark with points enabled.
var lineMark = pointLine.mark("line");
lineMark.point(true);

pointLine.encode("color", "symbol").type("nominal");
// Date on x with the "year" time unit; mean price on y.
pointLine.encode("x", "date").type("temporal").timeUnit("year");
var meanPrice = pointLine.encode("y", "price").type("quantitative");
meanPrice.aggregate("mean");

pointLine.show();
// Layer 1: line of the mean Miles_per_Gallon per year.
var line = new View();
line.mark("line");
line.encode("x", "Year").type("temporal").timeUnit("year");
var meanMpg = line.encode("y", "Miles_per_Gallon").type("quantitative");
meanMpg.aggregate("mean");

// Layer 2: error band with extent "ci" over the same x and y fields.
var band = new View();
var bandMark = band.mark("errorband");
bandMark.extent("ci");
band.encode("x", "Year").type("temporal").timeUnit("year");
var bandY = band.encode("y", "Miles_per_Gallon").type("quantitative");
bandY.title("Mean of Miles per Gallon (95% CIs)");

// Combine the two layers (line first, band second); the data is set on the layered chart.
var confidenceInterval = new Layer(line, band).title("Line Chart with Confidence Interval Band");
confidenceInterval.data().url("https://vega.github.io/vega-lite/examples/data/cars.json");
confidenceInterval.show();
// Layer 1: red line of width 3 showing the "rolling_mean" field over date.
var line = new View();
var lineMark = line.mark("line");
lineMark.color("red").size(3);
line.encode("x", "date").type("temporal");
line.encode("y", "rolling_mean").type("quantitative");

// Layer 2: raw temp_max values as points with opacity 0.3.
var point = new View();
var pointMark = point.mark("point");
pointMark.opacity(0.3);
point.encode("x", "date").type("temporal").title("Date");
point.encode("y", "temp_max").type("quantitative").title("Max Temperature");

// Combine the two layers (line first, point second).
var rollingAverages = new Layer(line, point).title("Rolling Averages over Raw Values").width(400).height(300);
rollingAverages.data().format("csv").url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");

// "rolling_mean" = window mean of temp_max with frame (-15, 15).
var rollingMean = new WindowTransformField("mean", "temp_max", 0, "rolling_mean");
rollingAverages.transform().window(rollingMean).frame(-15, 15);

rollingAverages.show();
// Area chart of price over date for symbol 'GOOG', with line and point overlays enabled.
var area = new View("Area Chart with Overlaying Lines and Point Markers");
area.data().url("https://vega.github.io/vega-lite/examples/data/stocks.csv");
area.transform().filter("datum.symbol==='GOOG'");

// Area mark with both the line and the point option turned on.
var areaMark = area.mark("area");
areaMark.line(true).point(true);

area.encode("y", "price").type("quantitative");
area.encode("x", "date").type("temporal");

area.show();
// Calendar-style heatmap: "date" time unit on x, "month" time unit on y, max temp_max as color.
var heatmap = new View("2010 Daily Max Temperature (F) in Seattle, WA");
heatmap.data().url("https://vega.github.io/vega-lite/examples/data/seattle-weather.csv");
heatmap.mark("rect");

// Chart-level configuration: axis domain disabled, view stroke width 0, step 13.
heatmap.config().axis().domain(false);
heatmap.viewConfig().strokeWidth(0).step(13);

// x: "date" time unit, titled "Day", labels at angle 0 formatted with "%e".
var dayAxis = heatmap.encode("x", "date").type("ordinal").timeUnit("date").title("Day").axis();
dayAxis.labelAngle(0).format("%e");
// y: "month" time unit of the same date field.
heatmap.encode("y", "date").type("ordinal").timeUnit("month");
// color: maximum temp_max per cell; legend title set to null.
var maxTemp = heatmap.encode("color", "temp_max").type("quantitative").aggregate("max");
maxTemp.legend().title(null);

heatmap.show();
// Donut chart: arc mark with inner radius 50; "value" drives theta, "category" the color.
var donut = new View("Donut Chart");

// Six inline records.
donut.data().values("""
[
{"category": 1, "value": 4},
{"category": 2, "value": 6},
{"category": 3, "value": 10},
{"category": 4, "value": 3},
{"category": 5, "value": 7},
{"category": 6, "value": 8}
]""");

var arcMark = donut.mark("arc");
arcMark.innerRadius(50);
// View stroke is set to null.
donut.viewConfig().stroke(null);

donut.encode("color", "category").type("nominal");
donut.encode("theta", "value").type("quantitative");

donut.show();
This radial plot uses both angular and radial extent to convey multiple dimensions of data. However, this approach is not perceptually effective, as viewers will most likely be drawn to the total area of the shape, conflating the two dimensions. This example also demonstrates a way to add labels to circular plots.
// Layer 1: arcs with inner radius 20 and a white ("#fff") stroke.
var arc = new View();
var arcMark = arc.mark("arc");
arcMark.innerRadius(20).stroke("#fff");

// Layer 2: text labels showing the "data" value, with a radius offset of 10.
var text = new View();
var labelMark = text.mark("text");
labelMark.radiusOffset(10);
text.encode("text", "data").type("quantitative");

// Combine the layers; data and the theta/radius/color encodings are set on the layered chart.
var radial = new Layer(arc, text).title("Radial Chart");
radial.data().values("[12, 23, 47, 6, 52, 19]");
radial.background().stroke(null);

// theta: "data" with "zero" stacking.
radial.encode("theta", "data").type("quantitative").stack("zero");
// radius: "data" on a "sqrt" scale, zero enabled, range 20..100.
var radiusChannel = radial.encode("radius", "data").type("quantitative").scale("sqrt");
radiusChannel.zero(true).range(20, 100);
// color: "data" as a nominal field, legend removed.
radial.encode("color", "data").type("nominal").removeLegend();

radial.show();
// Box plot of "people" per "age" group, with the boxplot extent set to "min-max".
var boxplot = new View("Box Plot");
boxplot.data().url("https://vega.github.io/vega-lite/examples/data/population.json");

// Both the view background stroke and the view config stroke are set to null.
boxplot.background().stroke(null);
boxplot.viewConfig().stroke(null);

var boxMark = boxplot.mark("boxplot");
boxMark.extent("min-max");

// Age on x; "people" on y, titled "population".
boxplot.encode("x", "age").type("ordinal");
var peopleAxis = boxplot.encode("y", "people").type("quantitative");
peopleAxis.title("population");

boxplot.show();
// Vertical concatenation of the donut and boxplot views defined in the cells above.
var stackedViews = Concat.vertical(donut, boxplot);
var concat = stackedViews.title("Vertical Concatenation");
concat.show();
// Fields repeated across the rows and columns of the scatter plot matrix.
String[] row = {"petalWidth", "petalLength", "sepalWidth", "sepalLength"};
String[] column = {"sepalLength", "sepalWidth", "petalLength", "petalWidth"};

// Template cell: 150x150 point plot colored by species.
var plot = new View().width(150).height(150);
plot.mark("point");
plot.encode("color", "species").type("nominal");
// x and y take the "repeat:column" / "repeat:row" field references; zero is disabled on both.
var cellX = plot.encode("x", "repeat:column").type("quantitative");
cellX.zero(false);
var cellY = plot.encode("y", "repeat:row").type("quantitative");
cellY.zero(false);

// Repeat the template over the row/column fields; the data is set on the repeated chart.
var splom = new Repeat(plot, row, column).title("Scatter Plot Matrix");
splom.data().url("https://raw.githubusercontent.com/domoritz/maps/master/data/iris.json");
splom.show(true);
// Choropleth: county shapes colored by the "rate" field looked up from a second data set.
var geo = new View("Choropleth of Unemployment Rate per County").width(500).height(300);
// Plain geoshape mark. No extent is set here: in this notebook extent is otherwise
// only used on the boxplot and errorband marks.
geo.mark("geoshape");
// County features from the TopoJSON file.
geo.data().topojson("https://vega.github.io/vega-lite/examples/data/us-10m.json", "feature", "counties");
geo.encode("color", "rate").type("quantitative");
geo.projection("albersUsa");
// Secondary data set keyed by "id", contributing the "rate" field.
var transform = geo.transform();
var lookupData = transform.lookupData("id").fields("rate");
lookupData.data().url("https://vega.github.io/vega-lite/examples/data/unemployment.tsv");
// Look up each shape's "id" in the secondary data set.
geo.transform().lookup("id", lookupData);
geo.show();