Back to Seaborn

Data Structure

doc/_tutorial/data_structure.ipynb

0.13.2 · 2.9 KB
Original Source
python
# Libraries used by this notebook; seaborn (as `sns`) does the plotting.
import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's theme using its default arguments (none are passed here).
sns.set_theme()
python
# Fetch the example dataset named "flights"; it is reused by later cells.
flights = sns.load_dataset(
    "flights",
)
# Final expression of the cell: preview the first rows of the table.
flights.head()
python
# Long-form input: every plot variable is assigned by naming a column of
# `flights` (year on x, passengers on y, month as hue), drawn as lines.
sns.relplot(
    data=flights,
    x="year",
    y="passengers",
    hue="month",
    kind="line",
)
python
# Reshape the long table into wide form: "year" becomes the index, each
# "month" value becomes a column, and cells hold the "passengers" values.
flights_wide = flights.pivot(
    index="year",
    columns="month",
    values="passengers",
)
# Final expression of the cell: preview the first rows of the wide table.
flights_wide.head()
python
# Wide-form input: only the table is passed; no x/y/hue assignments are made.
sns.relplot(
    data=flights_wide,
    kind="line",
)
python
# Same long-form table as before, with the roles swapped: month on x and
# year as hue.
sns.relplot(
    data=flights,
    x="month",
    y="passengers",
    hue="year",
    kind="line",
)
python
# Wide-form input again, but with rows and columns of the table swapped
# before plotting.
sns.relplot(
    data=flights_wide.T,
    kind="line",
)
python
# Pass the wide-form table to a categorical plot, drawn as box plots.
sns.catplot(
    data=flights_wide,
    kind="box",
)
python
import matplotlib.pyplot as plt

# Schematic figure made only of empty, colored axes: a grid labeled
# "Long-form" on the left and a grid labeled "Wide-form" on the right.
f = plt.figure(figsize=(7, 5))

# Left panel: occupies the left 35% of the figure, with a short top row
# (height ratio 1:20) above six columns.
gs = plt.GridSpec(
    ncols=6, nrows=2, figure=f,
    left=0, right=.35, bottom=0, top=.9,
    height_ratios=(1, 20),
    wspace=.1, hspace=.01
)

# Current palette colors, each extended with a fourth component of .5
# (interpreted by matplotlib as the alpha channel of an RGBA tuple).
colors = [c + (.5,) for c in sns.color_palette()]

# Gray strip spanning the whole top row of the left grid.
f.add_subplot(gs[0, :], facecolor=".8")

# One colored block per grid column. A plain loop is used because the axes
# are created purely for their side effect on the figure; no list is needed.
for i in range(gs.ncols):
    f.add_subplot(gs[1:, i], facecolor=colors[i])

# Right panel: a 2x2 grid whose first row and first column are thin
# (ratios 1:8 and 1:11) relative to the main cell.
gs = plt.GridSpec(
    ncols=2, nrows=2, figure=f,
    left=.4, right=1, bottom=.2, top=.8,
    height_ratios=(1, 8), width_ratios=(1, 11),
    wspace=.015, hspace=.02
)

# Top strip, left strip, and main cell; the top-left corner is left empty.
f.add_subplot(gs[0, 1:], facecolor=colors[2])
f.add_subplot(gs[1:, 0], facecolor=colors[1])
f.add_subplot(gs[1, 1], facecolor=colors[0])

# Strip ticks from every axes so only the colored rectangles remain.
for ax in f.axes:
    ax.set(xticks=[], yticks=[])

# Titles centered over each panel, in figure coordinates.
f.text(.35 / 2, .91, "Long-form", ha="center", va="bottom", size=15)
f.text(.7, .81, "Wide-form", ha="center", va="bottom", size=15)
python
# Fetch the example dataset named "anagrams"; it is reshaped in a later cell.
anagrams = sns.load_dataset(
    "anagrams",
)
# Final expression of the cell: show the table.
anagrams
python
# Unpivot the table: "subidr" and "attnr" are kept as identifier columns,
# the remaining column names go into "solutions" and their values into
# "score".
anagrams_long = anagrams.melt(
    id_vars=["subidr", "attnr"],
    var_name="solutions",
    value_name="score",
)
# Final expression of the cell: preview the first rows of the melted table.
anagrams_long.head()
python
# Point plot of the melted table: "solutions" on x, "score" on y, and
# "attnr" as hue.
sns.catplot(
    data=anagrams_long,
    x="solutions",
    y="score",
    hue="attnr",
    kind="point",
)
python
# Convert the long-form table to a plain dict and plot from that instead of
# the DataFrame, using the same variable assignments as before.
flights_dict = flights.to_dict()
sns.relplot(
    data=flights_dict,
    x="year",
    y="passengers",
    hue="month",
    kind="line",
)
python
# Average the numeric columns within each year; grouping by "year" makes it
# the index of the result.
flights_avg = (
    flights
    .groupby("year")
    .mean(numeric_only=True)
)
# "year" is passed for x here, the same key the table was grouped by.
sns.relplot(
    data=flights_avg,
    x="year",
    y="passengers",
    kind="line",
)
python
# Pull the two vectors out of the averaged table and pass them directly,
# without a `data` argument.
year, passengers = flights_avg.index, flights_avg["passengers"]
sns.relplot(
    x=year,
    y=passengers,
    kind="line",
)
python
# Same vectors as the previous cell, converted first: x as a NumPy array,
# y as a Python list.
sns.relplot(
    x=year.to_numpy(),
    y=passengers.to_list(),
    kind="line",
)
python
# Break the wide table into a list of its columns (the labels yielded by
# `.items()` are discarded) and plot the list.
flights_wide_list = [
    column
    for _label, column in flights_wide.items()
]
sns.relplot(
    data=flights_wide_list,
    kind="line",
)
python
# Two columns sliced over different index ranges: "Jan" up to label 1955 and
# "Aug" from label 1952 onward.
two_series = [
    flights_wide.loc[:1955, "Jan"],
    flights_wide.loc[1952:, "Aug"],
]
sns.relplot(
    data=two_series,
    kind="line",
)
python
# The same two vectors converted to NumPy arrays before plotting.
two_arrays = [
    series.to_numpy()
    for series in two_series
]
sns.relplot(
    data=two_arrays,
    kind="line",
)
python
# Map each series' name to its values as a NumPy array and plot the dict.
two_arrays_dict = {
    series.name: series.to_numpy()
    for series in two_series
}
sns.relplot(
    data=two_arrays_dict,
    kind="line",
)
python
# Convert the whole wide table to a NumPy array and plot that directly.
flights_array = flights_wide.to_numpy()
sns.relplot(
    data=flights_array,
    kind="line",
)