
JAX NeurIPS 2020 Demo

cloud_tpu_colabs/JAX_NeurIPS_2020_demo.ipynb


The basics: interactive NumPy on GPU and TPU


python
import jax
import jax.numpy as jnp
from jax import random
python
key = random.key(0)
key, subkey = random.split(key)  # keep `key` for future splits, consume `subkey` below
x = random.normal(subkey, (5000, 5000))

print(x.shape)
print(x.dtype)
python
y = jnp.dot(x, x)
print(y[0, 0])
python
x
python
import matplotlib.pyplot as plt

plt.plot(x[0])
python
print(jnp.dot(x, x.T))
python
print(jnp.dot(x, 2 * x)[[0, 2, 1, 0], ..., None, ::-1])
python
import numpy as np

x_cpu = np.array(x)
%timeit -n 5 -r 2 np.dot(x_cpu, x_cpu)
python
%timeit -n 5 -r 5 jnp.dot(x, x).block_until_ready()

Automatic differentiation

python
from jax import grad
python
def f(x):
  if x > 0:
    return 2 * x ** 3
  else:
    return 3 * x
python
key = random.key(0)
x = random.normal(key, ())

print(grad(f)(x))
print(grad(f)(-x))
python
print(grad(grad(f))(-x))
print(grad(grad(grad(f)))(-x))

Other JAX autodiff highlights:

  • Forward- and reverse-mode, totally composable
  • Fast Jacobians and Hessians (see the sketch below)
  • Complex number support (holomorphic and non-holomorphic)
  • Jacobian pre-accumulation for elementwise operations (like gelu)

For much more, see the JAX Autodiff Cookbook (Part 1).
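As a quick, minimal sketch of the first two points (not part of the original demo): jacfwd and jacrev compute Jacobians via forward- and reverse-mode respectively, and they compose freely, so a Hessian is just one applied to the other.

python
from jax import jacfwd, jacrev

def g(v):
  return jnp.sin(v) * v ** 2  # simple vector -> vector function

v = jnp.arange(1., 4.)

# Forward- and reverse-mode Jacobians of the same function agree.
print(jacfwd(g)(v))
print(jacrev(g)(v))

# Composing them gives the Hessian of a scalar-valued function.
def h(v):
  return jnp.sum(jnp.sin(v) * v ** 2)

print(jacfwd(jacrev(h))(v))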

End-to-end compilation with XLA using jit

python
from jax import jit
python
key = random.key(0)
x = random.normal(key, (5000, 5000))
python
def f(x):
  y = x
  for _ in range(10):
    y = y - 0.1 * y + 3.
  return y[:100, :100]

f(x)
python
g = jit(f)
g(x)
python
%timeit f(x).block_until_ready()
python
%timeit -n 100 g(x).block_until_ready()
python
grad(jit(grad(jit(grad(jnp.tanh)))))(1.0)

Parallelization over multiple accelerators with pmap

python
jax.device_count()
python
from jax import pmap
python
y = pmap(lambda x: x ** 2)(jnp.arange(8))
print(y)
python
y
python
import matplotlib.pyplot as plt
plt.plot(y)

Collective communication operations

python
from functools import partial
from jax.lax import psum

@partial(pmap, axis_name='i')
def f(x):
  total = psum(x, 'i')  # sum x across all devices along the mapped axis 'i'
  return x / total, total

normalized, total = f(jnp.arange(8.))

print(f"normalized:\n{normalized}\n")
print("total:", total)

For more, see the pmap cookbook.
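A common use of these collectives is data-parallel training: each device computes a gradient on its shard of the batch, the gradients are averaged with pmean, and every device applies the same update. The sketch below is not from the original demo; the linear-model loss, learning rate, and toy data are made up for illustration.

python
from jax.lax import pmean

# Hypothetical toy loss: mean squared error of a linear model.
def loss(w, x, y):
  return jnp.mean((x @ w - y) ** 2)

@partial(pmap, axis_name='i')
def update(w, x, y):
  g = grad(loss)(w, x, y)   # per-device gradient on this device's shard
  g = pmean(g, 'i')         # average gradients across all devices
  return w - 0.1 * g        # every device applies the same SGD step

n = jax.device_count()
w = jnp.zeros((n, 3))       # parameters replicated across devices
xs = jnp.ones((n, 4, 3))    # one batch shard per device
ys = jnp.ones((n, 4))

w = update(w, xs, ys)
print(w.shape)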

Automatic parallelization with sharded_jit (new!)

python
from jax.experimental import sharded_jit, PartitionSpec as P
python
from jax import lax

conv = lambda image, kernel: lax.conv(image, kernel, (1, 1), 'SAME')
python
image = jnp.ones((1, 8, 2000, 1000)).astype(np.float32)
kernel = jnp.array(np.random.random((8, 8, 5, 5)).astype(np.float32))

np.set_printoptions(edgeitems=1)
conv(image, kernel)
python
%timeit conv(image, kernel).block_until_ready()
python
image_partitions = P(1, 1, 4, 2)
sharded_conv = sharded_jit(conv,
                           in_parts=(image_partitions, None),
                           out_parts=image_partitions)

sharded_conv(image, kernel)
python
%timeit -n 10 sharded_conv(image, kernel).block_until_ready()