
JAX NeurIPS 2020 Demo

cloud_tpu_colabs/JAX_NeurIPS_2020_demo.ipynb


The basics: interactive NumPy on GPU and TPU


python
import jax
import jax.numpy as jnp
from jax import random
python
key = random.key(0)
key, subkey = random.split(key)  # keep `key` for future splits, consume `subkey` below
x = random.normal(subkey, (5000, 5000))

print(x.shape)
print(x.dtype)
python
y = jnp.dot(x, x)
print(y[0, 0])
python
x
python
import matplotlib.pyplot as plt

plt.plot(x[0])
python
print(jnp.dot(x, x.T))
python
print(jnp.dot(x, 2 * x)[[0, 2, 1, 0], ..., None, ::-1])
python
import numpy as np

x_cpu = np.array(x)
%timeit -n 5 -r 2 np.dot(x_cpu, x_cpu)
python
%timeit -n 5 -r 5 jnp.dot(x, x).block_until_ready()

Automatic differentiation

python
from jax import grad
python
def f(x):
  if x > 0:
    return 2 * x ** 3
  else:
    return 3 * x
python
key = random.key(0)
x = random.normal(key, ())

print(grad(f)(x))
print(grad(f)(-x))
python
print(grad(grad(f))(-x))
print(grad(grad(grad(f)))(-x))

Other JAX autodiff highlights:

  • Forward- and reverse-mode, totally composable
  • Fast Jacobians and Hessians (see the sketch below)
  • Complex number support (holomorphic and non-holomorphic)
  • Jacobian pre-accumulation for elementwise operations (like gelu)

For much more, see the JAX Autodiff Cookbook (Part 1).
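As a quick, minimal sketch of the first two points (not part of the original demo): jacfwd and jacrev compute Jacobians via forward- and reverse-mode respectively, and they compose freely, so a Hessian is just one applied to the other.

python
from jax import jacfwd, jacrev

def g(v):
  return jnp.sin(v) * v ** 2  # simple vector -> vector function

v = jnp.arange(1., 4.)

# Forward- and reverse-mode Jacobians of the same function agree.
print(jacfwd(g)(v))
print(jacrev(g)(v))

# Composing them gives the Hessian of a scalar-valued function.
def h(v):
  return jnp.sum(jnp.sin(v) * v ** 2)

print(jacfwd(jacrev(h))(v))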

End-to-end compilation with XLA using jit

python
from jax import jit
python
key = random.key(0)
x = random.normal(key, (5000, 5000))
python
def f(x):
  y = x
  for _ in range(10):
    y = y - 0.1 * y + 3.
  return y[:100, :100]

f(x)
python
g = jit(f)
g(x)
python
%timeit f(x).block_until_ready()
python
%timeit -n 100 g(x).block_until_ready()
python
grad(jit(grad(jit(grad(jnp.tanh)))))(1.0)

Parallelization over multiple accelerators with pmap

python
jax.device_count()
python
from jax import pmap
python
y = pmap(lambda x: x ** 2)(jnp.arange(8))
print(y)
python
y
python
import matplotlib.pyplot as plt
plt.plot(y)

Collective communication operations

python
from functools import partial
from jax.lax import psum

@partial(pmap, axis_name='i')
def f(x):
  total = psum(x, 'i')  # sum x across all devices along the mapped axis 'i'
  return x / total, total

normalized, total = f(jnp.arange(8.))

print(f"normalized:\n{normalized}\n")
print("total:", total)

For more, see the pmap cookbook.
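A common use of these collectives is data-parallel training: each device computes a gradient on its shard of the batch, the gradients are averaged with pmean, and every device applies the same update. The sketch below is not from the original demo; the linear-model loss, learning rate, and toy data are made up for illustration.

python
from jax.lax import pmean

# Hypothetical toy loss: mean squared error of a linear model.
def loss(w, x, y):
  return jnp.mean((x @ w - y) ** 2)

@partial(pmap, axis_name='i')
def update(w, x, y):
  g = grad(loss)(w, x, y)   # per-device gradient on this device's shard
  g = pmean(g, 'i')         # average gradients across all devices
  return w - 0.1 * g        # every device applies the same SGD step

n = jax.device_count()
w = jnp.zeros((n, 3))       # parameters replicated across devices
xs = jnp.ones((n, 4, 3))    # one batch shard per device
ys = jnp.ones((n, 4))

w = update(w, xs, ys)
print(w.shape)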

Automatic parallelization with sharded_jit (new!)

python
from jax.experimental import sharded_jit, PartitionSpec as P
python
from jax import lax

conv = lambda image, kernel: lax.conv(image, kernel, (1, 1), 'SAME')
python
image = jnp.ones((1, 8, 2000, 1000)).astype(np.float32)
kernel = jnp.array(np.random.random((8, 8, 5, 5)).astype(np.float32))

np.set_printoptions(edgeitems=1)
conv(image, kernel)
python
%timeit conv(image, kernel).block_until_ready()
python
image_partitions = P(1, 1, 4, 2)
sharded_conv = sharded_jit(conv,
                           in_parts=(image_partitions, None),
                           out_parts=image_partitions)

sharded_conv(image, kernel)
python
%timeit -n 10 sharded_conv(image, kernel).block_until_ready()