Back to Modin

To run this notebook as shown in the README GIFs, you must first download the 2015 NYC Taxi Trip Data to your local machine.

examples/jupyter/Modin_Taxi.ipynb

0.37.1755 B
Original Source
python
# Notebook setup: fetch the 2015 NYC taxi trip CSV used for the README GIFs,
# then configure Modin to run on a Dask client.
import urllib.request

from dask.distributed import Client
from modin.config import BenchmarkMode, Engine

# Save the dataset as "taxi.csv" in the working directory; the read_csv cell
# further down loads it from that path.
url_path = "https://modin-datasets.intel.com/green-taxi/green_tripdata_2015-01.csv"
urllib.request.urlretrieve(url_path, filename="taxi.csv")

# Select Dask as Modin's execution engine, then start a Dask client with
# 12 workers.
Engine.put("dask")
client = Client(n_workers=12)

# Enable Modin's benchmark mode.
# NOTE(review): presumably this is what makes the %time measurements in the
# following cells meaningful -- confirm against the Modin configuration docs.
BenchmarkMode.put(True)
python
import modin.pandas as pd
python
%time df = pd.read_csv("taxi.csv", parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"], quoting=3)
python
# Time building the missing-value mask for the whole DataFrame; the result is
# kept in `isnull`.
%time isnull = df.isnull()
python
%time rounded_trip_distance = df[["pickup_longitude"]].applymap(round)