examples/modin-scikit-learn-example.ipynb
%matplotlib inline
import numpy as np
import modin.pandas as pd
import matplotlib.pyplot as plt
import sklearn
# Read the housing CSV through Modin's pandas-compatible API and preview
# the first rows.
data = pd.read_csv("data/boston_housing.csv")
data.head()

# Split the frame into the regression target ("PRICE") and the remaining
# columns, which serve as the model inputs.
labels = data["PRICE"]
features = data.drop(columns="PRICE")

# Bare expression kept from the notebook: shows which DataFrame class is
# being handed to scikit-learn.
type(features)
from sklearn.linear_model import LinearRegression

# Build the linear regression estimator and train it on the full data set.
# fit() returns the estimator itself, so construction and training can be
# chained into a single assignment.
lm = LinearRegression().fit(features, labels)
# Scatter plot of the "RM" column against the target price.
plt.scatter(data["RM"], labels)

# Annotate the figure, then render it.
plt.title("Relationship between Rooms and Price")
plt.xlabel("Average number of rooms per dwelling")
plt.ylabel("Housing Price")
plt.show()
# Predict on the same rows the model was trained on.
predicted_prices = lm.predict(features)

# Compare actual prices (x axis) with the model's predictions (y axis).
plt.scatter(labels, predicted_prices)
plt.title("Prices versus Predicted Prices")
plt.xlabel("Prices")
plt.ylabel("Predicted Prices")
plt.show()
# Mean squared error of the predictions on the training data.
# The residuals are squared with the vectorized ** operator rather than
# .apply(lambda ...), which would invoke a Python function once per
# element; the resulting value is the same.
training_error = ((labels - predicted_prices) ** 2).mean()

# Bare expression kept from the notebook so the value is echoed when this
# is the last statement of a cell.
training_error
# Citation: http://bigdata-madesimple.com/how-to-run-linear-regression-in-python-scikit-learn/