examples/jupyter/integrations/statsmodels.ipynb
import statsmodels.api as sm
import pandas
import modin.pandas as pd
from patsy import dmatrices
# Load the "Guerry" dataset (R package "HistData") through statsmodels and
# wrap the returned data in a Modin DataFrame.
df = sm.datasets.get_rdataset("Guerry", "HistData").data
modin_df = pd.DataFrame(df)

# Keep only the columns this example works with. The list is called
# `columns` instead of `vars` so the `vars()` builtin is not shadowed.
columns = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']
modin_df = modin_df[columns]
modin_df[-5:]  # last five rows, before rows with missing values are dropped

# Remove rows containing missing values, then look at the tail again.
modin_df = modin_df.dropna()
modin_df[-5:]

# Build the response (y) and design (X) matrices from the formula, then
# convert both to Modin DataFrames before handing them to statsmodels.
y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=modin_df, return_type='dataframe')
y = pd.DataFrame(y)
X = pd.DataFrame(X)

mod = sm.OLS(y, X)  # Describe model
res = mod.fit()  # Fit model
print(res.summary())
`sm.ols()` (the formula interface, `statsmodels.formula.api.ols`) is not currently interoperable with Modin.
# Import the formula interface under its own alias (`smf`) so the `sm` alias
# that refers to `statsmodels.api` is not overwritten.
import statsmodels.formula.api as smf

# Small Modin DataFrame used to exercise the formula-based OLS entry point.
modin_df = pd.DataFrame({"A": [10,20,30,40,50], "B": [20, 30, 10, 40, 50], "C": [32, 234, 23, 23, 42523]})

# Regress A on B and C using the formula interface with a Modin DataFrame.
result = smf.ols(formula="A ~ B + C", data=modin_df).fit()
print(result.params)
print(result.summary())
import statsmodels.api as sm
# Load the "Guerry" dataset (R package "HistData") through statsmodels and
# wrap the returned data in a plain pandas DataFrame.
df = sm.datasets.get_rdataset("Guerry", "HistData").data
pandas_df = pandas.DataFrame(df)

# Keep only the columns this example works with. The list is called
# `columns` instead of `vars` so the `vars()` builtin is not shadowed.
columns = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']
pandas_df = pandas_df[columns]
pandas_df = pandas_df.dropna()

# Build the design matrices from the prepared frame (`pandas_df`), not the
# raw `df`; otherwise the column selection and dropna() above are never used.
y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=pandas_df, return_type='dataframe')
y = pandas.DataFrame(y)
X = pandas.DataFrame(X)

mod = sm.OLS(y, X)  # Describe model
res = mod.fit()  # Fit model
print(res.summary())
# Import the formula interface under its own alias (`smf`) so the `sm` alias
# that refers to `statsmodels.api` is not overwritten.
import statsmodels.formula.api as smf

# Build the frame with plain pandas: `pd` is bound to `modin.pandas` in this
# file, so `pd.DataFrame` would produce a Modin DataFrame here.
pandas_df = pandas.DataFrame({"A": [10,20,30,40,50], "B": [20, 30, 10, 40, 50], "C": [32, 234, 23, 23, 42523]})

# Regress A on B and C using the formula interface with a pandas DataFrame.
result = smf.ols(formula="A ~ B + C", data=pandas_df).fit()
print(result.params)
print(result.summary())