code/artificial_intelligence/src/lasso_regression/lasso_regression.ipynb
# --- Imports ---------------------------------------------------------------
# Standard library
import datetime

# Third-party
import numpy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from pandas_datareader import data as pdr

# Switch matplotlib over to seaborn's default look for every later figure.
sns.set()

# yfinance hook that patches pandas_datareader's Yahoo reader.
yf.pdr_override()

# Date window of the price history used throughout the notebook.
start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2018, 8, 30)

# Download the JPM history. reset_index() turns the frame's index into an
# ordinary column so that it is kept when the CSV is written without an index.
raw_prices = yf.download("jpm", start=start, end=end)
df_full = raw_prices.reset_index()

# Cache the download on disk; later cells read this file back.
df_full.to_csv('jpm.csv', index=False)

# Notebook preview of the first rows.
df_full.head()
import pandas as pd

# Reload the cached price history written to jpm.csv.
df = pd.read_csv('jpm.csv')
df

# Adjusted close series used for the plots and as the regression target.
close_px = df['Adj Close']

# 100-row rolling mean; the first 99 entries are NaN because the window
# is not yet full there.
rolling_window = close_px.rolling(window=100)
mavg = rolling_window.mean()
mavg
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import style
# Adjusting the size of matplotlib
import matplotlib as mpl
mpl.rc('figure', figsize=(8, 7))
mpl.__version__
# Adjusting the style of matplotlib
style.use('ggplot')
close_px.plot(label='AAPL')
mavg.plot(label='mavg')
plt.legend()
# Tickers compared against each other over the same start/end window.
tickers = ['jpm', 'AAPL', 'GE', 'GOOG', 'IBM', 'MSFT']

# Download all tickers in one call and keep only the adjusted closes
# (one column per ticker).
all_prices = yf.download(tickers, start=start, end=end)
dfcomp = all_prices['Adj Close']
dfcomp

# Row-over-row percentage change of each ticker.
retscomp = dfcomp.pct_change()

# Pairwise correlation matrix of those returns.
corr = retscomp.corr()
corr
import math
import numpy as np
from sklearn import preprocessing

# Feature frame: adjusted close and volume plus two derived percentages.
dfreg = df.loc[:, ['Adj Close', 'Volume']]
# High-low spread as a percentage of the close.
dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
# Open-to-close move as a percentage of the open.
dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

# Replace missing feature values with a sentinel. This runs before 'label'
# is created, so the NaNs introduced by the shift below are left in place.
dfreg.fillna(value=-99999, inplace=True)

# Forecast horizon: 1% of the number of rows, rounded up.
forecast_out = int(math.ceil(0.01 * len(dfreg)))
forecast_col = 'Adj Close'
# The label for each row is the adjusted close forecast_out rows later;
# the last forecast_out rows therefore have no label (NaN).
dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)

# Feature matrix without the label column. `columns=` is used because the
# positional `axis` argument of DataFrame.drop was removed in pandas 2.0.
X = np.array(dfreg.drop(columns=['label']))
X = preprocessing.scale(X)
# The trailing rows have features but no label: keep them for forecasting
# and train/evaluate only on the rows before them.
X_lately = X[-forecast_out:]
X = X[:-forecast_out]

# Labels aligned with X (the unlabeled tail is dropped).
y = np.array(dfreg['label'])
y = y[:-forecast_out]
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

# Hold out 33% of the labeled rows for evaluation; the fixed random_state
# makes the split reproducible.
splits = train_test_split(X, y, test_size=0.33, random_state=42)
X_train, X_test, y_train, y_test = splits

# Lasso with its default settings; fit() returns the fitted estimator.
clflasso = Lasso().fit(X_train, y_train)

# For scikit-learn regressors score() is the R^2 coefficient of
# determination on the given data, not a classification accuracy.
confidencelasso = clflasso.score(X_test, y_test)
print("The Accuracy of our Model is %r" % confidencelasso)
# Predict prices for the most recent rows, i.e. those that have features
# but no known future label.
forecast_set = clflasso.predict(X_lately)
dfreg['Forecast'] = np.nan
forecast_set

# Index the frame by the 'Date' column of the price data so that forecast
# rows can be appended right after the last observation. Previously the
# rows were keyed from datetime.now() on an integer index, which mixed
# ints with datetimes and placed the forecasts at unrelated dates.
# The guard keeps a re-run of this cell from re-indexing a frame that has
# already been extended with forecast rows.
if not isinstance(dfreg.index, pd.DatetimeIndex):
    dfreg.index = pd.DatetimeIndex(df['Date'])

# Append one row per prediction, one calendar day apart, starting the day
# after the last row of the frame.
# NOTE(review): calendar days include weekends/holidays — confirm whether
# business-day spacing is wanted instead.
one_day = datetime.timedelta(days=1)
next_date = dfreg.index[-1] + one_day
for prediction in forecast_set:
    # 'Forecast' is the last column; every other column stays empty.
    dfreg.loc[next_date] = [np.nan] * (len(dfreg.columns) - 1) + [prediction]
    next_date += one_day

# Plot the last 500 rows of actual prices together with the forecast.
dfreg['Adj Close'].tail(500).plot()
dfreg['Forecast'].tail(500).plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()