examples/riskReturnAnalysis.ipynb
===========================================================
This notebook demonstrates advanced techniques for risk-return analysis and portfolio optimization using OpenBB. We'll explore various asset classes, implement modern portfolio theory, and utilize OpenBB's extensive financial analysis capabilities. This comprehensive guide covers data collection, exploratory visualization, risk-return metrics, efficient-frontier optimization, Value at Risk, performance attribution, and scenario-based stress testing.
By the end of this notebook, you'll have a deep understanding of how to use OpenBB for sophisticated financial analysis and portfolio management.
If you are running this notebook in Colab, you can run the following command to install the OpenBB Platform:
!pip install openbb
To run this notebook, you'll need to install the following dependencies:
!pip install pandas numpy matplotlib seaborn scipy scikit-learn
# Import libraries
from openbb import obb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
# Set plotting style
plt.style.use('default') # Use the default Matplotlib style
sns.set_theme(style="whitegrid") # Set Seaborn style
sns.set_palette("viridis")
# NOTE(review): duplicate import of `obb` (already imported above); harmless but redundant
from openbb import obb
# Authenticate against OpenBB Hub. The string below is a placeholder —
# replace it with a real Personal Access Token before running, otherwise
# this call will fail / data requests may be unauthenticated.
obb.account.login(pat="Enter Your PAT Key")
Let's collect historical data for a diverse set of asset classes:
import pandas as pd
from openbb import obb
from datetime import datetime, timedelta
import time
# Define asset classes and their tickers (ETF proxies for each class)
assets = {
    'US Large Cap': 'SPY',
    'US Small Cap': 'IWM',
    'International Developed': 'EFA',
    'Emerging Markets': 'EEM',
    'US Aggregate Bonds': 'AGG',
    'US Treasury Bonds': 'TLT',
    'Real Estate': 'VNQ',
    'Gold': 'GLD',
    'Commodities': 'DBC'
}
# Set date range
end_date = datetime.now().date()
start_date = end_date - timedelta(days=365) # Adjusted to 1 year for testing
# NOTE(review): start_date/end_date are defined here but the fetch helper
# below does not pass them to the provider — confirm the intended range.
# Function to fetch historical data with retries
def fetch_historical_data(ticker, retries=3, start_date=None, end_date=None):
    """Fetch historical price data for *ticker*, retrying on transient failures.

    Parameters
    ----------
    ticker : str
        Symbol to fetch (e.g. 'SPY').
    retries : int
        Number of attempts before giving up.
    start_date, end_date : date or str or None
        Optional date range forwarded to the provider. The original code
        computed a module-level date range but never passed it; these
        optional parameters (defaulting to the provider's own defaults)
        make the range usable without changing existing calls.

    Returns
    -------
    pandas.DataFrame or None
        Price history, or None if every attempt failed or returned no rows.
    """
    for attempt in range(retries):
        try:
            # Forward the requested date range to the provider.
            historical_data = obb.equity.price.historical(
                ticker, start_date=start_date, end_date=end_date
            )
            # Convert the OBBject to a DataFrame using the to_dataframe() method
            df = historical_data.to_dataframe()
            # Only accept a non-empty result; otherwise retry
            if df is not None and not df.empty:
                return df
        except Exception as e:
            print(f"Attempt {attempt + 1} failed for {ticker}: {str(e)}")
            time.sleep(2)  # Back off briefly before retrying
    return None
# Pull closing prices for every asset and assemble them into one table.
combined_data = {}
for asset_name, ticker in assets.items():
    price_history = fetch_historical_data(ticker)
    if price_history is None or price_history.empty:
        print(f"No data returned for {asset_name} ({ticker}).")
        continue
    # Keep only the closing-price series, keyed by the friendly asset name
    combined_data[asset_name] = price_history['close']

# One column per asset class, indexed by date
df_combined = pd.DataFrame(combined_data)

# Abort early if nothing at all was retrieved
if df_combined.empty:
    raise ValueError("No data was successfully retrieved. Please check your tickers and date range.")

# Daily simple returns; fill_method=None avoids the pandas FutureWarning
returns = df_combined.pct_change(fill_method=None).dropna()

print("Combined Closing Prices:")
print(df_combined.head())
print("\nDaily Returns:")
print(returns.head())

# Persist the raw closing prices for use outside the notebook
output_file = "financial_data.xlsx"
df_combined.to_excel(output_file)
print(f"Data exported successfully to {output_file}")
Let's visualize our data to better understand the relationships between different asset classes.
import io
import base64
from IPython.display import HTML
def fig_to_base64(fig):
    """Render *fig* to PNG and return it as a base64-encoded ASCII string."""
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png')
    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode('utf-8')
# Correlation heatmap of daily returns across asset classes
plt.figure(figsize=(12, 10))
sns.heatmap(returns.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap of Asset Returns')
heatmap_base64 = fig_to_base64(plt.gcf())
plt.close()

# Cumulative returns: growth of $1 invested in each asset class
cumulative_returns = (1 + returns).cumprod()
plt.figure(figsize=(12, 6))
cumulative_returns.plot()
plt.title('Cumulative Returns of Asset Classes')
plt.ylabel('Cumulative Return')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
cumulative_returns_base64 = fig_to_base64(plt.gcf())
plt.close()

# Display the rendered figures inline. The original cell passed empty
# f-strings to HTML(), so nothing was actually shown; embed the base64
# PNGs as proper <img> data URIs instead.
display(HTML(f'<img src="data:image/png;base64,{heatmap_base64}"/>'))
display(HTML(f'<img src="data:image/png;base64,{cumulative_returns_base64}"/>'))

# Store base64 strings in variables (optional, for debugging or later use)
heatmap_data = f'"image/png": "{heatmap_base64[:50]}..."'
cumulative_returns_data = f'"image/png": "{cumulative_returns_base64[:50]}..."'
print("Heatmap data (preview):", heatmap_data)
print("Cumulative returns data (preview):", cumulative_returns_data)
Now let's calculate key risk and return metrics for each asset class.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Check if returns DataFrame is not empty before calculations
if not returns.empty:
    # Annualize the mean daily return and volatility (252 trading days/year)
    annual_returns = returns.mean() * 252
    annual_volatility = returns.std() * np.sqrt(252)

    # Sharpe Ratio (assuming an annual risk-free rate of 2%)
    risk_free_rate = 0.02
    sharpe_ratio = (annual_returns - risk_free_rate) / annual_volatility

    # Combine metrics into a DataFrame, one row per asset class
    risk_return_metrics = pd.DataFrame({
        'Return': annual_returns,
        'Volatility': annual_volatility,
        'Sharpe Ratio': sharpe_ratio
    })

    # Display the metrics, best risk-adjusted return first
    print(risk_return_metrics.sort_values('Sharpe Ratio', ascending=False))

    # Visualize risk-return tradeoff
    plt.figure(figsize=(12, 8))
    sns.scatterplot(data=risk_return_metrics, x='Volatility', y='Return', size='Sharpe Ratio',
                    sizes=(50, 500), legend='brief', hue='Sharpe Ratio', palette='viridis')

    # Annotate points with asset names. Label-based .loc is used: the
    # original indexed the label-indexed Series with an integer position
    # (['Volatility'][i]), which is deprecated/removed in modern pandas.
    for asset in risk_return_metrics.index:
        plt.annotate(asset,
                     (risk_return_metrics.loc[asset, 'Volatility'],
                      risk_return_metrics.loc[asset, 'Return']),
                     xytext=(5, 5), textcoords='offset points')
    plt.title('Risk-Return Tradeoff of Various Asset Classes')
    plt.xlabel('Risk (Annualized Volatility)')
    plt.ylabel('Return (Annualized)')
    plt.tight_layout()
    plt.show()
else:
    print("No return data available for calculations.")
Let's compute the efficient frontier to understand the optimal risk-return tradeoffs.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
def portfolio_performance(weights, returns):
    """Return (annualized return, annualized volatility) of a weighted portfolio.

    weights : np.ndarray of asset weights (expected to sum to 1)
    returns : DataFrame of daily asset returns, one column per asset
    """
    # Annualized expected return: weighted mean daily return x 252 trading days
    mean_daily = returns.mean()
    annual_return = np.sum(mean_daily * weights) * 252
    # Annualized volatility from the annualized covariance matrix: sqrt(w' Sigma w)
    annual_cov = returns.cov() * 252
    annual_volatility = np.sqrt(np.dot(weights.T, np.dot(annual_cov, weights)))
    return annual_return, annual_volatility
def negative_sharpe_ratio(weights, returns, risk_free_rate):
    """Negated Sharpe ratio of the portfolio, suitable for scipy minimizers."""
    annual_return, annual_vol = portfolio_performance(weights, returns)
    excess_return = annual_return - risk_free_rate
    # Minimizing the negative Sharpe is equivalent to maximizing the Sharpe
    return -excess_return / annual_vol
def calculate_efficient_frontier(returns, num_portfolios=1000, risk_free_rate=0.0):
    """Sample random long-only portfolios to trace the efficient frontier.

    Parameters
    ----------
    returns : DataFrame of daily asset returns, one column per asset.
    num_portfolios : int
        Number of random weight vectors to sample.
    risk_free_rate : float
        Annual risk-free rate used in the Sharpe ratio. Defaults to 0.0,
        which reproduces the original behavior; pass 0.02 to be consistent
        with the Sharpe ratios computed elsewhere in this notebook.

    Returns
    -------
    ndarray of shape (num_portfolios, 3): columns are
    [annualized return, annualized volatility, Sharpe ratio].
    """
    num_assets = len(returns.columns)
    results = np.zeros((3, num_portfolios))
    for i in range(num_portfolios):
        # Random long-only weights, normalized to sum to 1
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)
        p_return, p_volatility = portfolio_performance(weights, returns)
        results[0, i] = p_return
        results[1, i] = p_volatility
        # Sharpe ratio of the sampled portfolio
        results[2, i] = (p_return - risk_free_rate) / p_volatility
    return results.T
# Sample random portfolios to trace out the efficient frontier
efficient_frontier = calculate_efficient_frontier(returns)

# Solve for the maximum-Sharpe portfolio with SLSQP
num_assets = len(returns.columns)
risk_free_rate = 0.02  # Annual risk-free rate, consistent with earlier cells
initial_guess = num_assets * [1. / num_assets]  # Start from equal weights
weight_sum_to_one = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
long_only_bounds = tuple((0.0, 1.0) for _ in range(num_assets))
opt_result = minimize(negative_sharpe_ratio, initial_guess,
                      args=(returns, risk_free_rate),
                      method='SLSQP', bounds=long_only_bounds,
                      constraints=weight_sum_to_one)
optimal_weights = opt_result.x
optimal_return, optimal_volatility = portfolio_performance(optimal_weights, returns)

# Plot the sampled portfolios, colored by Sharpe ratio
plt.figure(figsize=(12, 8))
plt.scatter(efficient_frontier[:, 1], efficient_frontier[:, 0],
            c=efficient_frontier[:, 2], cmap='viridis')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Return')
plt.title('Efficient Frontier')

# Overlay each individual asset class
for asset in returns.columns:
    plt.scatter(risk_return_metrics.loc[asset, 'Volatility'],
                risk_return_metrics.loc[asset, 'Return'],
                marker='o', s=200, label=asset)

# Highlight the maximum-Sharpe (optimal) portfolio
plt.scatter(optimal_volatility, optimal_return, c='red', s=200, marker='*', label='Optimal Portfolio')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

print("Optimal Portfolio Weights:")
for asset, weight in zip(returns.columns, optimal_weights):
    print(f"{asset}: {weight:.4f}")
Let's calculate Value at Risk (VaR) and Conditional Value at Risk (CVaR) for our optimal portfolio.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def calculate_var_cvar(returns, weights, confidence_level=0.95):
    """Historical VaR and CVaR of a weighted portfolio's daily returns.

    VaR is the return at the (1 - confidence_level) percentile of the
    historical distribution; CVaR is the mean of the returns at or below
    that threshold (the expected shortfall).
    """
    # Daily portfolio returns as the weighted sum across assets
    portfolio_returns = np.sum(returns * weights, axis=1)
    # VaR: the tail percentile of the return distribution
    tail_percentile = 100 * (1 - confidence_level)
    var = np.percentile(portfolio_returns, tail_percentile)
    # CVaR: average of the returns in the tail at or below VaR
    tail_returns = portfolio_returns[portfolio_returns <= var]
    return var, tail_returns.mean()
# Check if returns DataFrame is not empty before proceeding
if not returns.empty and optimal_weights is not None:
    # Historical 95% VaR and CVaR of the optimal portfolio
    var_95, cvar_95 = calculate_var_cvar(returns, optimal_weights)
    print(f"95% VaR: {var_95:.4f}")
    print(f"95% CVaR: {cvar_95:.4f}")

    # Daily returns of the optimal portfolio, for the histogram below
    optimal_portfolio_returns = np.sum(returns * optimal_weights, axis=1)

    # Return distribution with both risk thresholds marked
    plt.figure(figsize=(12, 6))
    sns.histplot(optimal_portfolio_returns, kde=True)
    plt.axvline(cvar_95, color='g', linestyle='dashed', label='95% CVaR')
    plt.axvline(var_95, color='r', linestyle='dashed', label='95% VaR')
    plt.xlabel('Daily Return')
    plt.ylabel('Frequency')
    plt.title('Distribution of Optimal Portfolio Returns with VaR and CVaR')
    plt.legend()
    plt.show()
else:
    print("Returns data or optimal weights are not available for calculations.")
Let's perform a simple scenario analysis to see how our optimal portfolio would perform under different market conditions.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def scenario_analysis(weights, returns, scenarios):
    """Evaluate portfolio risk/return statistics under shocked return scenarios.

    Parameters
    ----------
    weights : array-like of portfolio weights, aligned with returns.columns.
    returns : DataFrame of daily asset returns.
    scenarios : mapping of scenario name -> {asset: additive daily shock}.
        Assets missing from a scenario are treated as unshocked (0), so
        partial scenarios no longer produce NaN results.

    Returns
    -------
    DataFrame with one row per scenario: mean return, volatility, and
    historical 95% VaR / CVaR of the shocked daily portfolio returns.
    """
    scenario_results = {}
    for scenario, shock in scenarios.items():
        # Align the shock to every portfolio column, defaulting to 0 for
        # assets the scenario does not mention. (The original added an
        # unaligned Series, which turned unmentioned columns into NaN.)
        shock_series = pd.Series(shock).reindex(returns.columns, fill_value=0)
        shocked_returns = returns + shock_series
        # Daily portfolio returns under this scenario
        scenario_portfolio_returns = np.sum(shocked_returns * weights, axis=1)
        var_95 = np.percentile(scenario_portfolio_returns, 5)
        scenario_results[scenario] = {
            'Mean Return': scenario_portfolio_returns.mean(),
            'Volatility': scenario_portfolio_returns.std(),
            'VaR 95%': var_95,
            'CVaR 95%': scenario_portfolio_returns[scenario_portfolio_returns <= var_95].mean()
        }
    return pd.DataFrame(scenario_results).T
# Define scenarios: additive daily shocks per asset (missing assets = no shock)
scenarios = {
    'Base Case': {asset: 0 for asset in returns.columns},
    'Market Crash': {asset: -0.3 for asset in returns.columns},
    'Economic Boom': {asset: 0.2 for asset in returns.columns},
    'Rising Interest Rates': {'US Aggregate Bonds': -0.1, 'US Treasury Bonds': -0.15},
    'Commodity Boom': {'Gold': 0.25, 'Commodities': 0.3}
}

# Check if returns DataFrame is not empty before proceeding
if not returns.empty:
    # Fall back to equal weights only when the optimizer's weights are not
    # available. The original cell unconditionally overwrote optimal_weights
    # with 0.2 per asset (which sums to 1.8 for 9 assets, not 1), clobbering
    # the optimized portfolio that later cells rely on.
    if 'optimal_weights' not in globals() or optimal_weights is None:
        optimal_weights = [1.0 / len(returns.columns)] * len(returns.columns)

    # Perform scenario analysis
    scenario_results = scenario_analysis(optimal_weights, returns, scenarios)
    print(scenario_results)

    # Visualize scenario analysis results. Plot directly via pandas so we
    # don't create a stray empty figure (DataFrame.plot makes its own).
    scenario_results[['Mean Return', 'Volatility']].plot(kind='bar', figsize=(12, 6))
    plt.title('Scenario Analysis: Mean Return and Volatility')
    plt.xlabel('Scenario')
    plt.ylabel('Value')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()
else:
    print("Returns data is not available for calculations.")
Performance attribution helps us understand which assets contributed most to our portfolio's performance. Let's implement a simple performance attribution analysis for our optimal portfolio.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
import base64
from IPython.display import HTML
def fig_to_base64(fig):
    """Serialize a Matplotlib figure to a base64-encoded PNG string."""
    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode('utf-8')
def performance_attribution(weights, returns):
    """Break the portfolio's annualized return down by asset contribution.

    Returns a DataFrame (sorted by percent contribution, descending) with
    each asset's weight, annualized return, absolute return contribution,
    and share of the total portfolio return.
    """
    # Annualized per-asset returns (252 trading days)
    annual_asset_returns = returns.mean() * 252
    # Each asset's absolute contribution to the portfolio's annual return
    asset_contribution = annual_asset_returns * weights
    # Total portfolio return and each asset's share of it
    portfolio_return = np.sum(asset_contribution)
    percent_contribution = asset_contribution / portfolio_return

    attribution_data = pd.DataFrame({
        'Weight': weights,
        'Return': annual_asset_returns,
        'Contribution': asset_contribution,
        'Percent Contribution': percent_contribution
    })
    return attribution_data.sort_values('Percent Contribution', ascending=False)
# Check if returns DataFrame is not empty before proceeding
if not returns.empty and optimal_weights is not None:
    # Calculate performance attribution for the optimal portfolio
    attribution = performance_attribution(optimal_weights, returns)
    print(attribution)

    # Visualize performance attribution
    fig, ax = plt.subplots(figsize=(12, 6))
    attribution['Percent Contribution'].plot(kind='bar', ax=ax)
    ax.set_title('Performance Attribution of Optimal Portfolio')
    ax.set_xlabel('Asset')
    ax.set_ylabel('Percent Contribution to Return')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Convert plot to base64 and close the figure to free memory
    plot_base64 = fig_to_base64(fig)
    plt.close(fig)

    # Display the plot inline. The original passed an empty f-string to
    # HTML(), so nothing rendered; embed the PNG as a data URI instead.
    display(HTML(f'<img src="data:image/png;base64,{plot_base64}"/>'))

    # Store base64 string in a variable (optional, for debugging or later use)
    plot_data = f'"image/png": "{plot_base64[:50]}..."'
    print("Plot data (preview):", plot_data)
else:
    print("Returns data or optimal weights are not available for calculations.")
Now, let's expand on our previous scenario analysis to include more detailed stress testing. We'll define several economic scenarios and see how our portfolio performs under each.
def detailed_scenario_analysis(weights, returns, scenarios, risk_free_rate=0.02):
    """Stress-test a portfolio under additive daily shock scenarios.

    Parameters
    ----------
    weights : array-like of portfolio weights, aligned with returns.columns.
    returns : DataFrame of daily asset returns.
    scenarios : mapping of scenario name -> {asset: additive daily shock}.
    risk_free_rate : float
        Annual risk-free rate for the Sharpe ratio. The original read a
        module-level `risk_free_rate` global (0.02 in this notebook); the
        explicit parameter keeps that behavior while making the function
        self-contained.

    Returns
    -------
    DataFrame: one row per scenario (plus 'Base Case') with annualized
    mean return, volatility, Sharpe ratio, sqrt(252)-scaled VaR/CVaR,
    and the max drawdown of the cumulative summed returns.
    """

    def _stats(portfolio_returns):
        # Annualized summary statistics for one daily return series.
        mean_annual = portfolio_returns.mean() * 252
        vol_annual = portfolio_returns.std() * np.sqrt(252)
        var_daily = np.percentile(portfolio_returns, 5)
        return {
            'Mean Return': mean_annual,
            'Volatility': vol_annual,
            'Sharpe Ratio': (mean_annual - risk_free_rate) / vol_annual,
            # NOTE(review): scaling a daily percentile/tail mean by sqrt(252)
            # is a rough annualization heuristic, kept from the original.
            'VaR 95%': var_daily * np.sqrt(252),
            'CVaR 95%': portfolio_returns[portfolio_returns <= var_daily].mean() * np.sqrt(252),
            'Max Drawdown': (portfolio_returns.cumsum() - portfolio_returns.cumsum().cummax()).min()
        }

    # Unshocked baseline first, then each shocked scenario
    scenario_results = {'Base Case': _stats(np.sum(returns * weights, axis=1))}
    for scenario, shocks in scenarios.items():
        shocked_returns = returns.copy()
        for asset, shock in shocks.items():
            shocked_returns[asset] += shock
        scenario_results[scenario] = _stats(np.sum(shocked_returns * weights, axis=1))
    return pd.DataFrame(scenario_results).T
# Define more detailed scenarios: additive daily shocks per asset
detailed_scenarios = {
    'Market Crash': {'US Large Cap': -0.4, 'US Small Cap': -0.5, 'International Developed': -0.35, 'Emerging Markets': -0.45, 'US Aggregate Bonds': 0.05, 'US Treasury Bonds': 0.1, 'Real Estate': -0.3, 'Gold': 0.15, 'Commodities': -0.25},
    'Economic Boom': {'US Large Cap': 0.25, 'US Small Cap': 0.3, 'International Developed': 0.2, 'Emerging Markets': 0.35, 'US Aggregate Bonds': -0.05, 'US Treasury Bonds': -0.1, 'Real Estate': 0.2, 'Gold': -0.1, 'Commodities': 0.15},
    'Rising Interest Rates': {'US Large Cap': -0.1, 'US Small Cap': -0.15, 'International Developed': -0.05, 'Emerging Markets': -0.1, 'US Aggregate Bonds': -0.2, 'US Treasury Bonds': -0.25, 'Real Estate': -0.15, 'Gold': -0.05, 'Commodities': 0.05},
    'Geopolitical Tension': {'US Large Cap': -0.15, 'US Small Cap': -0.2, 'International Developed': -0.25, 'Emerging Markets': -0.3, 'US Aggregate Bonds': 0.1, 'US Treasury Bonds': 0.15, 'Real Estate': -0.1, 'Gold': 0.25, 'Commodities': 0.2},
    'Tech Boom': {'US Large Cap': 0.3, 'US Small Cap': 0.35, 'International Developed': 0.2, 'Emerging Markets': 0.25, 'US Aggregate Bonds': -0.05, 'US Treasury Bonds': -0.1, 'Real Estate': 0.1, 'Gold': -0.05, 'Commodities': 0},
}

# Run detailed scenario analysis on the optimized portfolio
detailed_scenario_results = detailed_scenario_analysis(optimal_weights, returns, detailed_scenarios)
print(detailed_scenario_results)

# Visualize scenario analysis results. DataFrame.plot with subplots=True
# creates its own figure, so the original's preceding plt.figure(...)
# left a stray empty window; it has been removed.
detailed_scenario_results[['Mean Return', 'Volatility', 'Sharpe Ratio', 'VaR 95%', 'CVaR 95%']].plot(kind='bar', subplots=True, layout=(3, 2), sharex=False, figsize=(15, 20))
plt.tight_layout()
plt.show()

# Visualize max drawdown for each scenario
plt.figure(figsize=(12, 6))
detailed_scenario_results['Max Drawdown'].plot(kind='bar')
plt.title('Max Drawdown Under Different Scenarios')
plt.xlabel('Scenario')
plt.ylabel('Max Drawdown')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()