Back to Openbb

RiskReturnAnalysis

examples/riskReturnAnalysis.ipynb

4.7.020.2 KB
Original Source

Advanced Risk-Return Analysis and Portfolio Optimization with OpenBB

===========================================================

Description

This notebook demonstrates advanced techniques for risk-return analysis and portfolio optimization using OpenBB. We'll explore various asset classes, implement modern portfolio theory, and utilize OpenBB's extensive financial analysis capabilities. This comprehensive guide covers:

  1. Data collection and preprocessing
  2. Exploratory data analysis with visualizations
  3. Risk and return calculations
  4. Efficient frontier computation
  5. Portfolio optimization techniques
  6. Advanced risk metrics (VaR, CVaR)
  7. Performance attribution
  8. Scenario analysis and stress testing

By the end of this notebook, you'll have a deep understanding of how to use OpenBB for sophisticated financial analysis and portfolio management.

Author

Amit Kumar

If you are running this notebook in Colab, you can run the following command to install the OpenBB Platform:

python
!pip install openbb
python
!pip install openbb

Dependencies

To run this notebook, you'll need to install the following dependencies:

python
!pip install pandas numpy matplotlib seaborn scipy scikit-learn

Imports

python
# Import libraries
from openbb import obb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
python
# Set plotting style
plt.style.use('default')  # Use the default Matplotlib style
sns.set_theme(style="whitegrid")  # Set Seaborn style
sns.set_palette("viridis")

Login

python
from openbb import obb
obb.account.login(pat="Enter Your PAT Key")

1. Data Collection and Preprocessing

Let's collect historical data for a diverse set of asset classes:

python
import pandas as pd
from openbb import obb
from datetime import datetime, timedelta
import time

# Define asset classes and their tickers
assets = {
    'US Large Cap': 'SPY',
    'US Small Cap': 'IWM',
    'International Developed': 'EFA',
    'Emerging Markets': 'EEM',
    'US Aggregate Bonds': 'AGG',
    'US Treasury Bonds': 'TLT',
    'Real Estate': 'VNQ',
    'Gold': 'GLD',
    'Commodities': 'DBC'
}

# Set date range
end_date = datetime.now().date()
start_date = end_date - timedelta(days=365)  # Adjusted to 1 year for testing

# Function to fetch historical data with retries
def fetch_historical_data(ticker, retries=3):
    for attempt in range(retries):
        try:
            # Fetch historical data using the correct function and parameters
            historical_data = obb.equity.price.historical(ticker)

            # Convert the OBBject to a DataFrame using the to_dataframe() method
            df = historical_data.to_dataframe()

            # Check if DataFrame is empty and return it if not
            if df is not None and not df.empty:
                return df

        except Exception as e:
            print(f"Attempt {attempt + 1} failed for {ticker}: {str(e)}")
            time.sleep(2)  # Wait before retrying

    return None

# Fetch data and combine into a single DataFrame
combined_data = {}
for asset_name, ticker in assets.items():
    df = fetch_historical_data(ticker)

    if df is not None and not df.empty:
        # Store closing prices with asset name as key
        combined_data[asset_name] = df['close']
    else:
        print(f"No data returned for {asset_name} ({ticker}).")

# Combine all asset closing prices into a single DataFrame
df_combined = pd.DataFrame(combined_data)

# Check if we have any data
if df_combined.empty:
    raise ValueError("No data was successfully retrieved. Please check your tickers and date range.")

# Calculate daily returns with specified fill method to avoid FutureWarning
returns = df_combined.pct_change(fill_method=None).dropna()

# Display the combined DataFrame for better readability
print("Combined Closing Prices:")
print(df_combined.head())  # Display first few rows of closing prices

print("\nDaily Returns:")
print(returns.head())  # Display first few rows of returns


# Export the combined DataFrame to an Excel file
output_file = "financial_data.xlsx"
df_combined.to_excel(output_file)

print(f"Data exported successfully to {output_file}")

2. Exploratory Data Analysis

Let's visualize our data to better understand the relationships between different asset classes.

python
import io
import base64
from IPython.display import HTML

def fig_to_base64(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# Correlation heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(returns.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap of Asset Returns')
heatmap_base64 = fig_to_base64(plt.gcf())
plt.close()

# Cumulative returns plot
cumulative_returns = (1 + returns).cumprod()
plt.figure(figsize=(12, 6))
cumulative_returns.plot()
plt.title('Cumulative Returns of Asset Classes')
plt.ylabel('Cumulative Return')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
cumulative_returns_base64 = fig_to_base64(plt.gcf())
plt.close()

# Display images
display(HTML(f''))
display(HTML(f''))

# Store base64 strings in variables (optional, for debugging or later use)
heatmap_data = f'"image/png": "{heatmap_base64[:50]}..."'
cumulative_returns_data = f'"image/png": "{cumulative_returns_base64[:50]}..."'

print("Heatmap data (preview):", heatmap_data)
print("Cumulative returns data (preview):", cumulative_returns_data)

3. Risk and Return Calculations

Now let's calculate key risk and return metrics for each asset class.

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Check if returns DataFrame is not empty before calculations
if not returns.empty:
    # Calculate annualized returns and volatility
    annual_returns = returns.mean() * 252
    annual_volatility = returns.std() * np.sqrt(252)

    # Calculate Sharpe Ratio (assuming risk-free rate of 2%)
    risk_free_rate = 0.02
    sharpe_ratio = (annual_returns - risk_free_rate) / annual_volatility

    # Combine metrics into a DataFrame
    risk_return_metrics = pd.DataFrame({
        'Return': annual_returns,
        'Volatility': annual_volatility,
        'Sharpe Ratio': sharpe_ratio
    })

    # Sort and display the metrics
    print(risk_return_metrics.sort_values('Sharpe Ratio', ascending=False))

    # Visualize risk-return tradeoff
    plt.figure(figsize=(12, 8))
    sns.scatterplot(data=risk_return_metrics, x='Volatility', y='Return', size='Sharpe Ratio',
                    sizes=(50, 500), legend='brief', hue='Sharpe Ratio', palette='viridis')

    # Annotate points with asset names
    for i, asset in enumerate(risk_return_metrics.index):
        plt.annotate(asset,
                     (risk_return_metrics['Volatility'][i], risk_return_metrics['Return'][i]),
                     xytext=(5, 5), textcoords='offset points')

    plt.title('Risk-Return Tradeoff of Various Asset Classes')
    plt.xlabel('Risk (Annualized Volatility)')
    plt.ylabel('Return (Annualized)')
    plt.tight_layout()
    plt.show()
else:
    print("No return data available for calculations.")

4. Efficient Frontier Computation

Let's compute the efficient frontier to understand the optimal risk-return tradeoffs.

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize

def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252  # Annualized return
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))  # Annualized volatility
    return portfolio_return, portfolio_volatility

def negative_sharpe_ratio(weights, returns, risk_free_rate):
    p_return, p_volatility = portfolio_performance(weights, returns)
    return -(p_return - risk_free_rate) / p_volatility  # Negative Sharpe Ratio for minimization

def calculate_efficient_frontier(returns, num_portfolios=1000):
    num_assets = len(returns.columns)
    results = np.zeros((3, num_portfolios))

    for i in range(num_portfolios):
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)  # Normalize weights
        p_return, p_volatility = portfolio_performance(weights, returns)
        results[0,i] = p_return
        results[1,i] = p_volatility
        results[2,i] = p_return / p_volatility  # Sharpe Ratio

    return results.T

# Calculate efficient frontier
efficient_frontier = calculate_efficient_frontier(returns)

# Find the optimal portfolio (maximum Sharpe ratio)
num_assets = len(returns.columns)
risk_free_rate = 0.02  # Define risk-free rate if not already defined
args = (returns, risk_free_rate)
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})  # Weights sum to 1
bound = (0.0, 1.0)
bounds = tuple(bound for asset in range(num_assets))

result = minimize(negative_sharpe_ratio, num_assets*[1./num_assets], args=args,
                  method='SLSQP', bounds=bounds, constraints=constraints)

optimal_weights = result.x
optimal_return, optimal_volatility = portfolio_performance(optimal_weights, returns)

# Plot efficient frontier
plt.figure(figsize=(12, 8))
plt.scatter(efficient_frontier[:,1], efficient_frontier[:,0], c=efficient_frontier[:,2], cmap='viridis')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Volatility')
plt.ylabel('Return')
plt.title('Efficient Frontier')

# Plot individual assets
for i, asset in enumerate(returns.columns):
    plt.scatter(risk_return_metrics.loc[asset, 'Volatility'],
                risk_return_metrics.loc[asset, 'Return'],
                marker='o', s=200, label=asset)

# Plot optimal portfolio
plt.scatter(optimal_volatility, optimal_return, c='red', s=200, marker='*', label='Optimal Portfolio')

plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

print("Optimal Portfolio Weights:")
for asset, weight in zip(returns.columns, optimal_weights):
    print(f"{asset}: {weight:.4f}")

5. Advanced Risk Metrics

Let's calculate Value at Risk (VaR) and Conditional Value at Risk (CVaR) for our optimal portfolio.

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def calculate_var_cvar(returns, weights, confidence_level=0.95):
    # Calculate portfolio returns
    portfolio_returns = np.sum(returns * weights, axis=1)

    # Calculate VaR
    var = np.percentile(portfolio_returns, 100 * (1 - confidence_level))

    # Calculate CVaR
    cvar = portfolio_returns[portfolio_returns <= var].mean()

    return var, cvar

# Check if returns DataFrame is not empty before proceeding
if not returns.empty and optimal_weights is not None:
    # Calculate optimal portfolio returns
    optimal_portfolio_returns = np.sum(returns * optimal_weights, axis=1)

    # Calculate 95% VaR and CVaR
    var_95, cvar_95 = calculate_var_cvar(returns, optimal_weights)

    # Print results
    print(f"95% VaR: {var_95:.4f}")
    print(f"95% CVaR: {cvar_95:.4f}")

    # Visualize VaR and CVaR
    plt.figure(figsize=(12, 6))
    sns.histplot(optimal_portfolio_returns, kde=True)
    plt.axvline(var_95, color='r', linestyle='dashed', label='95% VaR')
    plt.axvline(cvar_95, color='g', linestyle='dashed', label='95% CVaR')
    plt.title('Distribution of Optimal Portfolio Returns with VaR and CVaR')
    plt.xlabel('Daily Return')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()
else:
    print("Returns data or optimal weights are not available for calculations.")

6. Scenario Analysis and Stress Testing

Let's perform a simple scenario analysis to see how our optimal portfolio would perform under different market conditions.

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def scenario_analysis(weights, returns, scenarios):
    # Calculate portfolio returns based on weights
    portfolio_returns = np.sum(returns * weights, axis=1)
    scenario_results = {}

    for scenario, shock in scenarios.items():
        # Convert shock dictionary to a Series
        shock_series = pd.Series(shock)
        shocked_returns = returns + shock_series

        # Calculate scenario portfolio returns
        scenario_portfolio_returns = np.sum(shocked_returns * weights, axis=1)

        # Store results for each scenario
        scenario_results[scenario] = {
            'Mean Return': scenario_portfolio_returns.mean(),
            'Volatility': scenario_portfolio_returns.std(),
            'VaR 95%': np.percentile(scenario_portfolio_returns, 5),
            'CVaR 95%': scenario_portfolio_returns[scenario_portfolio_returns <= np.percentile(scenario_portfolio_returns, 5)].mean()
        }

    return pd.DataFrame(scenario_results).T

# Define scenarios
scenarios = {
    'Base Case': {asset: 0 for asset in returns.columns},
    'Market Crash': {asset: -0.3 for asset in returns.columns},
    'Economic Boom': {asset: 0.2 for asset in returns.columns},
    'Rising Interest Rates': {'US Aggregate Bonds': -0.1, 'US Treasury Bonds': -0.15},
    'Commodity Boom': {'Gold': 0.25, 'Commodities': 0.3}
}

# Check if returns DataFrame is not empty before proceeding
if not returns.empty:
    # Assuming optimal_weights is defined and returns is your DataFrame of returns
    optimal_weights = [0.2] * len(returns.columns)  # Example weights; adjust as necessary

    # Perform scenario analysis
    scenario_results = scenario_analysis(optimal_weights, returns, scenarios)
    print(scenario_results)

    # Visualize scenario analysis results
    plt.figure(figsize=(12, 6))
    scenario_results[['Mean Return', 'Volatility']].plot(kind='bar')
    plt.title('Scenario Analysis: Mean Return and Volatility')
    plt.xlabel('Scenario')
    plt.ylabel('Value')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()
else:
    print("Returns data is not available for calculations.")

7. Performance Attribution

Performance attribution helps us understand which assets contributed most to our portfolio's performance. Let's implement a simple performance attribution analysis for our optimal portfolio.

python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
import base64
from IPython.display import HTML

def fig_to_base64(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

def performance_attribution(weights, returns):
    # Calculate portfolio return
    portfolio_return = np.sum(returns.mean() * weights) * 252  # Annualized return

    # Calculate asset contribution to returns
    asset_contribution = returns.mean() * weights * 252  # Annualized contribution
    percent_contribution = asset_contribution / portfolio_return  # Contribution percentage

    # Create DataFrame for attribution data
    attribution_data = pd.DataFrame({
        'Weight': weights,
        'Return': returns.mean() * 252,
        'Contribution': asset_contribution,
        'Percent Contribution': percent_contribution
    })
    return attribution_data.sort_values('Percent Contribution', ascending=False)

# Check if returns DataFrame is not empty before proceeding
if not returns.empty and optimal_weights is not None:
    # Calculate performance attribution
    attribution = performance_attribution(optimal_weights, returns)
    print(attribution)

    # Visualize performance attribution
    fig, ax = plt.subplots(figsize=(12, 6))
    attribution['Percent Contribution'].plot(kind='bar', ax=ax)
    ax.set_title('Performance Attribution of Optimal Portfolio')
    ax.set_xlabel('Asset')
    ax.set_ylabel('Percent Contribution to Return')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Convert plot to base64
    plot_base64 = fig_to_base64(fig)
    plt.close(fig)

    # Display the plot
    display(HTML(f''))

    # Store base64 string in a variable (optional, for debugging or later use)
    plot_data = f'"image/png": "{plot_base64[:50]}..."'
    print("Plot data (preview):", plot_data)
else:
    print("Returns data or optimal weights are not available for calculations.")

8. Scenario Analysis and Stress Testing

Now, let's expand on our previous scenario analysis to include more detailed stress testing. We'll define several economic scenarios and see how our portfolio performs under each.

python
def detailed_scenario_analysis(weights, returns, scenarios):
    portfolio_returns = np.sum(returns * weights, axis=1)
    base_performance = {
        'Mean Return': portfolio_returns.mean() * 252,
        'Volatility': portfolio_returns.std() * np.sqrt(252),
        'Sharpe Ratio': (portfolio_returns.mean() * 252 - risk_free_rate) / (portfolio_returns.std() * np.sqrt(252)),
        'VaR 95%': np.percentile(portfolio_returns, 5) * np.sqrt(252),
        'CVaR 95%': portfolio_returns[portfolio_returns <= np.percentile(portfolio_returns, 5)].mean() * np.sqrt(252),
        'Max Drawdown': (portfolio_returns.cumsum() - portfolio_returns.cumsum().cummax()).min()
    }

    scenario_results = {'Base Case': base_performance}

    for scenario, shocks in scenarios.items():
        shocked_returns = returns.copy()
        for asset, shock in shocks.items():
            shocked_returns[asset] += shock

        scenario_portfolio_returns = np.sum(shocked_returns * weights, axis=1)
        scenario_results[scenario] = {
            'Mean Return': scenario_portfolio_returns.mean() * 252,
            'Volatility': scenario_portfolio_returns.std() * np.sqrt(252),
            'Sharpe Ratio': (scenario_portfolio_returns.mean() * 252 - risk_free_rate) / (scenario_portfolio_returns.std() * np.sqrt(252)),
            'VaR 95%': np.percentile(scenario_portfolio_returns, 5) * np.sqrt(252),
            'CVaR 95%': scenario_portfolio_returns[scenario_portfolio_returns <= np.percentile(scenario_portfolio_returns, 5)].mean() * np.sqrt(252),
            'Max Drawdown': (scenario_portfolio_returns.cumsum() - scenario_portfolio_returns.cumsum().cummax()).min()
        }

    return pd.DataFrame(scenario_results).T

# Define more detailed scenarios
detailed_scenarios = {
    'Market Crash': {'US Large Cap': -0.4, 'US Small Cap': -0.5, 'International Developed': -0.35, 'Emerging Markets': -0.45, 'US Aggregate Bonds': 0.05, 'US Treasury Bonds': 0.1, 'Real Estate': -0.3, 'Gold': 0.15, 'Commodities': -0.25},
    'Economic Boom': {'US Large Cap': 0.25, 'US Small Cap': 0.3, 'International Developed': 0.2, 'Emerging Markets': 0.35, 'US Aggregate Bonds': -0.05, 'US Treasury Bonds': -0.1, 'Real Estate': 0.2, 'Gold': -0.1, 'Commodities': 0.15},
    'Rising Interest Rates': {'US Large Cap': -0.1, 'US Small Cap': -0.15, 'International Developed': -0.05, 'Emerging Markets': -0.1, 'US Aggregate Bonds': -0.2, 'US Treasury Bonds': -0.25, 'Real Estate': -0.15, 'Gold': -0.05, 'Commodities': 0.05},
    'Geopolitical Tension': {'US Large Cap': -0.15, 'US Small Cap': -0.2, 'International Developed': -0.25, 'Emerging Markets': -0.3, 'US Aggregate Bonds': 0.1, 'US Treasury Bonds': 0.15, 'Real Estate': -0.1, 'Gold': 0.25, 'Commodities': 0.2},
    'Tech Boom': {'US Large Cap': 0.3, 'US Small Cap': 0.35, 'International Developed': 0.2, 'Emerging Markets': 0.25, 'US Aggregate Bonds': -0.05, 'US Treasury Bonds': -0.1, 'Real Estate': 0.1, 'Gold': -0.05, 'Commodities': 0},
}

# Run detailed scenario analysis
detailed_scenario_results = detailed_scenario_analysis(optimal_weights, returns, detailed_scenarios)
print(detailed_scenario_results)

# Visualize scenario analysis results
plt.figure(figsize=(15, 10))
detailed_scenario_results[['Mean Return', 'Volatility', 'Sharpe Ratio', 'VaR 95%', 'CVaR 95%']].plot(kind='bar', subplots=True, layout=(3,2), sharex=False, figsize=(15,20))
plt.tight_layout()
plt.show()

# Visualize max drawdown for each scenario
plt.figure(figsize=(12, 6))
detailed_scenario_results['Max Drawdown'].plot(kind='bar')
plt.title('Max Drawdown Under Different Scenarios')
plt.xlabel('Scenario')
plt.ylabel('Max Drawdown')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()