Load Test Results Analysis

This notebook analyzes the results of the video processing load tests at different concurrency levels: latency distributions, throughput, per-stage processing time breakdowns, request timelines, and error rates.
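
The analysis assumes each load test CSV has one row per request. The columns below are inferred from the code in this notebook; the synthetic frame is only a sketch for dry-running the analysis without real test files, and the actual schema comes from whatever load test script produced the CSVs.

python
import time
import numpy as np
import pandas as pd

# Synthetic example of the assumed per-request schema (hypothetical values).
# Real result files may carry extra columns, for example 'error' or 'status_code' on failures.
rng = np.random.default_rng(0)
n = 100
now = time.time()
starts = now + np.arange(n)
durations_s = rng.uniform(1.0, 3.0, n)
sample = pd.DataFrame({
    'start_time': starts,                    # request start, Unix seconds
    'end_time': starts + durations_s,        # request end, Unix seconds
    'latency_ms': durations_s * 1000,        # end-to-end latency in milliseconds
    'success': True,
    's3_download_ms': rng.uniform(100, 400, n),
    'decode_video_ms': rng.uniform(200, 800, n),
    'encode_ms': rng.uniform(50, 200, n),
    'decode_ms': rng.uniform(50, 200, n),
})
# Filenames follow load_test_<timestamp>_<concurrency>.csv, for example:
# sample.to_csv('load_test_20260102_024910_2.csv', index=False)
sample.head()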

python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import re
from pathlib import Path

# Style configuration
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 11

# Color palette
COLORS = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#f39c12', '#1abc9c']
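# Figures in this notebook are saved under assets/; create it up front so savefig doesn't fail.
Path('assets').mkdir(exist_ok=True)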

Load and Process Data

python
def extract_concurrency(filename):
    """Extract concurrency level from filename like 'load_test_20260102_024910_2.csv'"""
    match = re.search(r'_(\d+)\.csv$', filename)
    return int(match.group(1)) if match else None

def load_all_results(pattern='load_test_*.csv', discard_first_half=True):
    """Load all load test CSV files and combine them
    
    Args:
        pattern: glob pattern to match files
        discard_first_half: if True, discard the first half of records from each file
                            (to exclude the autoscaling warm-up period, which has high latencies)
    """
    all_files = glob.glob(pattern)
    print(f"Found {len(all_files)} load test files")
    
    dfs = []
    for file in sorted(all_files):
        concurrency = extract_concurrency(file)
        if concurrency is not None:
            df = pd.read_csv(file)
            original_len = len(df)
            
            # Discard first half to exclude autoscaling warm-up
            if discard_first_half:
                df = df.iloc[len(df)//2:].reset_index(drop=True)
            
            df['concurrency'] = concurrency
            df['filename'] = Path(file).name
            dfs.append(df)
            print(f"  Loaded {file}: {len(df)}/{original_len} requests (kept 2nd half), concurrency={concurrency}")
    
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

# Load all data (discarding first half to exclude autoscaling warm-up)
df = load_all_results(discard_first_half=True)
print(f"\nTotal requests loaded: {len(df):,}")

python
# Data overview
print("Columns:", df.columns.tolist())
print("\nData types:")
print(df.dtypes)
print("\nConcurrency levels tested:", sorted(df['concurrency'].unique()))

python
df.head(10)

Summary Statistics by Concurrency

python
def compute_stats(group):
    """Compute summary statistics for a group of requests"""
    latency = group['latency_ms']
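    # Wall-clock span of the group; assumes start_time/end_time are Unix timestamps in seconds,
    # so throughput_rps below works out to requests per second.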
    duration = group['end_time'].max() - group['start_time'].min()
    
    return pd.Series({
        'total_requests': len(group),
        'success_count': group['success'].sum(),
        'success_rate_%': group['success'].mean() * 100,
        'latency_mean_ms': latency.mean(),
        'latency_std_ms': latency.std(),
        'latency_min_ms': latency.min(),
        'latency_p50_ms': latency.quantile(0.5),
        'latency_p90_ms': latency.quantile(0.9),
        'latency_p95_ms': latency.quantile(0.95),
        'latency_p99_ms': latency.quantile(0.99),
        'latency_max_ms': latency.max(),
        'test_duration_s': duration,
        'throughput_rps': len(group) / duration if duration > 0 else 0,
    })

# Compute statistics by concurrency
stats_by_concurrency = df.groupby('concurrency').apply(compute_stats).reset_index()
stats_by_concurrency = stats_by_concurrency.sort_values('concurrency')

# Display formatted
display_cols = ['concurrency', 'total_requests', 'success_rate_%', 
                'latency_mean_ms', 'latency_p50_ms', 'latency_p95_ms', 'latency_p99_ms',
                'throughput_rps']
stats_by_concurrency[display_cols].round(2)
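
Optionally, persist the aggregated table next to the generated figures (assets/ is the same directory the plotting cells below save into; the filename is arbitrary):

python
# Optional: keep the per-concurrency summary alongside the generated plots.
stats_by_concurrency.round(2).to_csv('assets/load_test_summary_by_concurrency.csv', index=False)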

Latency Distribution Analysis

python
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

concurrencies = sorted(df['concurrency'].unique())

# Box plot of latencies
ax1 = axes[0]
data_for_box = [df[df['concurrency'] == c]['latency_ms'].values for c in concurrencies]
bp = ax1.boxplot(data_for_box, labels=concurrencies, patch_artist=True)
for patch, color in zip(bp['boxes'], COLORS[:len(concurrencies)]):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
ax1.set_xlabel('Concurrency')
ax1.set_ylabel('Latency (ms)')
ax1.set_title('Latency Distribution by Concurrency')

# Latency percentiles line chart
ax2 = axes[1]
percentiles = ['latency_p50_ms', 'latency_p90_ms', 'latency_p95_ms', 'latency_p99_ms']
labels = ['P50', 'P90', 'P95', 'P99']
for i, (p, label) in enumerate(zip(percentiles, labels)):
    ax2.plot(stats_by_concurrency['concurrency'], stats_by_concurrency[p], 
             marker='o', label=label, color=COLORS[i], linewidth=2, markersize=8)
ax2.set_xlabel('Concurrency')
ax2.set_ylabel('Latency (ms)')
ax2.set_title('Latency Percentiles vs Concurrency')
ax2.legend()
ax2.set_xticks(concurrencies)

plt.tight_layout()
fig.savefig('assets/latency_distribution_by_concurrency.png', dpi=150, bbox_inches='tight')
plt.show()

python
# Histogram of latencies for each concurrency
n_concurrencies = len(concurrencies)
n_cols = min(3, n_concurrencies)
n_rows = (n_concurrencies + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows))
if n_concurrencies == 1:
    axes = np.array([axes])
axes = axes.flatten()

for i, (c, color) in enumerate(zip(concurrencies, COLORS)):
    ax = axes[i]
    data = df[df['concurrency'] == c]['latency_ms']
    ax.hist(data, bins=50, color=color, alpha=0.7, edgecolor='black', linewidth=0.5)
    ax.axvline(data.median(), color='red', linestyle='--', label=f'Median: {data.median():.0f}ms')
    ax.axvline(data.quantile(0.95), color='orange', linestyle='--', label=f'P95: {data.quantile(0.95):.0f}ms')
    ax.set_xlabel('Latency (ms)')
    ax.set_ylabel('Count')
    ax.set_title(f'Concurrency = {c}')
    ax.legend(fontsize=9)

# Hide unused subplots
for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)

plt.suptitle('Latency Distribution Histograms', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/latency_histograms.png', dpi=150, bbox_inches='tight')
plt.show()

Throughput Analysis

python
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Throughput vs Concurrency
ax1 = axes[0]
ax1.bar(stats_by_concurrency['concurrency'].astype(str), 
        stats_by_concurrency['throughput_rps'],
        color=COLORS[:len(concurrencies)], alpha=0.8, edgecolor='black')
ax1.set_xlabel('Concurrency')
ax1.set_ylabel('Throughput (requests/second)')
ax1.set_title('Throughput vs Concurrency')

# Add value labels on bars
for i, (c, v) in enumerate(zip(stats_by_concurrency['concurrency'], stats_by_concurrency['throughput_rps'])):
    ax1.text(i, v + 0.02 * max(stats_by_concurrency['throughput_rps']), 
             f'{v:.2f}', ha='center', va='bottom', fontsize=10)

# Latency vs Throughput tradeoff
ax2 = axes[1]
scatter = ax2.scatter(stats_by_concurrency['throughput_rps'], 
                      stats_by_concurrency['latency_p95_ms'],
                      c=stats_by_concurrency['concurrency'], 
                      s=150, cmap='viridis', edgecolors='black', linewidth=1)
for _, row in stats_by_concurrency.iterrows():
    ax2.annotate(f"C={int(row['concurrency'])}", 
                 (row['throughput_rps'], row['latency_p95_ms']),
                 textcoords="offset points", xytext=(5, 5), fontsize=10)
ax2.set_xlabel('Throughput (requests/second)')
ax2.set_ylabel('P95 Latency (ms)')
ax2.set_title('Latency vs Throughput Tradeoff')
plt.colorbar(scatter, ax=ax2, label='Concurrency')

plt.tight_layout()
fig.savefig('assets/throughput_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

Processing Time Breakdown

python
# Compute mean processing times by concurrency
time_components = ['s3_download_ms', 'decode_video_ms', 'encode_ms', 'decode_ms']
available_components = [c for c in time_components if c in df.columns]

time_breakdown = df.groupby('concurrency')[available_components].mean()
time_breakdown = time_breakdown.sort_index()
print("Mean Processing Time Breakdown (ms):")
time_breakdown.round(2)

python
# Stacked bar chart of processing time components
fig, ax = plt.subplots(figsize=(12, 6))

x = np.arange(len(time_breakdown))
width = 0.6
bottom = np.zeros(len(time_breakdown))

component_colors = {'s3_download_ms': '#3498db', 'decode_video_ms': '#2ecc71', 
                    'encode_ms': '#e74c3c', 'decode_ms': '#f39c12'}
component_labels = {'s3_download_ms': 'S3 Download', 'decode_video_ms': 'Video Decode', 
                    'encode_ms': 'Encode', 'decode_ms': 'Decode'}

for component in available_components:
    values = time_breakdown[component].values
    ax.bar(x, values, width, label=component_labels.get(component, component), 
           bottom=bottom, color=component_colors.get(component, '#95a5a6'), alpha=0.8)
    bottom += values

ax.set_xlabel('Concurrency')
ax.set_ylabel('Processing Time (ms)')
ax.set_title('Mean Processing Time Breakdown by Concurrency')
ax.set_xticks(x)
ax.set_xticklabels(time_breakdown.index)
ax.legend(loc='upper left')

# Add total time labels
for i, total in enumerate(bottom):
    ax.text(i, total + 20, f'{total:.0f}ms', ha='center', va='bottom', fontsize=10)

plt.tight_layout()
fig.savefig('assets/processing_time_breakdown.png', dpi=150, bbox_inches='tight')
plt.show()

python
# Processing time component percentage breakdown
n_cols = min(3, len(concurrencies))
n_rows = (len(concurrencies) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 5*n_rows))
if len(concurrencies) == 1:
    axes = np.array([axes])
axes = axes.flatten()

for i, c in enumerate(sorted(df['concurrency'].unique())):
    ax = axes[i]
    data = time_breakdown.loc[c]
    colors = [component_colors.get(comp, '#95a5a6') for comp in available_components]
    labels = [component_labels.get(comp, comp) for comp in available_components]
    
    wedges, texts, autotexts = ax.pie(data, colors=colors, autopct='%1.1f%%', 
                                       startangle=90, pctdistance=0.75)
    ax.set_title(f'Concurrency = {c}')
    
for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)

# Add legend using proxy patches so the entries get the component colors
# (the pie wedges themselves carry no legend labels for fig.legend to pick up)
from matplotlib.patches import Patch
labels = [component_labels.get(comp, comp) for comp in available_components]
handles = [Patch(facecolor=component_colors.get(comp, '#95a5a6')) for comp in available_components]
fig.legend(handles, labels, loc='lower center', ncol=len(available_components), bbox_to_anchor=(0.5, -0.02))

plt.suptitle('Processing Time Breakdown (%)', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/processing_time_pie_charts.png', dpi=150, bbox_inches='tight')
plt.show()

Request Timeline Analysis

python
# Time series of latency during the test
n_cols = min(3, len(concurrencies))
n_rows = (len(concurrencies) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows))
if len(concurrencies) == 1:
    axes = np.array([axes])
axes = axes.flatten()

for i, (c, color) in enumerate(zip(concurrencies, COLORS)):
    ax = axes[i]
    data = df[df['concurrency'] == c].copy()
    data['relative_time'] = data['start_time'] - data['start_time'].min()
    
    ax.scatter(data['relative_time'], data['latency_ms'], alpha=0.5, s=10, color=color)
    
    # Rolling average
    if len(data) > 20:
        rolling = data.sort_values('relative_time')['latency_ms'].rolling(window=20, min_periods=1).mean()
        ax.plot(data.sort_values('relative_time')['relative_time'], rolling, 
                color='red', linewidth=2, label='Rolling Avg (20)')
    
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Latency (ms)')
    ax.set_title(f'Concurrency = {c}')
    ax.legend()

for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)

plt.suptitle('Latency Over Time', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/latency_over_time.png', dpi=150, bbox_inches='tight')
plt.show()

Error Analysis

python
# Check for failed requests
failed = df[df['success'] == False]
print(f"Total failed requests: {len(failed)} ({len(failed)/len(df)*100:.2f}%)")

if len(failed) > 0:
    print("\nFailure breakdown by concurrency:")
    failure_stats = df.groupby('concurrency').agg(
        total=('success', 'count'),
        failures=('success', lambda x: (~x).sum()),
        failure_rate=('success', lambda x: (~x).mean() * 100)
    )
    print(failure_stats)
    
    if 'error' in failed.columns:
        print("\nError types:")
        print(failed['error'].value_counts())
    
    if 'status_code' in failed.columns:
        print("\nStatus codes for failures:")
        print(failed['status_code'].value_counts())
else:
    print("\n✅ All requests succeeded!")

Summary Report

python
print("=" * 70)
print("LOAD TEST SUMMARY REPORT")
print("=" * 70)

print(f"\n📊 Test Overview:")
print(f"   Total requests analyzed: {len(df):,}")
print(f"   Concurrency levels tested: {sorted(concurrencies)}")
print(f"   Overall success rate: {df['success'].mean()*100:.2f}%")

print(f"\n⚡ Performance Highlights:")
best_throughput = stats_by_concurrency.loc[stats_by_concurrency['throughput_rps'].idxmax()]
print(f"   Best throughput: {best_throughput['throughput_rps']:.2f} req/s at concurrency {int(best_throughput['concurrency'])}")

best_latency = stats_by_concurrency.loc[stats_by_concurrency['latency_p95_ms'].idxmin()]
print(f"   Best P95 latency: {best_latency['latency_p95_ms']:.0f}ms at concurrency {int(best_latency['concurrency'])}")

print(f"\n📈 Latency by Concurrency:")
for _, row in stats_by_concurrency.iterrows():
    print(f"   C={int(row['concurrency']):2d}: P50={row['latency_p50_ms']:7.0f}ms, "
          f"P95={row['latency_p95_ms']:7.0f}ms, P99={row['latency_p99_ms']:7.0f}ms, "
          f"Throughput={row['throughput_rps']:.2f} req/s")

print(f"\n🔧 Processing Time Breakdown (mean at best throughput concurrency):")
best_c = int(best_throughput['concurrency'])
for comp in available_components:
    mean_time = df[df['concurrency'] == best_c][comp].mean()
    print(f"   {component_labels.get(comp, comp)}: {mean_time:.2f}ms")

print("\n" + "=" * 70)

python
# Final comparison table
final_table = stats_by_concurrency[['concurrency', 'total_requests', 'success_rate_%',
                                     'latency_mean_ms', 'latency_p50_ms', 'latency_p95_ms', 
                                     'latency_p99_ms', 'throughput_rps']].copy()
final_table.columns = ['Concurrency', 'Requests', 'Success %', 
                       'Mean (ms)', 'P50 (ms)', 'P95 (ms)', 'P99 (ms)', 'Throughput (rps)']
final_table = final_table.round(2)
final_table
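
If the comparison table needs to go into a report or a Markdown page, pandas can render it directly (DataFrame.to_markdown relies on the optional tabulate package):

python
# Optional: render the final comparison table as Markdown.
print(final_table.to_markdown(index=False))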