doc/source/serve/tutorials/video-analysis/analyze_load_tests.ipynb
This notebook analyzes the results from video processing load tests at different concurrency levels.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import re
from pathlib import Path
# Global plotting style: dark-grid theme, wider default canvas, larger font.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (12, 6), 'font.size': 11})
# Shared palette, indexed by position so each concurrency level keeps the
# same color across every chart in the notebook.
COLORS = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#f39c12', '#1abc9c']
def extract_concurrency(filename):
    """Pull the trailing concurrency level out of a load-test filename.

    e.g. 'load_test_20260102_024910_2.csv' -> 2. Returns None when the
    name does not end with '_<digits>.csv'.
    """
    m = re.search(r'_(\d+)\.csv$', filename)
    if m is None:
        return None
    return int(m.group(1))
def load_all_results(pattern='load_test_*.csv', discard_first_half=True):
    """Load all load test CSV files and combine them into one DataFrame.

    Args:
        pattern: glob pattern to match files
        discard_first_half: if True, discard the first half of records from each file
            (to exclude autoscaling warm-up period with high latencies)

    Returns:
        A concatenated DataFrame with added 'concurrency' and 'filename'
        columns, or an empty DataFrame when no files match.
    """
    all_files = glob.glob(pattern)
    print(f"Found {len(all_files)} load test files")
    dfs = []
    for file in sorted(all_files):
        concurrency = extract_concurrency(file)
        # Compare against None (not truthiness) so a file encoding
        # concurrency 0 would not be silently dropped.
        if concurrency is None:
            continue
        df = pd.read_csv(file)
        original_len = len(df)
        # Discard first half to exclude autoscaling warm-up
        if discard_first_half:
            df = df.iloc[len(df)//2:].reset_index(drop=True)
        df['concurrency'] = concurrency
        df['filename'] = Path(file).name
        dfs.append(df)
        # Only claim "kept 2nd half" when the first half was actually dropped.
        kept_note = ' (kept 2nd half)' if discard_first_half else ''
        print(f" Loaded {file}: {len(df)}/{original_len} requests{kept_note}, concurrency={concurrency}")
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
# Load all data (discarding first half to exclude autoscaling warm-up)
df = load_all_results(discard_first_half=True)
print(f"\nTotal requests loaded: {len(df):,}")
# Data overview: columns, dtypes, and which concurrency levels were tested.
print("Columns:", df.columns.tolist())
print("\nData types:")
print(df.dtypes)
print("\nConcurrency levels tested:", sorted(df['concurrency'].unique()))
df.head(10)  # last expression in the cell: notebook renders the first 10 rows
def compute_stats(group):
    """Summarize one group of requests (latency percentiles, success, throughput).

    Expects columns 'latency_ms', 'success', 'start_time', 'end_time'.
    Returns a pd.Series of summary metrics for the group.
    """
    lat = group['latency_ms']
    n = len(group)
    # Wall-clock span of the group, first request start to last request end.
    span = group['end_time'].max() - group['start_time'].min()
    summary = {
        'total_requests': n,
        'success_count': group['success'].sum(),
        'success_rate_%': group['success'].mean() * 100,
        'latency_mean_ms': lat.mean(),
        'latency_std_ms': lat.std(),
        'latency_min_ms': lat.min(),
        'latency_p50_ms': lat.quantile(0.5),
        'latency_p90_ms': lat.quantile(0.9),
        'latency_p95_ms': lat.quantile(0.95),
        'latency_p99_ms': lat.quantile(0.99),
        'latency_max_ms': lat.max(),
        'test_duration_s': span,
        # Guard against a zero-length span (e.g. a single instantaneous request).
        'throughput_rps': n / span if span > 0 else 0,
    }
    return pd.Series(summary)
# Per-concurrency summary table: one row of compute_stats metrics per level.
stats_by_concurrency = (
    df.groupby('concurrency')
    .apply(compute_stats)
    .reset_index()
    .sort_values('concurrency')
)
# Compact, rounded view of the headline metrics for display.
display_cols = [
    'concurrency', 'total_requests', 'success_rate_%',
    'latency_mean_ms', 'latency_p50_ms', 'latency_p95_ms', 'latency_p99_ms',
    'throughput_rps',
]
stats_by_concurrency[display_cols].round(2)
# Two-panel latency figure: distribution (box plot) and percentile trends.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
concurrencies = sorted(df['concurrency'].unique())
# Box plot of latencies
ax1 = axes[0]
# One array of latency samples per concurrency level, in ascending order.
data_for_box = [df[df['concurrency'] == c]['latency_ms'].values for c in concurrencies]
# NOTE(review): boxplot's `labels=` kwarg was renamed `tick_labels` in
# matplotlib >= 3.9 — confirm against the pinned matplotlib version.
bp = ax1.boxplot(data_for_box, labels=concurrencies, patch_artist=True)
for patch, color in zip(bp['boxes'], COLORS[:len(concurrencies)]):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)
ax1.set_xlabel('Concurrency')
ax1.set_ylabel('Latency (ms)')
ax1.set_title('Latency Distribution by Concurrency')
# Latency percentiles line chart
ax2 = axes[1]
percentiles = ['latency_p50_ms', 'latency_p90_ms', 'latency_p95_ms', 'latency_p99_ms']
labels = ['P50', 'P90', 'P95', 'P99']
# One line per percentile, colored from the shared palette.
for i, (p, label) in enumerate(zip(percentiles, labels)):
    ax2.plot(stats_by_concurrency['concurrency'], stats_by_concurrency[p],
             marker='o', label=label, color=COLORS[i], linewidth=2, markersize=8)
ax2.set_xlabel('Concurrency')
ax2.set_ylabel('Latency (ms)')
ax2.set_title('Latency Percentiles vs Concurrency')
ax2.legend()
ax2.set_xticks(concurrencies)
plt.tight_layout()
fig.savefig('assets/latency_distribution_by_concurrency.png', dpi=150, bbox_inches='tight')
plt.show()
# Histogram of latencies for each concurrency
# Grid layout: up to 3 columns, as many rows as needed.
n_concurrencies = len(concurrencies)
n_cols = min(3, n_concurrencies)
n_rows = (n_concurrencies + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows))
if n_concurrencies == 1:
    # plt.subplots returns a bare Axes (not an array) for a 1x1 grid.
    axes = np.array([axes])
axes = axes.flatten()
# NOTE(review): zip truncates silently if there are more concurrency levels
# than entries in COLORS (6) — extra levels would get no histogram.
for i, (c, color) in enumerate(zip(concurrencies, COLORS)):
    ax = axes[i]
    data = df[df['concurrency'] == c]['latency_ms']
    ax.hist(data, bins=50, color=color, alpha=0.7, edgecolor='black', linewidth=0.5)
    # Mark median and P95 so the tail is visible at a glance.
    ax.axvline(data.median(), color='red', linestyle='--', label=f'Median: {data.median():.0f}ms')
    ax.axvline(data.quantile(0.95), color='orange', linestyle='--', label=f'P95: {data.quantile(0.95):.0f}ms')
    ax.set_xlabel('Latency (ms)')
    ax.set_ylabel('Count')
    ax.set_title(f'Concurrency = {c}')
    ax.legend(fontsize=9)
# Hide unused subplots
for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('Latency Distribution Histograms', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/latency_histograms.png', dpi=150, bbox_inches='tight')
plt.show()
# Throughput analysis: absolute throughput bars plus latency/throughput tradeoff.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Throughput vs Concurrency
ax1 = axes[0]
# astype(str) makes the x-axis categorical: bars are evenly spaced regardless
# of the numeric gaps between concurrency levels.
ax1.bar(stats_by_concurrency['concurrency'].astype(str),
        stats_by_concurrency['throughput_rps'],
        color=COLORS[:len(concurrencies)], alpha=0.8, edgecolor='black')
ax1.set_xlabel('Concurrency')
ax1.set_ylabel('Throughput (requests/second)')
ax1.set_title('Throughput vs Concurrency')
# Add value labels on bars, offset upward by 2% of the tallest bar.
for i, (c, v) in enumerate(zip(stats_by_concurrency['concurrency'], stats_by_concurrency['throughput_rps'])):
    ax1.text(i, v + 0.02 * max(stats_by_concurrency['throughput_rps']),
             f'{v:.2f}', ha='center', va='bottom', fontsize=10)
# Latency vs Throughput tradeoff
ax2 = axes[1]
scatter = ax2.scatter(stats_by_concurrency['throughput_rps'],
                      stats_by_concurrency['latency_p95_ms'],
                      c=stats_by_concurrency['concurrency'],
                      s=150, cmap='viridis', edgecolors='black', linewidth=1)
# Annotate each point with its concurrency level.
for _, row in stats_by_concurrency.iterrows():
    ax2.annotate(f"C={int(row['concurrency'])}",
                 (row['throughput_rps'], row['latency_p95_ms']),
                 textcoords="offset points", xytext=(5, 5), fontsize=10)
ax2.set_xlabel('Throughput (requests/second)')
ax2.set_ylabel('P95 Latency (ms)')
ax2.set_title('Latency vs Throughput Tradeoff')
plt.colorbar(scatter, ax=ax2, label='Concurrency')
plt.tight_layout()
fig.savefig('assets/throughput_analysis.png', dpi=150, bbox_inches='tight')
plt.show()
# Mean time spent in each pipeline stage, averaged per concurrency level.
time_components = ['s3_download_ms', 'decode_video_ms', 'encode_ms', 'decode_ms']
# Keep only the component columns the load-test client actually recorded.
available_components = [c for c in time_components if c in df.columns]
time_breakdown = df.groupby('concurrency')[available_components].mean().sort_index()
print("Mean Processing Time Breakdown (ms):")
time_breakdown.round(2)
# Stacked bar chart of processing time components
fig, ax = plt.subplots(figsize=(12, 6))
x = np.arange(len(time_breakdown))
width = 0.6
# Running top of each stack; every component is drawn on top of the previous.
bottom = np.zeros(len(time_breakdown))
component_colors = {'s3_download_ms': '#3498db', 'decode_video_ms': '#2ecc71',
                    'encode_ms': '#e74c3c', 'decode_ms': '#f39c12'}
component_labels = {'s3_download_ms': 'S3 Download', 'decode_video_ms': 'Video Decode',
                    'encode_ms': 'Encode', 'decode_ms': 'Decode'}
for component in available_components:
    values = time_breakdown[component].values
    # Fallback gray/raw-name for any component missing from the maps above.
    ax.bar(x, values, width, label=component_labels.get(component, component),
           bottom=bottom, color=component_colors.get(component, '#95a5a6'), alpha=0.8)
    bottom += values
ax.set_xlabel('Concurrency')
ax.set_ylabel('Processing Time (ms)')
ax.set_title('Mean Processing Time Breakdown by Concurrency')
ax.set_xticks(x)
ax.set_xticklabels(time_breakdown.index)
ax.legend(loc='upper left')
# Add total time labels — after the loop, `bottom` holds each bar's stacked sum.
for i, total in enumerate(bottom):
    ax.text(i, total + 20, f'{total:.0f}ms', ha='center', va='bottom', fontsize=10)
plt.tight_layout()
fig.savefig('assets/processing_time_breakdown.png', dpi=150, bbox_inches='tight')
plt.show()
# Processing time component percentage breakdown
n_cols = min(3, len(concurrencies))
n_rows = (len(concurrencies) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 5*n_rows))
if len(concurrencies) == 1:
    # Single panel: wrap the bare Axes so .flatten() works uniformly.
    axes = np.array([axes])
axes = axes.flatten()
for i, c in enumerate(sorted(df['concurrency'].unique())):
    ax = axes[i]
    # Mean component times for this concurrency; pie normalizes to percentages.
    data = time_breakdown.loc[c]
    colors = [component_colors.get(comp, '#95a5a6') for comp in available_components]
    labels = [component_labels.get(comp, comp) for comp in available_components]
    wedges, texts, autotexts = ax.pie(data, colors=colors, autopct='%1.1f%%',
                                      startangle=90, pctdistance=0.75)
    ax.set_title(f'Concurrency = {c}')
# Hide panels beyond the number of concurrency levels.
for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)
# Add legend
# NOTE(review): fig.legend is given label strings only (no explicit handles),
# so it relies on matplotlib pairing them with existing artists — confirm the
# legend colors line up with the pie wedges.
labels = [component_labels.get(comp, comp) for comp in available_components]
fig.legend(labels, loc='lower center', ncol=len(available_components), bbox_to_anchor=(0.5, -0.02))
plt.suptitle('Processing Time Breakdown (%)', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/processing_time_pie_charts.png', dpi=150, bbox_inches='tight')
plt.show()
# Time series of latency during the test
n_cols = min(3, len(concurrencies))
n_rows = (len(concurrencies) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows))
if n_concurrencies == 1 if False else len(concurrencies) == 1:
    axes = np.array([axes])
axes = axes.flatten()
for i, (c, color) in enumerate(zip(concurrencies, COLORS)):
    ax = axes[i]
    data = df[df['concurrency'] == c].copy()
    # Offset so each panel starts at t=0; units come from start_time
    # (presumably seconds — the x-label assumes so; TODO confirm).
    data['relative_time'] = data['start_time'] - data['start_time'].min()
    ax.scatter(data['relative_time'], data['latency_ms'], alpha=0.5, s=10, color=color)
    # Rolling average
    if len(data) > 20:
        # Sort by time so the rolling window follows chronological order.
        rolling = data.sort_values('relative_time')['latency_ms'].rolling(window=20, min_periods=1).mean()
        ax.plot(data.sort_values('relative_time')['relative_time'], rolling,
                color='red', linewidth=2, label='Rolling Avg (20)')
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Latency (ms)')
    ax.set_title(f'Concurrency = {c}')
    # NOTE(review): legend() warns when no rolling line was drawn (<=20 rows).
    ax.legend()
for i in range(len(concurrencies), len(axes)):
    axes[i].set_visible(False)
plt.suptitle('Latency Over Time', fontsize=14, y=1.02)
plt.tight_layout()
fig.savefig('assets/latency_over_time.png', dpi=150, bbox_inches='tight')
plt.show()
# Check for failed requests: overall rate, per-concurrency breakdown, error detail.
# NOTE(review): assumes 'success' is a boolean column (the ~x negation below
# requires it) — confirm the load-test client writes booleans, not strings.
failed = df[df['success'] == False]
print(f"Total failed requests: {len(failed)} ({len(failed)/len(df)*100:.2f}%)")
if len(failed) > 0:
    print("\nFailure breakdown by concurrency:")
    failure_stats = df.groupby('concurrency').agg(
        total=('success', 'count'),
        failures=('success', lambda x: (~x).sum()),
        failure_rate=('success', lambda x: (~x).mean() * 100)
    )
    print(failure_stats)
    # Optional columns: only present if the load-test client recorded them.
    if 'error' in failed.columns:
        print("\nError types:")
        print(failed['error'].value_counts())
    if 'status_code' in failed.columns:
        print("\nStatus codes for failures:")
        print(failed['status_code'].value_counts())
else:
    print("\n✅ All requests succeeded!")
# Plain-text summary report of the whole run, printed to the cell output.
print("=" * 70)
print("LOAD TEST SUMMARY REPORT")
print("=" * 70)
print(f"\n📊 Test Overview:")
print(f" Total requests analyzed: {len(df):,}")
print(f" Concurrency levels tested: {sorted(concurrencies)}")
print(f" Overall success rate: {df['success'].mean()*100:.2f}%")
print(f"\n⚡ Performance Highlights:")
# Row with the highest throughput across all tested levels.
best_throughput = stats_by_concurrency.loc[stats_by_concurrency['throughput_rps'].idxmax()]
print(f" Best throughput: {best_throughput['throughput_rps']:.2f} req/s at concurrency {int(best_throughput['concurrency'])}")
# Row with the lowest tail (P95) latency.
best_latency = stats_by_concurrency.loc[stats_by_concurrency['latency_p95_ms'].idxmin()]
print(f" Best P95 latency: {best_latency['latency_p95_ms']:.0f}ms at concurrency {int(best_latency['concurrency'])}")
print(f"\n📈 Latency by Concurrency:")
# Fixed-width formatting keeps the per-level lines column-aligned.
for _, row in stats_by_concurrency.iterrows():
    print(f" C={int(row['concurrency']):2d}: P50={row['latency_p50_ms']:7.0f}ms, "
          f"P95={row['latency_p95_ms']:7.0f}ms, P99={row['latency_p99_ms']:7.0f}ms, "
          f"Throughput={row['throughput_rps']:.2f} req/s")
print(f"\n🔧 Processing Time Breakdown (mean at best throughput concurrency):")
best_c = int(best_throughput['concurrency'])
for comp in available_components:
    mean_time = df[df['concurrency'] == best_c][comp].mean()
    print(f" {component_labels.get(comp, comp)}: {mean_time:.2f}ms")
print("\n" + "=" * 70)
# Final side-by-side comparison across concurrency levels, with
# reader-friendly column headers for display.
_renames = {
    'concurrency': 'Concurrency',
    'total_requests': 'Requests',
    'success_rate_%': 'Success %',
    'latency_mean_ms': 'Mean (ms)',
    'latency_p50_ms': 'P50 (ms)',
    'latency_p95_ms': 'P95 (ms)',
    'latency_p99_ms': 'P99 (ms)',
    'throughput_rps': 'Throughput (rps)',
}
final_table = (
    stats_by_concurrency[list(_renames)]
    .rename(columns=_renames)
    .round(2)
)
final_table