docs/articles/guides/how-it-works.md
BenchmarkDotNet performs the following steps to run your benchmarks:
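1. `BenchmarkRunner` generates an isolated project for each set of runtime settings and builds it in Release mode.
2. Next, each method/job/params combination is measured by launching the benchmark process several times (`LaunchCount`).
3. An invocation of the workload method is an *operation*; a group of operations is an *iteration*. If you have an `IterationSetup` method, it will be invoked before each iteration,
but not between operations. We have the following types of iterations: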
* `Jitting`: The overhead/workload methods are invoked to ensure they are JIT-compiled (and, on tiered runtimes, to promote them when possible). These iterations are not used for measurements.
* `Pilot`: The best operation count will be chosen.
* `OverheadWarmup`, `OverheadWorkload`: BenchmarkDotNet overhead will be evaluated.
* `ActualWarmup`: Warmup of the workload method.
* `ActualWorkload`: Actual measurements.
* `Result` = `ActualWorkload` - `<MedianOverhead>`

After all of the measurements, BenchmarkDotNet creates an instance of the `Summary` class that contains all information about benchmark runs.

If you don't understand our "count terminology", then you might find the following pseudocode useful:
// Host-side pseudocode: generates and builds the benchmark project once, then
// starts the built executable LaunchCount times and yields the parsed output of every launch.
IEnumerable<Results> Run(Benchmark benchmark)
{
    var toolchain = benchmark.GetToolchain();

    var autoGeneratedProject = toolchain.Generate(benchmark);
    var exe = toolchain.Build(autoGeneratedProject);

    // LaunchCount is a number (1 by default), so it is counted up to rather than enumerated
    for (int launchIndex = 0; launchIndex < LaunchCount; launchIndex++)
    {
        yield return ParseResults(Process.Start(exe).Output); // calls ActualRun in a separate process
    }
}
// Pseudocode for what the benchmark process does for a single benchmark method:
// jitting, pilot, overhead warmup + measurement, workload warmup + measurement,
// and an optional extra GC-statistics measurement.
// Returns the workload result reduced by the median overhead, together with the GC stats.
// NOTE(review): WarmupStage/ActualStage presumably correspond to the Warmup/Workload
// pseudocode shown further down — confirm the naming.
Result ActualRun(Method method, Job job)
{
    GlobalSetup();

    JittingStage(method); // triggers JIT compilation (and tiering if enabled) before Pilot/Warmup

    int unrollFactor = job.Run.UnrollFactor; // 16 by default

    long perfectInvocationCount = Pilot(method, unrollFactor);

    // the overhead is measured with the same invocation count and unroll factor as the workload
    WarmupStage(EMPTY_METHOD, perfectInvocationCount, unrollFactor); // EMPTY_METHOD has same return type and arguments as benchmark
    var overhead = ActualStage(EMPTY_METHOD, perfectInvocationCount, unrollFactor);

    WarmupStage(method, perfectInvocationCount, unrollFactor);
    var result = ActualStage(method, perfectInvocationCount, unrollFactor);

    // declared at method scope so that it is still visible in the return statement;
    // it keeps its default value when MemoryDiagnoser is not enabled
    var gcStats = MemoryDiagnoser.IsEnabled
        ? MeasureGcStats(method, perfectInvocationCount, unrollFactor)
        : default;

    GlobalCleanup();

    return (result - Median(overhead), gcStats);
}
// Jitting stage: invokes the method once, and on a tiered JIT keeps invoking it in batches
// so that it can be promoted when possible. These iterations are not used for measurements.
void JittingStage(Method method)
{
    // a single invocation, without unrolling
    RunIteration(method, invokeCount: 1, unrollFactor: 1);

    // nothing more to do when the JIT is not tiered
    if (!JitInfo.IsTiered)
        return;

    int promotion = 0;
    while (promotion < JitInfo.MaxTierPromotions)
    {
        // one batch of TieredCallCountThreshold invocations per possible promotion
        RunIteration(method, invokeCount: JitInfo.TieredCallCountThreshold, unrollFactor: 1);

        // NOTE(review): presumably gives the runtime time to promote the method — confirm
        Thread.Sleep(250);

        promotion++;
    }
}
// Pilot stage: starting from minInvokeCount, doubles the invoke count until one iteration
// satisfies the pilot heuristic, and returns the count that satisfied it.
long Pilot(Method method, int unrollFactor)
{
    // the invoke count is the equivalent of InnerIterationCount from xunit-performance
    for (long candidate = minInvokeCount; ; candidate *= 2)
    {
        var pilotMeasurement = RunIteration(method, candidate, unrollFactor);

        if (heuristic.IsPilotRequirementMet(pilotMeasurement))
            return candidate;
    }
}
// Warmup stage: keeps running iterations until the warmup heuristic is satisfied.
// The measurements are only shown to the heuristic; nothing is returned.
void Warmup(Method method, long invokeCount, int unrollFactor)
{
    bool warmedUp;
    do
    {
        var iteration = RunIteration(method, invokeCount, unrollFactor);
        warmedUp = heuristic.IsWarmupRequirementMet(iteration);
    }
    while (!warmedUp);
}
// Workload stage: runs iterations until the workload heuristic is satisfied
// and yields every measurement that is not an outlier.
IEnumerable<Measurement> Workload(Method method, long invokeCount, int unrollFactor)
{
    while (true)
    {
        var measurement = RunIteration(method, invokeCount, unrollFactor);

        // outliers are left out of the results...
        if (measurement.IsNotOutlier)
        {
            yield return measurement;
        }

        // ...but every measurement, outlier or not, is passed to the stop heuristic
        if (heuristic.IsWorkloadRequirementMet(measurement))
        {
            yield break;
        }
    }
}
// A single iteration: the loop below makes (invokeCount / unrollFactor) passes
// and every pass calls the method unrollFactor times.
// Only the loop itself is timed; setup and cleanup run outside of the clock.
Measurement RunIteration(Method method, long invokeCount, long unrollFactor)
{
    // preparation, not measured
    IterationSetup();
    MemoryCleanup();

    var stopwatch = Clock.Start();

    for (long pass = 0; pass < invokeCount / unrollFactor; pass++)
    {
        // the loop body is unrolled by hand!!
        method(); // call no. 1
        method(); // call no. 2
        // ...
        method(); // call no. (unrollFactor - 1)
        method(); // call no. unrollFactor
    }

    var elapsed = stopwatch.GetElapsed();

    // teardown, not measured
    IterationCleanup();
    MemoryCleanup();

    return Measurement(elapsed);
}
// Runs one extra, untimed iteration with monitoring enabled and returns the difference
// between the GC stats read after and before it.
GcStats MeasureGcStats(Method method, long invokeCount, long unrollFactor)
{
    // we enable monitoring after workload actual run, for this single iteration which is executed at the end
    // so even if we enable AppDomain monitoring in separate process
    // it does not matter, because we have already obtained the results!
    EnableMonitoring();

    IterationSetup();

    var initialGcStats = GcStats.ReadInitial();

    // we do NOT start any clock here, because the enabled monitoring might have some overhead
    // so we just get the gc stats and ignore the timing
    // it's last thing the process does before it dies, so also enabled monitoring is not an issue for next benchmarks
    // because each of them is going to be executed in a new process
    for (long i = 0; i < invokeCount / unrollFactor; i++)
    {
        // we perform manual loop unrolling!!
        method(); // 1st call
        method(); // 2nd call
        // ...
        method(); // (unrollFactor - 1)'th call
        method(); // unrollFactor'th call
    }

    var finalGcStats = GcStats.ReadFinal();

    IterationCleanup();

    return finalGcStats - initialGcStats; // the result is the difference between the stats collected after and before running the extra iteration
}