tutorials/measure_performance.md
See https://github.com/NVIDIA/TensorRT/tree/master/samples/sampleNMT for more details.
// for rcnn, you can put this code into common.hpp
#include "logging.h" // rcnn/logging.h
// File-local logger instance, constructed with Logger::Severity::kINFO.
// (Logger is declared in logging.h.)
static Logger gLogger{ Logger::Severity::kINFO };
// File-local log stream built from LOG_INFO(gLogger); the usage snippet
// below streams the profiler report into it.
static LogStreamConsumer gLogInfo{ LOG_INFO(gLogger) };
struct SimpleProfiler : public nvinfer1::IProfiler
{
struct Record
{
float time{ 0 };
int count{ 0 };
};
virtual void reportLayerTime(const char* layerName, float ms)
{
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end())
{
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name, const std::vector<SimpleProfiler>& srcProfilers = std::vector<SimpleProfiler>())
: mName(name)
{
for (const auto& srcProfiler : srcProfilers)
{
for (const auto& rec : srcProfiler.mProfile)
{
auto it = mProfile.find(rec.first);
if (it == mProfile.end())
{
mProfile.insert(rec);
}
else
{
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value)
{
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength = std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile)
{
totalTime += elem.second.time;
maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++)
{
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
// Tip: give every layer a name; the profiler accumulates and prints its
// measurements keyed by layer name.
// (build the engine here)
// (build the execution context here)
// Create a profiler labelled "test"; the label appears in the report banners.
auto sp = SimpleProfiler("test");
// Attach the profiler to the context by address.
// NOTE(review): presumably `sp` must stay alive for as long as the context may
// report into it, since only a pointer is handed over - confirm.
context->setProfiler(&sp);
// Placeholder call: pass the real enqueue arguments for your network.
context->enqueue(...);
// Print the collected per-layer report through the info log stream.
gLogInfo << sp << std::endl;