tools/experimental/trt-engine-explorer/notebooks/api-examples.ipynb
This notebook provides a set of TREx API examples.
import trex
from trex.notebook import *
# Path prefix of the engine; the three JSON files below share this prefix.
engine_name = "../tests/inputs/mobilenet.qat.onnx.engine"
# Create the EnginePlan from the graph, profile and metadata JSON files,
# passed in that order.
plan = trex.EnginePlan(
    *(f"{engine_name}.{part}.json" for part in ("graph", "profile", "metadata"))
)
k-slowest layers
List the k slowest layers:
# Select the three rows of the plan dataframe with the largest
# 'latency.pct_time' values, then print each layer's name and type.
top3 = plan.df.nlargest(3, 'latency.pct_time')
# Iterate over the rows directly instead of indexing by position.
for _idx, layer in top3.iterrows():
    print(f"{layer['Name']}: {layer['type']}")
Compute the latency of the top-3 slowest layers:
# Sum the average time and the percentage of total time over the rows in top3.
avg_times = top3['latency.avg_time']
pct_times = top3['latency.pct_time']
top3_latency = avg_times.sum()
top3_percent = pct_times.sum()
summary = f"top3 latency: {top3_latency:.6f} ms ({top3_percent:.2f}%)"
print(summary)
# Choose the layer type to select and build the query expression from it.
ltype = "Convolution"
type_filter = f'type == "{ltype}"'
convs = plan.df.query(type_filter)
print(f"There are {len(convs)} convolutions")
# Median of the 'latency.avg_time' column over the selected rows.
median_conv_latency = convs['latency.avg_time'].median()
print(median_conv_latency)
# Select the convolution layers through the plan's get_layers_by_type method.
convs2 = plan.get_layers_by_type('Convolution')
print(f"There are {len(convs2)} convolutions")
# Report the median of convs2 itself, so this cell describes the dataframe
# it just built and does not depend on `convs` from another cell.
print(convs2['latency.avg_time'].median())
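There are several ways to access a layer's inputs and outputs: directly from the dataframe, from a dataframe processed with `trex.clean_df`, or as Activation instances.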
# 'Inputs' field of the first convolution row, as held in the dataframe.
first_conv = convs.iloc[0]
print(first_conv['Inputs'])
# Pass a copy of convs2 through trex.clean_df and look at the same field
# of the first row of the result.
convs2_copy = convs2.copy()
clean_convs = trex.clean_df(convs2_copy, inplace=True)
clean_convs.iloc[0]['Inputs']
As an Activation instance:
# Create the input and output activations of the first convolution row,
# then print a handful of attributes of the first input.
inputs, outputs = trex.create_activations(convs.iloc[0])
first_input = inputs[0]
for attr in ("name", "shape", "precision", "format", "size_bytes"):
    print(getattr(first_input, attr))
# Group the layers by their 'type' column and sum both latency columns
# within each group.
group_key = "type"
latency_columns = ["latency.avg_time", "latency.pct_time"]
plan.df.groupby([group_key])[latency_columns].sum()
# The trex helper for a grouped sum, applied here to 'latency.avg_time'.
trex.group_sum_attr(plan.df, group_key, "latency.avg_time")
# Another trex convenience wrapper: group by 'type' and count the members
# of each group.
trex.group_count(plan.df, group_key)
# Keep the rows of convs_1x1 whose "Outputs" string starts with "FP32",
# then display the resulting dataframe.
# NOTE(review): the previous comment was cut off mid-sentence ("all INT8
# convolutions with ...") and mentioned INT8 while the filter tests for
# "FP32" - confirm the intended selection. convs_1x1 is not defined in the
# visible part of this notebook; it must be created before this cell runs.
convs_1x1_fp32 = convs_1x1[convs_1x1["Outputs"].str.startswith("FP32")]
display_df(convs_1x1_fp32)