quickstart/SemanticSegmentation/tutorial-runtime.ipynb
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Notebook shell cells (IPython "!" magics), run in order:
#   1. Print the installed TensorRT version as a sanity check.
#   2. Export the torchvision FCN-ResNet101 model to ONNX (export.py).
#   3. Build a strongly-typed TensorRT engine from the ONNX file with trtexec,
#      optimizing for a 1x3x1026x1282 input shape.
!python3 -c 'import tensorrt; print("TensorRT version: {}".format(tensorrt.__version__))'
!python3 /workspace/TensorRT/quickstart/SemanticSegmentation/export.py
!trtexec --onnx=fcn-resnet101.onnx --saveEngine=fcn-resnet101.engine --optShapes=input:1x3x1026x1282 --stronglyTyped
import numpy as np
import os
import ctypes
from cuda.bindings import runtime as cudart
import tensorrt as trt
import matplotlib.pyplot as plt
from PIL import Image
# Single shared logger used by the TensorRT runtime below.
TRT_LOGGER = trt.Logger()
# Select GPU 0. cudart calls return a tuple whose first element is the status,
# so cudaSetDevice(0) yields the 1-tuple (cudaSuccess,) on success.
assert cudart.cudaSetDevice(0) == (cudart.cudaError_t.cudaSuccess,)
# Filenames of TensorRT plan file and input/output images.
engine_file = "/workspace/fcn-resnet101.engine"
input_file = "/workspace/input.ppm"
output_file = "/workspace/output.ppm"
# For torchvision models, input images are loaded into a range of [0, 1] and
# normalized using mean = [0.485, 0.456, 0.406] and stddev = [0.229, 0.224, 0.225].
def preprocess(image):
    """Convert an RGB image to a normalized float32 CHW array.

    Scales pixel values to [0, 1], applies the torchvision ImageNet
    mean/stddev normalization, and reorders axes from HWC to CHW.
    """
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    stddev = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    scaled = np.asarray(image).astype(np.float32) / 255.0
    normalized = (scaled - mean) / stddev
    # HWC -> CHW
    return normalized.transpose(2, 0, 1)
def postprocess(data):
    """Color-map per-pixel class indices into a palettized PIL image.

    Builds a fixed pseudo-random color per class from a base palette, then
    wraps the prediction array in a mode-'P' image using that palette.
    """
    num_classes = 21
    # Derive a distinct RGB triple for each of the 21 Pascal VOC classes.
    base = np.array([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    colors = np.array([(base * c) % 255 for c in range(num_classes)], dtype=np.uint8)
    img = Image.fromarray(data.astype(np.uint8), mode='P')
    img.putpalette(colors)
    return img
Deserialize the TensorRT engine from the specified plan file.
def load_engine(engine_file_path):
    """Deserialize and return a TensorRT engine from a serialized plan file."""
    assert os.path.exists(engine_file_path)
    print("Reading engine from file {}".format(engine_file_path))
    with trt.Runtime(TRT_LOGGER) as runtime, open(engine_file_path, "rb") as f:
        plan = f.read()
        return runtime.deserialize_cuda_engine(plan)
Starting with a deserialized engine, the TensorRT inference pipeline consists of the following steps:
def infer(engine, input_file, output_file):
    """Run single-image semantic segmentation through a TensorRT engine.

    Reads the image at input_file, preprocesses it, executes the engine on a
    CUDA stream, color-maps the predicted class indices, and writes the result
    to output_file as a PPM image. Also dumps the raw predictions to 'test.out'.

    NOTE(review): assumes the engine has exactly one output tensor — with more
    than one, only the last output's host/device buffers would survive the
    allocation loop. Confirm against the exported model if this changes.
    """
    print("Reading input image from file {}".format(input_file))
    with Image.open(input_file) as img:
        input_image = preprocess(img)
        image_width = img.width
        image_height = img.height

    with engine.create_execution_context() as context:
        input_buffers = {}
        input_memories = {}
        tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]

        # Bind every input shape first: for dynamic-shape engines the output
        # shapes are only fully resolved once all input shapes are set, so this
        # must happen before the output-size query in the allocation loop below.
        for tensor in tensor_names:
            if engine.get_tensor_mode(tensor) == trt.TensorIOMode.INPUT:
                context.set_input_shape(tensor, (1, 3, image_height, image_width))

        # Allocate host and device buffers and bind tensor addresses.
        for tensor in tensor_names:
            if engine.get_tensor_mode(tensor) == trt.TensorIOMode.INPUT:
                input_buffers[tensor] = np.ascontiguousarray(input_image)
                err, input_memories[tensor] = cudart.cudaMalloc(input_image.nbytes)
                assert err == cudart.cudaError_t.cudaSuccess
                context.set_tensor_address(tensor, input_memories[tensor])
            else:
                size = trt.volume(context.get_tensor_shape(tensor))
                dtype = trt.nptype(engine.get_tensor_dtype(tensor))
                # Pinned host memory so the async device-to-host copy is valid.
                err, output_buffer_ptr = cudart.cudaMallocHost(size * dtype().itemsize)
                assert err == cudart.cudaError_t.cudaSuccess
                pointer_type = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))
                output_buffer = np.ctypeslib.as_array(ctypes.cast(output_buffer_ptr, pointer_type), (size,))
                err, output_memory = cudart.cudaMalloc(output_buffer.nbytes)
                assert err == cudart.cudaError_t.cudaSuccess
                context.set_tensor_address(tensor, output_memory)

        err, stream = cudart.cudaStreamCreate()
        assert err == cudart.cudaError_t.cudaSuccess
        try:
            # Transfer input data to the GPU for all input tensors.
            for tensor_name, input_buffer in input_buffers.items():
                err, = cudart.cudaMemcpyAsync(
                    input_memories[tensor_name], input_buffer.ctypes.data, input_buffer.nbytes,
                    cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
                assert err == cudart.cudaError_t.cudaSuccess
            # Run inference.
            context.execute_async_v3(stream)
            # Transfer the prediction output back from the GPU.
            err, = cudart.cudaMemcpyAsync(
                output_buffer.ctypes.data, output_memory, output_buffer.nbytes,
                cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
            assert err == cudart.cudaError_t.cudaSuccess
            # Wait for all queued work before reading the host buffer.
            err, = cudart.cudaStreamSynchronize(stream)
            assert err == cudart.cudaError_t.cudaSuccess

            # Debug dump of the raw class indices (kept from the tutorial).
            output_d64 = np.array(output_buffer, dtype=np.int64)
            np.savetxt('test.out', output_d64.astype(int), fmt='%i', delimiter=' ', newline=' ')

            with postprocess(np.reshape(output_buffer, (image_height, image_width))) as img:
                print("Writing output image to file {}".format(output_file))
                img.convert('RGB').save(output_file, "PPM")
        finally:
            # Release CUDA resources even if inference or postprocessing fails.
            for input_memory in input_memories.values():
                cudart.cudaFree(input_memory)
            cudart.cudaFree(output_memory)
            cudart.cudaFreeHost(output_buffer_ptr)
            cudart.cudaStreamDestroy(stream)
# Display the network input for side-by-side comparison with the result.
plt.imshow(Image.open(input_file))
print("Running TensorRT inference for FCN-ResNet101")
with load_engine(engine_file) as engine:
    infer(engine, input_file, output_file)
# Display the color-mapped segmentation written by infer().
plt.imshow(Image.open(output_file))