Back to Jetson Inference

Jetson Inference: jetson

docs/html/tensorNet_8h_source.html

latest73.1 KB
Original Source

| | Jetson Inference

DNN Vision Library |

tensorNet.h

Go to the documentation of this file.

1 /*

2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.

3 *

4 * Permission is hereby granted, free of charge, to any person obtaining a

5 * copy of this software and associated documentation files (the "Software"),

6 * to deal in the Software without restriction, including without limitation

7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,

8 * and/or sell copies of the Software, and to permit persons to whom the

9 * Software is furnished to do so, subject to the following conditions:

10 *

11 * The above copyright notice and this permission notice shall be included in

12 * all copies or substantial portions of the Software.

13 *

14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL

17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

20 * DEALINGS IN THE SOFTWARE.

21 */

22

23 #ifndef __TENSOR_NET_H__

24 #define __TENSOR_NET_H__

25

26 // forward declaration of IInt8Calibrator

27 namespace nvinfer1 { class IInt8Calibrator; }

28

29 // includes

30 #include <NvInfer.h>

31

32 #include <jetson-utils/cudaUtility.h>

33 #include <jetson-utils/commandLine.h>

34 #include <jetson-utils/imageFormat.h>

35 #include <jetson-utils/timespec.h>

36 #include <jetson-utils/logging.h>

37

38 #include <vector>

39 #include <sstream>

40 #include <math.h>

41

42

43 #if NV_TENSORRT_MAJOR >= 6

44 typedef nvinfer1::Dims3 Dims3;

45

46 #define DIMS_C(x) x.d[0]

47 #define DIMS_H(x) x.d[1]

48 #define DIMS_W(x) x.d[2]

49

50 #elif NV_TENSORRT_MAJOR >= 2

51 typedef nvinfer1::DimsCHW Dims3;

52

53 #define DIMS_C(x) x.d[0]

54 #define DIMS_H(x) x.d[1]

55 #define DIMS_W(x) x.d[2]

56

57 #else

58 typedef nvinfer1::Dims3 Dims3;

59

60 #define DIMS_C(x) x.c

61 #define DIMS_H(x) x.h

62 #define DIMS_W(x) x.w

63

64 #ifndef NV_TENSORRT_MAJOR

65 #define NV_TENSORRT_MAJOR 1

66 #define NV_TENSORRT_MINOR 0

67 #endif

68 #endif

69

70 #if NV_TENSORRT_MAJOR >= 8

71 #define NOEXCEPT noexcept

72 #else

73 #define NOEXCEPT

74 #endif

75

76

82 #define TENSORRT_VERSION_CHECK(major, minor, patch) (NV_TENSORRT_MAJOR > major || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && NV_TENSORRT_PATCH >= patch))

83

88 #define DEFAULT_MAX_BATCH_SIZE 1

89

94 #define LOG_TRT "[TRT] "

95

96

102 enum precisionType

103 {

104 TYPE_DISABLED = 0,

105 TYPE_FASTEST,

106 TYPE_FP32,

107 TYPE_FP16,

108 TYPE_INT8,

109 NUM_PRECISIONS

110 };

111

116 const char* precisionTypeToStr( precisionType type );

117

122 precisionType precisionTypeFromStr( const char* str );

123

129 enum deviceType

130 {

131 DEVICE_GPU = 0,

132 DEVICE_DLA,

133 DEVICE_DLA_0 = DEVICE_DLA,

134 DEVICE_DLA_1,

135 NUM_DEVICES

136 };

137

142 const char* deviceTypeToStr( deviceType type );

143

148 deviceType deviceTypeFromStr( const char* str );

149

155 enum modelType

156 {

157 MODEL_CUSTOM = 0,

158 MODEL_CAFFE,

159 MODEL_ONNX,

160 MODEL_UFF,

161 MODEL_ENGINE

162 };

163

168 const char* modelTypeToStr( modelType type );

169

174 modelType modelTypeFromStr( const char* str );

175

180 modelType modelTypeFromPath( const char* path );

181

187 enum profilerQuery

188 {

189 PROFILER_PREPROCESS = 0,

190 PROFILER_NETWORK,

191 PROFILER_POSTPROCESS,

192 PROFILER_VISUALIZE,

193 PROFILER_TOTAL,

194 };

195

200 const char* profilerQueryToStr( profilerQuery query );

201

206 enum profilerDevice

207 {

208 PROFILER_CPU = 0,

209 PROFILER_CUDA,

210 };

211

212

218 class tensorNet

219 {

220 public:

224virtual ~tensorNet();

225

235bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,

236const char* input_blob="data", const char* output_blob="prob",

237 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,

238deviceType device=DEVICE_GPU, bool allowGPUFallback=true,

239 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

240

250bool LoadNetwork( const char* prototxt, const char* model, const char* mean,

251const char* input_blob, const std::vector<std::string>& output_blobs,

252 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,

253deviceType device=DEVICE_GPU, bool allowGPUFallback=true,

254 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

255

265bool LoadNetwork( const char* prototxt, const char* model, const char* mean,

266const std::vector<std::string>& input_blobs,

267const std::vector<std::string>& output_blobs,

268 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,

269precisionType precision=TYPE_FASTEST,

270deviceType device=DEVICE_GPU, bool allowGPUFallback=true,

271 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

272

283bool LoadNetwork( const char* prototxt, const char* model, const char* mean,

284const char* input_blob, const Dims3& input_dims,

285const std::vector<std::string>& output_blobs,

286 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,

287precisionType precision=TYPE_FASTEST,

288deviceType device=DEVICE_GPU, bool allowGPUFallback=true,

289 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

290

301bool LoadNetwork( const char* prototxt, const char* model, const char* mean,

302const std::vector<std::string>& input_blobs,

303const std::vector<Dims3>& input_dims,

304const std::vector<std::string>& output_blobs,

305 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,

306precisionType precision=TYPE_FASTEST,

307deviceType device=DEVICE_GPU, bool allowGPUFallback=true,

308 nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

309

316bool LoadEngine( const char* engine_filename,

317const std::vector<std::string>& input_blobs,

318const std::vector<std::string>& output_blobs,

319 nvinfer1::IPluginFactory* pluginFactory=NULL,

320deviceType device=DEVICE_GPU,

321 cudaStream_t stream=NULL );

322

330bool LoadEngine( char* engine_stream, size_t engine_size,

331const std::vector<std::string>& input_blobs,

332const std::vector<std::string>& output_blobs,

333 nvinfer1::IPluginFactory* pluginFactory=NULL,

334deviceType device=DEVICE_GPU,

335 cudaStream_t stream=NULL );

336

344bool LoadEngine( nvinfer1::ICudaEngine* engine,

345const std::vector<std::string>& input_blobs,

346const std::vector<std::string>& output_blobs,

347deviceType device=DEVICE_GPU,

348 cudaStream_t stream=NULL );

349

353bool LoadEngine( const char* filename, char** stream, size_t* size );

354

360static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, int expectedClasses=-1 );

361

367static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, std::vector<std::string>& synsets, int expectedClasses=-1 );

368

374static bool LoadClassColors( const char* filename, float4* colors, int expectedClasses, float defaultAlpha=255.0f );

375

381static bool LoadClassColors( const char* filename, float4** colors, int expectedClasses, float defaultAlpha=255.0f );

382

387static float4 GenerateColor( uint32_t classID, float alpha=255.0f );

388

392void EnableLayerProfiler();

393

397void EnableDebug();

398

402inline bool AllowGPUFallback() const { return mAllowGPUFallback; }

403

407inline deviceType GetDevice() const { return mDevice; }

408

412inline precisionType GetPrecision() const { return mPrecision; }

413

417inline bool IsPrecision( precisionType type ) const { return (mPrecision == type); }

418

422static precisionType SelectPrecision( precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true );

423

427static precisionType FindFastestPrecision( deviceType device=DEVICE_GPU, bool allowInt8=true );

428

432static std::vector<precisionType> DetectNativePrecisions( deviceType device=DEVICE_GPU );

433

437static bool DetectNativePrecision( const std::vector<precisionType>& nativeTypes, precisionType type );

438

442static bool DetectNativePrecision( precisionType precision, deviceType device=DEVICE_GPU );

443

447inline cudaStream_t GetStream() const { return mStream; }

448

452 cudaStream_t CreateStream( bool nonBlocking=true );

453

457void SetStream( cudaStream_t stream );

458

462inline const char* GetPrototxtPath() const { return mPrototxtPath.c_str(); }

463

467inline const char* GetModelPath() const { return mModelPath.c_str(); }

468

472inline const char* GetModelFilename() const { return mModelFile.c_str(); }

473

477inline modelType GetModelType() const { return mModelType; }

478

482inline bool IsModelType( modelType type ) const { return (mModelType == type); }

483

487inline uint32_t GetInputLayers() const { return mInputs.size(); }

488

492inline uint32_t GetOutputLayers() const { return mOutputs.size(); }

493

497inline Dims3 GetInputDims( uint32_t layer=0 ) const { return mInputs[layer].dims; }

498

502inline uint32_t GetInputWidth( uint32_t layer=0 ) const { return DIMS_W(mInputs[layer].dims); }

503

507inline uint32_t GetInputHeight( uint32_t layer=0 ) const { return DIMS_H(mInputs[layer].dims); }

508

512inline uint32_t GetInputSize( uint32_t layer=0 ) const { return mInputs[layer].size; }

513

517inline float* GetInputPtr( uint32_t layer=0 ) const { return mInputs[layer].CUDA; }

518

522inline Dims3 GetOutputDims( uint32_t layer=0 ) const { return mOutputs[layer].dims; }

523

527inline uint32_t GetOutputWidth( uint32_t layer=0 ) const { return DIMS_W(mOutputs[layer].dims); }

528

532inline uint32_t GetOutputHeight( uint32_t layer=0 ) const { return DIMS_H(mOutputs[layer].dims); }

533

537inline uint32_t GetOutputSize( uint32_t layer=0 ) const { return mOutputs[layer].size; }

538

542inline float* GetOutputPtr( uint32_t layer=0 ) const { return mOutputs[layer].CUDA; }

543

547inline float GetNetworkFPS() { return 1000.0f / GetNetworkTime(); }

548

552inline float GetNetworkTime() { return GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA); }

553

557inline const char* GetNetworkName() const { return mModelFile.c_str(); }

558

562inline float2 GetProfilerTime( profilerQuery query ) { PROFILER_QUERY(query); return mProfilerTimes[query]; }

563

567inline float GetProfilerTime( profilerQuery query, profilerDevice device ) { PROFILER_QUERY(query); return (device == PROFILER_CPU) ? mProfilerTimes[query].x : mProfilerTimes[query].y; }

568

572inline void PrintProfilerTimes()

573 {

574LogInfo("\n");

575LogInfo(LOG_TRT "------------------------------------------------\n");

576LogInfo(LOG_TRT "Timing Report %s\n", GetModelPath());

577LogInfo(LOG_TRT "------------------------------------------------\n");

578

579for( uint32_t n=0; n <= PROFILER_TOTAL; n++ )

580 {

581const profilerQuery query = (profilerQuery)n;

582

583if( PROFILER_QUERY(query) )

584LogInfo(LOG_TRT "%-12s CPU %9.5fms CUDA %9.5fms\n", profilerQueryToStr(query), mProfilerTimes[n].x, mProfilerTimes[n].y);

585 }

586

587LogInfo(LOG_TRT "------------------------------------------------\n\n");

588

589static bool first_run=true;

590

591if( first_run )

592 {

593LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"

594" to disable DVFS for more accurate profiling/timing measurements\n\n");

595

596 first_run = false;

597 }

598 }

599

600 protected:

601

605tensorNet();

606

614bool ProcessNetwork( bool sync=true );

615

626bool ProfileModel( const std::string& deployFile, const std::string& modelFile,

627const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,

628const std::vector<std::string>& outputs, uint32_t maxBatchSize,

629precisionType precision, deviceType device, bool allowGPUFallback,

630 nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize );

631

635 #if NV_TENSORRT_MAJOR >= 8

636bool ConfigureBuilder( nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,

637 uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision,

638deviceType device, bool allowGPUFallback,

639 nvinfer1::IInt8Calibrator* calibrator );

640 #else

641bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize,

642 uint32_t workspaceSize, precisionType precision,

643deviceType device, bool allowGPUFallback,

644 nvinfer1::IInt8Calibrator* calibrator );

645 #endif

646

650bool ValidateEngine( const char* model_path, const char* cache_path, const char* checksum_path );

651

655class Logger : public nvinfer1::ILogger

656 {

657public:

658void log( Severity severity, const char* msg ) NOEXCEPT override

659 {

660if( severity == Severity::kWARNING )

661 {

662LogWarning(LOG_TRT "%s\n", msg);

663 }

664else if( severity == Severity::kINFO )

665 {

666LogInfo(LOG_TRT "%s\n", msg);

667 }

668 #if NV_TENSORRT_MAJOR >= 6

669else if( severity == Severity::kVERBOSE )

670 {

671LogVerbose(LOG_TRT "%s\n", msg);

672 }

673 #endif

674else

675 {

676LogError(LOG_TRT "%s\n", msg);

677 }

678 }

679 } static gLogger;

680

684class Profiler : public nvinfer1::IProfiler

685 {

686public:

687Profiler() : timingAccumulator(0.0f) { }

688

689virtual void reportLayerTime(const char* layerName, float ms) NOEXCEPT

690 {

691LogVerbose(LOG_TRT "layer %s - %f ms\n", layerName, ms);

692timingAccumulator += ms;

693 }

694

695float timingAccumulator;

696 } gProfiler;

697

701inline void PROFILER_BEGIN( profilerQuery query )

702 {

703const uint32_t evt = query*2;

704const uint32_t flag = (1 << query);

705

706CUDA(cudaEventRecord(mEventsGPU[evt], mStream));

707timestamp(&mEventsCPU[evt]);

708

709mProfilerQueriesUsed |= flag;

710mProfilerQueriesDone &= ~flag;

711 }

712

716inline void PROFILER_END( profilerQuery query )

717 {

718const uint32_t evt = query*2+1;

719

720CUDA(cudaEventRecord(mEventsGPU[evt]));

721timestamp(&mEventsCPU[evt]);

722 timespec cpuTime;

723timeDiff(mEventsCPU[evt-1], mEventsCPU[evt], &cpuTime);

724mProfilerTimes[query].x = timeFloat(cpuTime);

725

726if( mEnableProfiler && query == PROFILER_NETWORK )

727 {

728LogVerbose(LOG_TRT "layer network time - %f ms\n", gProfiler.timingAccumulator);

729gProfiler.timingAccumulator = 0.0f;

730LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"

731" to disable DVFS for more accurate profiling/timing measurements\n");

732 }

733 }

734

738inline bool PROFILER_QUERY( profilerQuery query )

739 {

740const uint32_t flag = (1 << query);

741

742if( query == PROFILER_TOTAL )

743 {

744mProfilerTimes[PROFILER_TOTAL].x = 0.0f;

745mProfilerTimes[PROFILER_TOTAL].y = 0.0f;

746

747for( uint32_t n=0; n < PROFILER_TOTAL; n++ )

748 {

749if( PROFILER_QUERY((profilerQuery)n) )

750 {

751mProfilerTimes[PROFILER_TOTAL].x += mProfilerTimes[n].x;

752mProfilerTimes[PROFILER_TOTAL].y += mProfilerTimes[n].y;

753 }

754 }

755

756return true;

757 }

758else if( mProfilerQueriesUsed & flag )

759 {

760if( !(mProfilerQueriesDone & flag) )

761 {

762const uint32_t evt = query*2;

763float cuda_time = 0.0f;

764CUDA(cudaEventElapsedTime(&cuda_time, mEventsGPU[evt], mEventsGPU[evt+1]));

765mProfilerTimes[query].y = cuda_time;

766mProfilerQueriesDone |= flag;

767//mProfilerQueriesUsed &= ~flag;

768 }

769

770return true;

771 }

772

773return false;

774 }

775

776 protected:

777

778/* Member Variables */

779 std::string mPrototxtPath;

780 std::string mModelPath;

781 std::string mModelFile;

782 std::string mMeanPath;

783 std::string mCacheEnginePath;

784 std::string mCacheCalibrationPath;

785 std::string mChecksumPath;

786

787 deviceType mDevice;

788 precisionType mPrecision;

789 modelType mModelType;

790 cudaStream_t mStream;

791 cudaEvent_t mEventsGPU[PROFILER_TOTAL * 2];

792 timespec mEventsCPU[PROFILER_TOTAL * 2];

793

794 nvinfer1::IRuntime* mInfer;

795 nvinfer1::ICudaEngine* mEngine;

796 nvinfer1::IExecutionContext* mContext;

797

798 float2 mProfilerTimes[PROFILER_TOTAL + 1];

799 uint32_t mProfilerQueriesUsed;

800 uint32_t mProfilerQueriesDone;

801 uint32_t mWorkspaceSize;

802 uint32_t mMaxBatchSize;

803 bool mEnableProfiler;

804 bool mEnableDebug;

805 bool mAllowGPUFallback;

806 void** mBindings;

807

808struct layerInfo

809 {

810 std::string name;

811Dims3 dims;

812 uint32_t size;

813 uint32_t binding;

814float* CPU;

815float* CUDA;

816 };

817

818 std::vector<layerInfo> mInputs;

819 std::vector<layerInfo> mOutputs;

820 };

821

822 #endif

tensorNet::DetectNativePrecisions

static std::vector< precisionType > DetectNativePrecisions(deviceType device=DEVICE_GPU)

Detect the precisions supported natively on a device.

DEVICE_DLA_0

@ DEVICE_DLA_0

Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier)

Definition: tensorNet.h:133

tensorNet::Logger

Logger class for GIE info/warning/errors.

Definition: tensorNet.h:655

tensorNet::GetPrecision

precisionType GetPrecision() const

Retrieve the type of precision being used.

Definition: tensorNet.h:412

MODEL_UFF

@ MODEL_UFF

UFF.

Definition: tensorNet.h:160

tensorNet::DetectNativePrecision

static bool DetectNativePrecision(const std::vector< precisionType > &nativeTypes, precisionType type)

Detect if a particular precision is supported natively.

CUDA

#define CUDA(x)

Execute a CUDA call and print out any errors.

Definition: cudaUtility.h:41

tensorNet::ProcessNetwork

bool ProcessNetwork(bool sync=true)

Execute processing of the network.

MODEL_CUSTOM

@ MODEL_CUSTOM

Created directly with TensorRT API.

Definition: tensorNet.h:157

tensorNet::layerInfo::binding

uint32_t binding

Definition: tensorNet.h:813

LogInfo

#define LogInfo(format, args...)

Log a printf-style info message (Log::INFO)

Definition: logging.h:168

tensorNet::mAllowGPUFallback

bool mAllowGPUFallback

Definition: tensorNet.h:805

timeFloat

float timeFloat(const timespec &a)

Convert to 32-bit float (in milliseconds).

Definition: timespec.h:149

modelTypeFromStr

modelType modelTypeFromStr(const char *str)

Parse the model format from a string.

tensorNet::GetNetworkFPS

float GetNetworkFPS()

Retrieve the network frames per second (FPS).

Definition: tensorNet.h:547

tensorNet::layerInfo::size

uint32_t size

Definition: tensorNet.h:812

tensorNet::mModelFile

std::string mModelFile

Definition: tensorNet.h:781

timestamp

void timestamp(timespec *timestampOut)

Retrieve a timestamp of the current system time.

Definition: timespec.h:37

PROFILER_CPU

@ PROFILER_CPU

CPU walltime.

Definition: tensorNet.h:208

tensorNet::LoadNetwork

bool LoadNetwork(const char *prototxt, const char *model, const char *mean=NULL, const char *input_blob="data", const char *output_blob="prob", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)

Load a new network instance.

TYPE_FP16

@ TYPE_FP16

16-bit floating-point half precision (FP16)

Definition: tensorNet.h:107

deviceTypeToStr

const char * deviceTypeToStr(deviceType type)

Stringize function that returns deviceType in text.

tensorNet::GetPrototxtPath

const char * GetPrototxtPath() const

Retrieve the path to the network prototxt file.

Definition: tensorNet.h:462

tensorNet::mEnableProfiler

bool mEnableProfiler

Definition: tensorNet.h:803

MODEL_ENGINE

@ MODEL_ENGINE

TensorRT engine/plan.

Definition: tensorNet.h:161

tensorNet::GetOutputSize

uint32_t GetOutputSize(uint32_t layer=0) const

Retrieve the size (in bytes) of network output layer.

Definition: tensorNet.h:537

tensorNet::IsModelType

bool IsModelType(modelType type) const

Return true if the model is of the specified format.

Definition: tensorNet.h:482

cudaUtility.h

tensorNet::ValidateEngine

bool ValidateEngine(const char *model_path, const char *cache_path, const char *checksum_path)

Validate that the model already has a built TensorRT engine that exists and doesn't need updating.

DEVICE_DLA

@ DEVICE_DLA

Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier)

Definition: tensorNet.h:132

tensorNet::EnableLayerProfiler

void EnableLayerProfiler()

Manually enable layer profiling times.

profilerDevice

profilerDevice

Profiler device.

Definition: tensorNet.h:206

TYPE_INT8

@ TYPE_INT8

8-bit integer precision (INT8)

Definition: tensorNet.h:108

tensorNet::tensorNet

tensorNet()

Constructor.

tensorNet::Profiler::reportLayerTime

virtual void reportLayerTime(const char *layerName, float ms) NOEXCEPT

Definition: tensorNet.h:689

precisionTypeToStr

const char * precisionTypeToStr(precisionType type)

Stringize function that returns precisionType in text.

tensorNet::IsPrecision

bool IsPrecision(precisionType type) const

Check if a particular precision is being used.

Definition: tensorNet.h:417

tensorNet::mWorkspaceSize

uint32_t mWorkspaceSize

Definition: tensorNet.h:801

tensorNet::mMeanPath

std::string mMeanPath

Definition: tensorNet.h:782

LOG_TRT

#define LOG_TRT

Prefix used for tagging printed log output from TensorRT.

Definition: tensorNet.h:94

PROFILER_VISUALIZE

@ PROFILER_VISUALIZE

Definition: tensorNet.h:192

tensorNet::gProfiler

tensorNet::Profiler gProfiler

tensorNet::Logger::log

void log(Severity severity, const char *msg) NOEXCEPT override

Definition: tensorNet.h:658

tensorNet::mProfilerTimes

float2 mProfilerTimes[PROFILER_TOTAL+1]

Definition: tensorNet.h:798

profilerQueryToStr

const char * profilerQueryToStr(profilerQuery query)

Stringize function that returns profilerQuery in text.

tensorNet::layerInfo::CPU

float * CPU

Definition: tensorNet.h:814

profilerQuery

profilerQuery

Profiling queries.

Definition: tensorNet.h:187

tensorNet::LoadEngine

bool LoadEngine(const char *engine_filename, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, nvinfer1::IPluginFactory *pluginFactory=NULL, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)

Load a network instance from a serialized engine plan file.

tensorNet::GetNetworkName

const char * GetNetworkName() const

Retrieve the network name (its filename).

Definition: tensorNet.h:557

commandLine.h

DEVICE_GPU

@ DEVICE_GPU

GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()

Definition: tensorNet.h:131

modelType

modelType

Enumeration indicating the format of the model that's imported in TensorRT (either caffe,...

Definition: tensorNet.h:155

PROFILER_CUDA

@ PROFILER_CUDA

CUDA kernel time.

Definition: tensorNet.h:209

tensorNet::Profiler::timingAccumulator

float timingAccumulator

Definition: tensorNet.h:695

Dims3

nvinfer1::Dims3 Dims3

Definition: tensorNet.h:58

tensorNet::FindFastestPrecision

static precisionType FindFastestPrecision(deviceType device=DEVICE_GPU, bool allowInt8=true)

Determine the fastest native precision on a device.

tensorNet::PrintProfilerTimes

void PrintProfilerTimes()

Print the profiler times (in milliseconds).

Definition: tensorNet.h:572

classID

uint8_t classID

The class ID of the point.

Definition: cudaPointCloud.h:17

tensorNet::GetInputWidth

uint32_t GetInputWidth(uint32_t layer=0) const

Retrieve the width of network input layer.

Definition: tensorNet.h:502

tensorNet::GetOutputHeight

uint32_t GetOutputHeight(uint32_t layer=0) const

Retrieve the height of network output layer.

Definition: tensorNet.h:532

deviceType

deviceType

Enumeration for indicating the desired device that the network should run on, if available in hardwar...

Definition: tensorNet.h:129

tensorNet::mStream

cudaStream_t mStream

Definition: tensorNet.h:790

tensorNet::GetModelType

modelType GetModelType() const

Retrieve the format of the network model.

Definition: tensorNet.h:477

tensorNet::GetOutputLayers

uint32_t GetOutputLayers() const

Retrieve the number of output layers to the network.

Definition: tensorNet.h:492

tensorNet::EnableDebug

void EnableDebug()

Manually enable debug messages and synchronization.

tensorNet::mBindings

void ** mBindings

Definition: tensorNet.h:806

nvinfer1

Definition: tensorNet.h:27

NOEXCEPT

#define NOEXCEPT

Definition: tensorNet.h:73

tensorNet::mMaxBatchSize

uint32_t mMaxBatchSize

Definition: tensorNet.h:802

PROFILER_POSTPROCESS

@ PROFILER_POSTPROCESS

Definition: tensorNet.h:191

tensorNet::GetInputHeight

uint32_t GetInputHeight(uint32_t layer=0) const

Retrieve the height of network input layer.

Definition: tensorNet.h:507

tensorNet::LoadClassLabels

static bool LoadClassLabels(const char *filename, std::vector< std::string > &descriptions, int expectedClasses=-1)

Load class descriptions from a label file.

tensorNet::ConfigureBuilder

bool ConfigureBuilder(nvinfer1::IBuilder *builder, uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator)

Configure builder options.

precisionTypeFromStr

precisionType precisionTypeFromStr(const char *str)

Parse the precision type from a string.

tensorNet::mCacheEnginePath

std::string mCacheEnginePath

Definition: tensorNet.h:783

tensorNet::GetProfilerTime

float2 GetProfilerTime(profilerQuery query)

Retrieve the profiler runtime (in milliseconds).

Definition: tensorNet.h:562

tensorNet::mModelType

modelType mModelType

Definition: tensorNet.h:789

DIMS_H

#define DIMS_H(x)

Definition: tensorNet.h:61

TYPE_FASTEST

@ TYPE_FASTEST

The fastest detected precision should be used (i.e.

Definition: tensorNet.h:105

tensorNet::GetOutputPtr

float * GetOutputPtr(uint32_t layer=0) const

Get the CUDA pointer to the output memory.

Definition: tensorNet.h:542

tensorNet::Profiler::Profiler

Profiler()

Definition: tensorNet.h:687

tensorNet::mEnableDebug

bool mEnableDebug

Definition: tensorNet.h:804

tensorNet::GetInputDims

Dims3 GetInputDims(uint32_t layer=0) const

Retrieve the dimensions of network input layer.

Definition: tensorNet.h:497

NUM_DEVICES

@ NUM_DEVICES

Number of device types defined.

Definition: tensorNet.h:135

precisionType

precisionType

Enumeration for indicating the desired precision that the network should run in, if available in hard...

Definition: tensorNet.h:102

deviceTypeFromStr

deviceType deviceTypeFromStr(const char *str)

Parse the device type from a string.

tensorNet::SelectPrecision

static precisionType SelectPrecision(precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true)

Resolve a desired precision to a specific one that's available.

tensorNet::GetDevice

deviceType GetDevice() const

Retrieve the device being used for execution.

Definition: tensorNet.h:407

LogWarning

#define LogWarning(format, args...)

Log a printf-style warning message (Log::WARNING)

Definition: logging.h:156

modelTypeFromPath

modelType modelTypeFromPath(const char *path)

Parse the model format from a file path.

DEVICE_DLA_1

@ DEVICE_DLA_1

Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier)

Definition: tensorNet.h:134

tensorNet::AllowGPUFallback

bool AllowGPUFallback() const

Return true if GPU fallback is enabled.

Definition: tensorNet.h:402

tensorNet::SetStream

void SetStream(cudaStream_t stream)

Set the stream that the device is operating on.

tensorNet

Abstract class for loading a tensor network with TensorRT.

Definition: tensorNet.h:218

tensorNet::GetModelPath

const char * GetModelPath() const

Retrieve the full path to model file, including the filename.

Definition: tensorNet.h:467

tensorNet::mInputs

std::vector< layerInfo > mInputs

Definition: tensorNet.h:818

tensorNet::mEngine

nvinfer1::ICudaEngine * mEngine

Definition: tensorNet.h:795

tensorNet::GetOutputDims

Dims3 GetOutputDims(uint32_t layer=0) const

Retrieve the dimensions of network output layer.

Definition: tensorNet.h:522

PROFILER_NETWORK

@ PROFILER_NETWORK

Definition: tensorNet.h:190

tensorNet::mEventsGPU

cudaEvent_t mEventsGPU[PROFILER_TOTAL *2]

Definition: tensorNet.h:791

DIMS_W

#define DIMS_W(x)

Definition: tensorNet.h:62

TYPE_FP32

@ TYPE_FP32

32-bit floating-point precision (FP32)

Definition: tensorNet.h:106

tensorNet::layerInfo::dims

Dims3 dims

Definition: tensorNet.h:811

MODEL_ONNX

@ MODEL_ONNX

ONNX.

Definition: tensorNet.h:159

modelTypeToStr

const char * modelTypeToStr(modelType type)

Stringize function that returns modelType in text.

tensorNet::PROFILER_END

void PROFILER_END(profilerQuery query)

End a profiling query, after the network is run.

Definition: tensorNet.h:716

tensorNet::layerInfo::CUDA

float * CUDA

Definition: tensorNet.h:815

tensorNet::mContext

nvinfer1::IExecutionContext * mContext

Definition: tensorNet.h:796

LogVerbose

#define LogVerbose(format, args...)

Log a printf-style verbose message (Log::VERBOSE)

Definition: logging.h:174

NUM_PRECISIONS

@ NUM_PRECISIONS

Number of precision types defined.

Definition: tensorNet.h:109

tensorNet::mPrecision

precisionType mPrecision

Definition: tensorNet.h:788

tensorNet::GetInputSize

uint32_t GetInputSize(uint32_t layer=0) const

Retrieve the size (in bytes) of network input layer.

Definition: tensorNet.h:512

tensorNet::GetProfilerTime

float GetProfilerTime(profilerQuery query, profilerDevice device)

Retrieve the profiler runtime (in milliseconds).

Definition: tensorNet.h:567

tensorNet::GetInputPtr

float * GetInputPtr(uint32_t layer=0) const

Get the CUDA pointer to the input layer's memory.

Definition: tensorNet.h:517

tensorNet::mChecksumPath

std::string mChecksumPath

Definition: tensorNet.h:785

tensorNet::GetModelFilename

const char * GetModelFilename() const

Retrieve the filename of the file, excluding the directory.

Definition: tensorNet.h:472

tensorNet::mDevice

deviceType mDevice

Definition: tensorNet.h:787

logging.h

PROFILER_TOTAL

@ PROFILER_TOTAL

Definition: tensorNet.h:193

tensorNet::layerInfo::name

std::string name

Definition: tensorNet.h:810

tensorNet::GetOutputWidth

uint32_t GetOutputWidth(uint32_t layer=0) const

Retrieve the width of network output layer.

Definition: tensorNet.h:527

MODEL_CAFFE

@ MODEL_CAFFE

caffemodel

Definition: tensorNet.h:158

DEFAULT_MAX_BATCH_SIZE

#define DEFAULT_MAX_BATCH_SIZE

Default maximum batch size.

Definition: tensorNet.h:88

tensorNet::GetStream

cudaStream_t GetStream() const

Retrieve the stream that the device is operating on.

Definition: tensorNet.h:447

tensorNet::mEventsCPU

timespec mEventsCPU[PROFILER_TOTAL *2]

Definition: tensorNet.h:792

tensorNet::mProfilerQueriesUsed

uint32_t mProfilerQueriesUsed

Definition: tensorNet.h:799

PROFILER_PREPROCESS

@ PROFILER_PREPROCESS

Definition: tensorNet.h:189

tensorNet::GetInputLayers

uint32_t GetInputLayers() const

Retrieve the number of input layers to the network.

Definition: tensorNet.h:487

tensorNet::~tensorNet

virtual ~tensorNet()

Destructor.

tensorNet::GenerateColor

static float4 GenerateColor(uint32_t classID, float alpha=255.0f)

Procedurally generate a color for a given class index with the specified alpha value.

LogError

#define LogError(format, args...)

Log a printf-style error message (Log::ERROR)

Definition: logging.h:150

tensorNet::ProfileModel

bool ProfileModel(const std::string &deployFile, const std::string &modelFile, const std::vector< std::string > &inputs, const std::vector< Dims3 > &inputDims, const std::vector< std::string > &outputs, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator, char **engineStream, size_t *engineSize)

Create and output an optimized network model.

tensorNet::PROFILER_QUERY

bool PROFILER_QUERY(profilerQuery query)

Query the CUDA part of a profiler query.

Definition: tensorNet.h:738

tensorNet::mPrototxtPath

std::string mPrototxtPath

Definition: tensorNet.h:779

tensorNet::LoadClassColors

static bool LoadClassColors(const char *filename, float4 *colors, int expectedClasses, float defaultAlpha=255.0f)

Load class colors from a text file.

tensorNet::mCacheCalibrationPath

std::string mCacheCalibrationPath

Definition: tensorNet.h:784

tensorNet::gLogger

tensorNet::Logger gLogger

tensorNet::CreateStream

cudaStream_t CreateStream(bool nonBlocking=true)

Create and use a new stream for execution.

tensorNet::mInfer

nvinfer1::IRuntime * mInfer

Definition: tensorNet.h:794

tensorNet::PROFILER_BEGIN

void PROFILER_BEGIN(profilerQuery query)

Begin a profiling query, before network is run.

Definition: tensorNet.h:701

tensorNet::Profiler

Profiler interface for measuring layer timings.

Definition: tensorNet.h:684

tensorNet::GetNetworkTime

float GetNetworkTime()

Retrieve the network runtime (in milliseconds).

Definition: tensorNet.h:552

timespec.h

tensorNet::mProfilerQueriesDone

uint32_t mProfilerQueriesDone

Definition: tensorNet.h:800

imageFormat.h

tensorNet::mOutputs

std::vector< layerInfo > mOutputs

Definition: tensorNet.h:819

tensorNet::layerInfo

Definition: tensorNet.h:808

tensorNet::mModelPath

std::string mModelPath

Definition: tensorNet.h:780

alpha

__device__ cudaVectorTypeInfo< T >::Base alpha(T vec, typename cudaVectorTypeInfo< T >::Base default_alpha=255)

Definition: cudaVector.h:98

timeDiff

void timeDiff(const timespec &start, const timespec &end, timespec *result)

Find the difference between two timestamps.

Definition: timespec.h:73

TYPE_DISABLED

@ TYPE_DISABLED

Unknown, unspecified, or disabled type.

Definition: tensorNet.h:104