Jetson Inference - DNN Vision Library
tensorNet.h
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __TENSOR_NET_H__
#define __TENSOR_NET_H__

// forward declaration of IInt8Calibrator
namespace nvinfer1 { class IInt8Calibrator; }

// includes
#include <NvInfer.h>

#include <jetson-utils/cudaUtility.h>
#include <jetson-utils/commandLine.h>
#include <jetson-utils/imageFormat.h>
#include <jetson-utils/timespec.h>
#include <jetson-utils/logging.h>

#include <vector>
#include <sstream>
#include <math.h>

#if NV_TENSORRT_MAJOR >= 6
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#elif NV_TENSORRT_MAJOR >= 2
typedef nvinfer1::DimsCHW Dims3;

#define DIMS_C(x) x.d[0]
#define DIMS_H(x) x.d[1]
#define DIMS_W(x) x.d[2]

#else
typedef nvinfer1::Dims3 Dims3;

#define DIMS_C(x) x.c
#define DIMS_H(x) x.h
#define DIMS_W(x) x.w

#ifndef NV_TENSORRT_MAJOR
#define NV_TENSORRT_MAJOR 1
#define NV_TENSORRT_MINOR 0
#endif
#endif

#if NV_TENSORRT_MAJOR >= 8
#define NOEXCEPT noexcept
#else
#define NOEXCEPT
#endif

/** Macro for checking the installed TensorRT version (evaluates true when the version is >= major.minor.patch). */
#define TENSORRT_VERSION_CHECK(major, minor, patch) (NV_TENSORRT_MAJOR > major || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR > minor) || (NV_TENSORRT_MAJOR == major && NV_TENSORRT_MINOR == minor && NV_TENSORRT_PATCH >= patch))
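
/*
 * Illustrative usage sketch (not part of the original header) -- the macro
 * above can guard code that needs a minimum TensorRT version:
 *
 *    #if TENSORRT_VERSION_CHECK(8, 0, 0)
 *        // code path that requires TensorRT 8.0 or newer
 *    #endif
 */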

/** Default maximum batch size */
#define DEFAULT_MAX_BATCH_SIZE  1

/** Prefix used for tagging printed log output from TensorRT. */
#define LOG_TRT "[TRT] "

/** Enumeration for indicating the desired precision that the network should run in, if available in hardware. */
enum precisionType
{
	TYPE_DISABLED = 0,	/**< Unknown, unspecified, or disabled type */
	TYPE_FASTEST,		/**< The fastest detected precision should be used */
	TYPE_FP32,		/**< 32-bit floating-point precision (FP32) */
	TYPE_FP16,		/**< 16-bit floating-point half precision (FP16) */
	TYPE_INT8,		/**< 8-bit integer precision (INT8) */
	NUM_PRECISIONS		/**< Number of precision types defined */
};

/** Stringize function that returns precisionType in text. */
const char* precisionTypeToStr( precisionType type );

/** Parse the precision type from a string. */
precisionType precisionTypeFromStr( const char* str );

/** Enumeration for indicating the desired device that the network should run on, if available in hardware. */
enum deviceType
{
	DEVICE_GPU = 0,			/**< GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()) */
	DEVICE_DLA,				/**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
	DEVICE_DLA_0 = DEVICE_DLA,	/**< Deep Learning Accelerator (DLA) Core 0 (only on Jetson Xavier) */
	DEVICE_DLA_1,				/**< Deep Learning Accelerator (DLA) Core 1 (only on Jetson Xavier) */
	NUM_DEVICES				/**< Number of device types defined */
};

/** Stringize function that returns deviceType in text. */
const char* deviceTypeToStr( deviceType type );

/** Parse the device type from a string. */
deviceType deviceTypeFromStr( const char* str );

/** Enumeration indicating the format of the model that's imported in TensorRT (either caffe, ONNX, UFF, or TensorRT engine). */
enum modelType
{
	MODEL_CUSTOM = 0,	/**< Created directly with TensorRT API */
	MODEL_CAFFE,		/**< caffemodel */
	MODEL_ONNX,		/**< ONNX */
	MODEL_UFF,		/**< UFF */
	MODEL_ENGINE		/**< TensorRT engine/plan */
};

/** Stringize function that returns modelType in text. */
const char* modelTypeToStr( modelType type );

/** Parse the model format from a string. */
modelType modelTypeFromStr( const char* str );

/** Parse the model format from a file path. */
modelType modelTypeFromPath( const char* path );

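/*
 * Illustrative usage sketch (not part of the original header) -- parsing
 * user-supplied options with the helpers above (the exact strings accepted
 * by the *FromStr() parsers are an assumption here):
 *
 *    precisionType precision = precisionTypeFromStr("fp16");
 *    deviceType    device    = deviceTypeFromStr("gpu");
 *    modelType     model     = modelTypeFromPath("resnet18.onnx");  // e.g. MODEL_ONNX
 *
 *    printf("%s %s %s\n", precisionTypeToStr(precision),
 *           deviceTypeToStr(device), modelTypeToStr(model));
 */
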
/** Profiling queries */
enum profilerQuery
{
	PROFILER_PREPROCESS = 0,
	PROFILER_NETWORK,
	PROFILER_POSTPROCESS,
	PROFILER_VISUALIZE,
	PROFILER_TOTAL
};

/** Stringize function that returns profilerQuery in text. */
const char* profilerQueryToStr( profilerQuery query );

/** Profiler device */
enum profilerDevice
{
	PROFILER_CPU = 0,	/**< CPU walltime */
	PROFILER_CUDA		/**< CUDA kernel time */
};

/** Abstract class for loading a tensor network with TensorRT. */
class tensorNet
{
public:
	/** Destructor. */
	virtual ~tensorNet();

	/** Load a new network instance. */
	bool LoadNetwork( const char* prototxt, const char* model, const char* mean=NULL,
				   const char* input_blob="data", const char* output_blob="prob",
				   uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
				   deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
				   nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

	/** Load a new network instance with multiple output layers. */
	bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
				   const char* input_blob, const std::vector<std::string>& output_blobs,
				   uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
				   deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
				   nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

	/** Load a new network instance with multiple input and output layers. */
	bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
				   const std::vector<std::string>& input_blobs,
				   const std::vector<std::string>& output_blobs,
				   uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
				   precisionType precision=TYPE_FASTEST,
				   deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
				   nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

	/** Load a new network instance, with the dimensions of the input layer specified explicitly. */
	bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
				   const char* input_blob, const Dims3& input_dims,
				   const std::vector<std::string>& output_blobs,
				   uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
				   precisionType precision=TYPE_FASTEST,
				   deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
				   nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

	/** Load a new network instance, with the dimensions of multiple input layers specified explicitly. */
	bool LoadNetwork( const char* prototxt, const char* model, const char* mean,
				   const std::vector<std::string>& input_blobs,
				   const std::vector<Dims3>& input_dims,
				   const std::vector<std::string>& output_blobs,
				   uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
				   precisionType precision=TYPE_FASTEST,
				   deviceType device=DEVICE_GPU, bool allowGPUFallback=true,
				   nvinfer1::IInt8Calibrator* calibrator=NULL, cudaStream_t stream=NULL );

	/** Load a network instance from a serialized engine plan file. */
	bool LoadEngine( const char* engine_filename,
				  const std::vector<std::string>& input_blobs,
				  const std::vector<std::string>& output_blobs,
				  nvinfer1::IPluginFactory* pluginFactory=NULL,
				  deviceType device=DEVICE_GPU,
				  cudaStream_t stream=NULL );

	/** Load a network instance from a serialized engine plan that's already in memory. */
	bool LoadEngine( char* engine_stream, size_t engine_size,
				  const std::vector<std::string>& input_blobs,
				  const std::vector<std::string>& output_blobs,
				  nvinfer1::IPluginFactory* pluginFactory=NULL,
				  deviceType device=DEVICE_GPU,
				  cudaStream_t stream=NULL );

	/** Load network resources from an existing TensorRT engine instance. */
	bool LoadEngine( nvinfer1::ICudaEngine* engine,
				  const std::vector<std::string>& input_blobs,
				  const std::vector<std::string>& output_blobs,
				  deviceType device=DEVICE_GPU,
				  cudaStream_t stream=NULL );

	/** Load a serialized engine plan file into memory. */
	bool LoadEngine( const char* filename, char** stream, size_t* size );

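/*
 * Illustrative usage sketch (not part of the original header) -- the last two
 * overloads above can be chained to read a plan file from disk and then
 * deserialize it (the .engine path and blob names are hypothetical):
 *
 *    char*  engineStream = NULL;
 *    size_t engineSize   = 0;
 *
 *    if( net->LoadEngine("networks/model.engine", &engineStream, &engineSize) )
 *        net->LoadEngine(engineStream, engineSize, {"input_0"}, {"output_0"});
 */
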
	/** Load class descriptions from a label file. */
	static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, int expectedClasses=-1 );

	/** Load class descriptions and synset strings from a label file. */
	static bool LoadClassLabels( const char* filename, std::vector<std::string>& descriptions, std::vector<std::string>& synsets, int expectedClasses=-1 );

	/** Load class colors from a text file. */
	static bool LoadClassColors( const char* filename, float4* colors, int expectedClasses, float defaultAlpha=255.0f );

	/** Load class colors from a text file. */
	static bool LoadClassColors( const char* filename, float4** colors, int expectedClasses, float defaultAlpha=255.0f );

	/** Procedurally generate a color for a given class index with the specified alpha value. */
	static float4 GenerateColor( uint32_t classID, float alpha=255.0f );

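/*
 * Illustrative usage sketch (not part of the original header) -- loading labels
 * and colors for a model with a known class count (paths are hypothetical):
 *
 *    std::vector<std::string> labels;
 *
 *    if( !tensorNet::LoadClassLabels("networks/labels.txt", labels, 1000) )
 *        LogError(LOG_TRT "failed to load class labels\n");
 *
 *    float4* colors = NULL;   // assumed to be allocated by the float4** overload
 *
 *    if( !tensorNet::LoadClassColors("networks/colors.txt", &colors, 1000) )
 *        LogError(LOG_TRT "failed to load class colors\n");
 */
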
	/** Manually enable layer profiling times. */
	void EnableLayerProfiler();

	/** Manually enable debug messages and synchronization. */
	void EnableDebug();

	/** Return true if GPU fallback is enabled. */
	inline bool AllowGPUFallback() const { return mAllowGPUFallback; }

	/** Retrieve the device being used for execution. */
	inline deviceType GetDevice() const { return mDevice; }

	/** Retrieve the type of precision being used. */
	inline precisionType GetPrecision() const { return mPrecision; }

	/** Check if a particular precision is being used. */
	inline bool IsPrecision( precisionType type ) const { return (mPrecision == type); }

	/** Resolve a desired precision to a specific one that's available. */
	static precisionType SelectPrecision( precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true );

	/** Determine the fastest native precision on a device. */
	static precisionType FindFastestPrecision( deviceType device=DEVICE_GPU, bool allowInt8=true );

	/** Detect the precisions supported natively on a device. */
	static std::vector<precisionType> DetectNativePrecisions( deviceType device=DEVICE_GPU );

	/** Detect if a particular precision is supported natively. */
	static bool DetectNativePrecision( const std::vector<precisionType>& nativeTypes, precisionType type );

	/** Detect if a particular precision is supported natively. */
	static bool DetectNativePrecision( precisionType precision, deviceType device=DEVICE_GPU );

	/** Retrieve the stream that the device is operating on. */
	inline cudaStream_t GetStream() const { return mStream; }

	/** Create and use a new stream for execution. */
	cudaStream_t CreateStream( bool nonBlocking=true );

	/** Set the stream that the device is operating on. */
	void SetStream( cudaStream_t stream );

	/** Retrieve the path to the network prototxt file. */
	inline const char* GetPrototxtPath() const { return mPrototxtPath.c_str(); }

	/** Retrieve the full path to model file, including the filename. */
	inline const char* GetModelPath() const { return mModelPath.c_str(); }

	/** Retrieve the filename of the file, excluding the directory. */
	inline const char* GetModelFilename() const { return mModelFile.c_str(); }

	/** Retrieve the format of the network model. */
	inline modelType GetModelType() const { return mModelType; }

	/** Return true if the model is of the specified format. */
	inline bool IsModelType( modelType type ) const { return (mModelType == type); }

	/** Retrieve the number of input layers to the network. */
	inline uint32_t GetInputLayers() const { return mInputs.size(); }

	/** Retrieve the number of output layers to the network. */
	inline uint32_t GetOutputLayers() const { return mOutputs.size(); }

	/** Retrieve the dimensions of network input layer. */
	inline Dims3 GetInputDims( uint32_t layer=0 ) const { return mInputs[layer].dims; }

	/** Retrieve the width of network input layer. */
	inline uint32_t GetInputWidth( uint32_t layer=0 ) const { return DIMS_W(mInputs[layer].dims); }

	/** Retrieve the height of network input layer. */
	inline uint32_t GetInputHeight( uint32_t layer=0 ) const { return DIMS_H(mInputs[layer].dims); }

	/** Retrieve the size (in bytes) of network input layer. */
	inline uint32_t GetInputSize( uint32_t layer=0 ) const { return mInputs[layer].size; }

	/** Get the CUDA pointer to the input layer's memory. */
	inline float* GetInputPtr( uint32_t layer=0 ) const { return mInputs[layer].CUDA; }

	/** Retrieve the dimensions of network output layer. */
	inline Dims3 GetOutputDims( uint32_t layer=0 ) const { return mOutputs[layer].dims; }

	/** Retrieve the width of network output layer. */
	inline uint32_t GetOutputWidth( uint32_t layer=0 ) const { return DIMS_W(mOutputs[layer].dims); }

	/** Retrieve the height of network output layer. */
	inline uint32_t GetOutputHeight( uint32_t layer=0 ) const { return DIMS_H(mOutputs[layer].dims); }

	/** Retrieve the size (in bytes) of network output layer. */
	inline uint32_t GetOutputSize( uint32_t layer=0 ) const { return mOutputs[layer].size; }

	/** Get the CUDA pointer to the output memory. */
	inline float* GetOutputPtr( uint32_t layer=0 ) const { return mOutputs[layer].CUDA; }

	/** Retrieve the network frames per second (FPS). */
	inline float GetNetworkFPS() { return 1000.0f / GetNetworkTime(); }

	/** Retrieve the network runtime (in milliseconds). */
	inline float GetNetworkTime() { return GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA); }

	/** Retrieve the network name (its filename). */
	inline const char* GetNetworkName() const { return mModelFile.c_str(); }

	/** Retrieve the profiler runtime (in milliseconds). */
	inline float2 GetProfilerTime( profilerQuery query ) { PROFILER_QUERY(query); return mProfilerTimes[query]; }

	/** Retrieve the profiler runtime (in milliseconds). */
	inline float GetProfilerTime( profilerQuery query, profilerDevice device ) { PROFILER_QUERY(query); return (device == PROFILER_CPU) ? mProfilerTimes[query].x : mProfilerTimes[query].y; }

	/** Print the profiler times (in milliseconds). */
	inline void PrintProfilerTimes()
	{
		LogInfo("\n");
		LogInfo(LOG_TRT "------------------------------------------------\n");
		LogInfo(LOG_TRT "Timing Report %s\n", GetModelPath());
		LogInfo(LOG_TRT "------------------------------------------------\n");

		for( uint32_t n=0; n <= PROFILER_TOTAL; n++ )
		{
			const profilerQuery query = (profilerQuery)n;

			if( PROFILER_QUERY(query) )
				LogInfo(LOG_TRT "%-12s CPU %9.5fms CUDA %9.5fms\n", profilerQueryToStr(query), mProfilerTimes[n].x, mProfilerTimes[n].y);
		}

		LogInfo(LOG_TRT "------------------------------------------------\n\n");

		static bool first_run=true;

		if( first_run )
		{
			LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
					 "        to disable DVFS for more accurate profiling/timing measurements\n\n");

			first_run = false;
		}
	}

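/*
 * Illustrative usage sketch (not part of the original header) -- querying the
 * profiler after inference with a tensorNet-derived object `net` (hypothetical):
 *
 *    const float2 total = net->GetProfilerTime(PROFILER_TOTAL);   // x = CPU ms, y = CUDA ms
 *
 *    LogInfo(LOG_TRT "network time %f ms (CUDA)\n", net->GetNetworkTime());
 *    LogInfo(LOG_TRT "total time   %f ms (CPU)\n", total.x);
 *
 *    net->PrintProfilerTimes();
 */
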
protected:

	/** Constructor. */
	tensorNet();

	/** Execute processing of the network. */
	bool ProcessNetwork( bool sync=true );

	/** Create and output an optimized network model. */
	bool ProfileModel( const std::string& deployFile, const std::string& modelFile,
				    const std::vector<std::string>& inputs, const std::vector<Dims3>& inputDims,
				    const std::vector<std::string>& outputs, uint32_t maxBatchSize,
				    precisionType precision, deviceType device, bool allowGPUFallback,
				    nvinfer1::IInt8Calibrator* calibrator, char** engineStream, size_t* engineSize );

	/** Configure builder options. */
#if NV_TENSORRT_MAJOR >= 8
	bool ConfigureBuilder( nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
					   uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision,
					   deviceType device, bool allowGPUFallback,
					   nvinfer1::IInt8Calibrator* calibrator );
#else
	bool ConfigureBuilder( nvinfer1::IBuilder* builder, uint32_t maxBatchSize,
					   uint32_t workspaceSize, precisionType precision,
					   deviceType device, bool allowGPUFallback,
					   nvinfer1::IInt8Calibrator* calibrator );
#endif

	/** Validate that the model already has a built TensorRT engine that exists and doesn't need updating. */
	bool ValidateEngine( const char* model_path, const char* cache_path, const char* checksum_path );

	/** Logger class for GIE info/warning/errors. */
	class Logger : public nvinfer1::ILogger
	{
	public:
		void log( Severity severity, const char* msg ) NOEXCEPT override
		{
			if( severity == Severity::kWARNING )
			{
				LogWarning(LOG_TRT "%s\n", msg);
			}
			else if( severity == Severity::kINFO )
			{
				LogInfo(LOG_TRT "%s\n", msg);
			}
		#if NV_TENSORRT_MAJOR >= 6
			else if( severity == Severity::kVERBOSE )
			{
				LogVerbose(LOG_TRT "%s\n", msg);
			}
		#endif
			else
			{
				LogError(LOG_TRT "%s\n", msg);
			}
		}
	} static gLogger;

	/** Profiler interface for measuring layer timings. */
	class Profiler : public nvinfer1::IProfiler
	{
	public:
		Profiler() : timingAccumulator(0.0f) { }

		virtual void reportLayerTime(const char* layerName, float ms) NOEXCEPT
		{
			LogVerbose(LOG_TRT "layer %s - %f ms\n", layerName, ms);
			timingAccumulator += ms;
		}

		float timingAccumulator;
	} gProfiler;

	/** Begin a profiling query, before network is run. */
	inline void PROFILER_BEGIN( profilerQuery query )
	{
		const uint32_t evt  = query*2;
		const uint32_t flag = (1 << query);

		CUDA(cudaEventRecord(mEventsGPU[evt], mStream));
		timestamp(&mEventsCPU[evt]);

		mProfilerQueriesUsed |= flag;
		mProfilerQueriesDone &= ~flag;
	}

	/** End a profiling query, after the network is run. */
	inline void PROFILER_END( profilerQuery query )
	{
		const uint32_t evt = query*2+1;

		CUDA(cudaEventRecord(mEventsGPU[evt]));
		timestamp(&mEventsCPU[evt]);
		timespec cpuTime;
		timeDiff(mEventsCPU[evt-1], mEventsCPU[evt], &cpuTime);
		mProfilerTimes[query].x = timeFloat(cpuTime);

		if( mEnableProfiler && query == PROFILER_NETWORK )
		{
			LogVerbose(LOG_TRT "layer network time - %f ms\n", gProfiler.timingAccumulator);
			gProfiler.timingAccumulator = 0.0f;
			LogWarning(LOG_TRT "note -- when processing a single image, run 'sudo jetson_clocks' before\n"
					 "        to disable DVFS for more accurate profiling/timing measurements\n");
		}
	}

	/** Query the CUDA part of a profiler query. */
	inline bool PROFILER_QUERY( profilerQuery query )
	{
		const uint32_t flag = (1 << query);

		if( query == PROFILER_TOTAL )
		{
			mProfilerTimes[PROFILER_TOTAL].x = 0.0f;
			mProfilerTimes[PROFILER_TOTAL].y = 0.0f;

			for( uint32_t n=0; n < PROFILER_TOTAL; n++ )
			{
				if( PROFILER_QUERY((profilerQuery)n) )
				{
					mProfilerTimes[PROFILER_TOTAL].x += mProfilerTimes[n].x;
					mProfilerTimes[PROFILER_TOTAL].y += mProfilerTimes[n].y;
				}
			}

			return true;
		}
		else if( mProfilerQueriesUsed & flag )
		{
			if( !(mProfilerQueriesDone & flag) )
			{
				const uint32_t evt = query*2;
				float cuda_time = 0.0f;
				CUDA(cudaEventElapsedTime(&cuda_time, mEventsGPU[evt], mEventsGPU[evt+1]));
				mProfilerTimes[query].y = cuda_time;
				mProfilerQueriesDone |= flag;
				//mProfilerQueriesUsed &= ~flag;
			}

			return true;
		}

		return false;
	}

protected:

	/* Member Variables */
	std::string mPrototxtPath;
	std::string mModelPath;
	std::string mModelFile;
	std::string mMeanPath;
	std::string mCacheEnginePath;
	std::string mCacheCalibrationPath;
	std::string mChecksumPath;

	deviceType    mDevice;
	precisionType mPrecision;
	modelType     mModelType;
	cudaStream_t  mStream;
	cudaEvent_t   mEventsGPU[PROFILER_TOTAL * 2];
	timespec      mEventsCPU[PROFILER_TOTAL * 2];

	nvinfer1::IRuntime* mInfer;
	nvinfer1::ICudaEngine* mEngine;
	nvinfer1::IExecutionContext* mContext;

	float2   mProfilerTimes[PROFILER_TOTAL + 1];
	uint32_t mProfilerQueriesUsed;
	uint32_t mProfilerQueriesDone;
	uint32_t mWorkspaceSize;
	uint32_t mMaxBatchSize;
	bool     mEnableProfiler;
	bool     mEnableDebug;
	bool     mAllowGPUFallback;
	void**   mBindings;

	struct layerInfo
	{
		std::string name;
		Dims3 dims;
		uint32_t size;
		uint32_t binding;
		float* CPU;
		float* CUDA;
	};

	std::vector<layerInfo> mInputs;
	std::vector<layerInfo> mOutputs;
};

#endif
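
/*
 * Illustrative usage sketch (not part of the original header) -- tensorNet is
 * abstract (its constructor is protected), so it's used through a subclass
 * such as imageNet or detectNet. A minimal hypothetical wrapper could be:
 *
 *    class myNet : public tensorNet
 *    {
 *    public:
 *        static myNet* Create( const char* onnx_path )
 *        {
 *            myNet* net = new myNet();
 *
 *            if( !net->LoadNetwork(NULL, onnx_path) )   // no prototxt for ONNX models
 *            {
 *                delete net;
 *                return NULL;
 *            }
 *
 *            return net;
 *        }
 *
 *        // fill GetInputPtr() with preprocessed data, then run inference
 *        bool Run()    { return ProcessNetwork(); }
 *    };
 */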