docs/html/imageNet_8h_source.html
| | Jetson Inference
DNN Vision Library |
imageNet.h
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #ifndef __IMAGE_NET_H__
24 #define __IMAGE_NET_H__
25
26
27 #include "tensorNet.h"
28
29
34 #define IMAGENET_DEFAULT_INPUT "data"
35
40 #define IMAGENET_DEFAULT_OUTPUT "prob"
41
46 #define IMAGENET_DEFAULT_THRESHOLD 0.01f
47
52 #define IMAGENET_MODEL_TYPE "classification"
53
58 #define IMAGENET_USAGE_STRING "imageNet arguments: \n" \
59 " --network=NETWORK pre-trained model to load, one of the following:\n" \
60 " * alexnet\n" \
61 " * googlenet (default)\n" \
62 " * googlenet-12\n" \
63 " * resnet-18\n" \
64 " * resnet-50\n" \
65 " * resnet-101\n" \
66 " * resnet-152\n" \
67 " * vgg-16\n" \
68 " * vgg-19\n" \
69 " * inception-v4\n" \
70 " --model=MODEL path to custom model to load (caffemodel, uff, or onnx)\n" \
71 " --prototxt=PROTOTXT path to custom prototxt to load (for .caffemodel only)\n" \
72 " --labels=LABELS path to text file containing the labels for each class\n" \
73 " --input-blob=INPUT name of the input layer (default is '" IMAGENET_DEFAULT_INPUT "')\n" \
74 " --output-blob=OUTPUT name of the output layer (default is '" IMAGENET_DEFAULT_OUTPUT "')\n" \
75 " --threshold=CONF minimum confidence threshold for classification (default is 0.01)\n" \
76 " --smoothing=WEIGHT weight between [0,1] or number of frames (disabled by default)\n" \
77 " --profile enable layer profiling in TensorRT\n\n"
78
79
84 class imageNet : public tensorNet
85 {
86 public:
90 typedef std::vector<std::pair<uint32_t, float>> Classifications;
91
102 static imageNet* Create( const char* network="googlenet",
103 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
104 precisionType precision=TYPE_FASTEST,
105 deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
106
117 static imageNet* Create( const char* prototxt_path, const char* model_path,
118 const char* mean_binary, const char* class_labels,
119 const char* input=IMAGENET_DEFAULT_INPUT,
120 const char* output=IMAGENET_DEFAULT_OUTPUT,
121 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
122 precisionType precision=TYPE_FASTEST,
123 deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
124
128 static imageNet* Create( int argc, char** argv );
129
133 static imageNet* Create( const commandLine& cmdLine );
134
138 static inline const char* Usage() { return IMAGENET_USAGE_STRING; }
139
143 virtual ~imageNet();
144
158 template<typename T> int Classify( T* image, uint32_t width, uint32_t height, float* confidence=NULL ) { return Classify((void*)image, width, height, imageFormatFromType<T>(), confidence); }
159
174 int Classify( void* image, uint32_t width, uint32_t height, imageFormat format, float* confidence=NULL );
175
191 int Classify( float* rgba, uint32_t width, uint32_t height, float* confidence=NULL, imageFormat format=IMAGE_RGBA32F );
192
208 template<typename T> int Classify( T* image, uint32_t width, uint32_t height, Classifications& classifications, int topK=1 ) { return Classify((void*)image, width, height, imageFormatFromType<T>(), classifications, topK); }
209
226 int Classify( void* image, uint32_t width, uint32_t height, imageFormat format, Classifications& classifications, int topK=1 );
227
231 inline uint32_t GetNumClasses() const { return mNumClasses; }
232
236 inline const char* GetClassLabel( int index ) const { return GetClassDesc(index); }
237
241 inline const char* GetClassDesc( int index ) const { return index >= 0 ? mClassDesc[index].c_str() : "none"; }
242
246 inline const char* GetClassSynset( int index ) const { return index >= 0 ? mClassSynset[index].c_str() : "none"; }
247
251 inline const char* GetClassPath() const { return mClassPath.c_str(); }
252
256 inline float GetThreshold() const { return mThreshold; }
257
263 inline void SetThreshold( float threshold ) { mThreshold = threshold; }
264
269 inline float GetSmoothing() const { return mSmoothingFactor; }
270
289 inline void SetSmoothing( float factor ) { mSmoothingFactor = factor; }
290
291 protected:
292 imageNet();
293
294 //bool init( NetworkType networkType, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback );
295 bool init(const char* prototxt_path, const char* model_path, const char* mean_binary, const char* class_path, const char* input, const char* output, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback );
296 bool loadClassInfo( const char* filename, int expectedClasses=-1 );
297
298 bool preProcess( void* image, uint32_t width, uint32_t height, imageFormat format );
299
300 float* applySmoothing();
301
302 uint32_t mNumClasses;
303
304 std::vector<std::string> mClassSynset; // 1000 class ID's (ie n01580077, n04325704)
305 std::vector<std::string> mClassDesc;
306
307 std::string mClassPath;
308 //NetworkType mNetworkType;
309
310 float* mSmoothingBuffer;
311 float mSmoothingFactor;
312
313 float mThreshold;
314 };
315
316
317 #endif
float * applySmoothing()
@ IMAGE_RGBA32F
float4 RGBA32F ('rgba32f')
Definition: imageFormat.h:55
int Classify(T *image, uint32_t width, uint32_t height, float *confidence=NULL)
Predict the maximum-likelihood image class whose confidence meets the minimum threshold.
Definition: imageNet.h:158
static const char * Usage()
Usage string for command line arguments to Create()
Definition: imageNet.h:138
#define IMAGENET_DEFAULT_INPUT
Name of default input blob for imageNet model.
Definition: imageNet.h:34
int Classify(T *image, uint32_t width, uint32_t height, Classifications &classifications, int topK=1)
Classify the image and return the topK image classification results that meet the minimum confidence ...
Definition: imageNet.h:208
float GetSmoothing() const
Return the temporal smoothing weight or number of frames in the smoothing window.
Definition: imageNet.h:269
const char * GetClassSynset(int index) const
Retrieve the class synset category of a particular class.
Definition: imageNet.h:246
#define IMAGENET_DEFAULT_OUTPUT
Name of default output confidence values for imageNet model.
Definition: imageNet.h:40
float * mSmoothingBuffer
Definition: imageNet.h:310
void SetSmoothing(float factor)
Enable temporal smoothing of the results using EWMA (exponentially-weighted moving average).
Definition: imageNet.h:289
@ DEVICE_GPU
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice())
Definition: tensorNet.h:131
const char * GetClassLabel(int index) const
Retrieve the description of a particular class.
Definition: imageNet.h:236
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardwar...
Definition: tensorNet.h:129
uint32_t GetNumClasses() const
Retrieve the number of image recognition classes (typically 1000)
Definition: imageNet.h:231
std::vector< std::string > mClassSynset
Definition: imageNet.h:304
@ TYPE_FASTEST
The fastest detected precision should be used.
Definition: tensorNet.h:105
std::vector< std::pair< uint32_t, float > > Classifications
List of classification results where each entry represents a (classID, confidence) pair.
Definition: imageNet.h:90
float GetThreshold() const
Return the confidence threshold used for classification.
Definition: imageNet.h:256
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hard...
Definition: tensorNet.h:102
bool preProcess(void *image, uint32_t width, uint32_t height, imageFormat format)
virtual ~imageNet()
Destroy.
std::vector< std::string > mClassDesc
Definition: imageNet.h:305
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:218
const char * GetClassPath() const
Retrieve the path to the file containing the class descriptions.
Definition: imageNet.h:251
Image recognition with classification networks, using TensorRT.
Definition: imageNet.h:84
float mThreshold
Definition: imageNet.h:313
std::string mClassPath
Definition: imageNet.h:307
uint32_t mNumClasses
Definition: imageNet.h:302
#define IMAGENET_USAGE_STRING
Standard command-line options able to be passed to imageNet::Create()
Definition: imageNet.h:58
imageNet()
const char * GetClassDesc(int index) const
Retrieve the description of a particular class.
Definition: imageNet.h:241
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:88
float mSmoothingFactor
Definition: imageNet.h:311
bool init(const char *prototxt_path, const char *model_path, const char *mean_binary, const char *class_path, const char *input, const char *output, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
static imageNet * Create(const char *network="googlenet", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load one of the following pre-trained models:
bool loadClassInfo(const char *filename, int expectedClasses=-1)
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
void SetThreshold(float threshold)
Set the confidence threshold used for classification.
Definition: imageNet.h:263