docs/html/poseNet_8h_source.html
| | Jetson Inference
DNN Vision Library |
poseNet.h
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #ifndef __POSE_NET_H__
24 #define __POSE_NET_H__
25
26
27 #include "tensorNet.h"
28
29 #include <array>
30 #include <vector>
31
32
/**
 * Name of default input blob for pose estimation ONNX model.
 */
#define POSENET_DEFAULT_INPUT   "input"

/**
 * Name of default output blob of the confidence map for pose estimation ONNX model.
 */
#define POSENET_DEFAULT_CMAP   "cmap"

/**
 * Name of default output blob of the Part Affinity Field (PAF) for pose estimation ONNX model.
 */
#define POSENET_DEFAULT_PAF   "paf"

/**
 * Default value of the minimum confidence threshold.
 */
#define POSENET_DEFAULT_THRESHOLD 0.15f

/**
 * Default radius scale for keypoint circles, relative to the image dimensions.
 */
#define POSENET_DEFAULT_KEYPOINT_SCALE 0.0052f

/**
 * Default line-width scale for link lines, relative to the image dimensions.
 */
#define POSENET_DEFAULT_LINK_SCALE 0.0013f

/**
 * Model type string ("pose") used to identify pose estimation models.
 */
#define POSENET_MODEL_TYPE "pose"

/**
 * Standard command-line options able to be passed to poseNet::Create()
 *
 * NOTE(review): fixed the 'coverge' typo and the stale '--threshold' default —
 * the actual default is POSENET_DEFAULT_THRESHOLD (0.15), not 0.5.
 */
#define POSENET_USAGE_STRING  "poseNet arguments: \n" \
		  "  --network=NETWORK    pre-trained model to load, one of the following:\n" \
		  "                           * resnet18-body (default)\n" \
		  "                           * resnet18-hand\n" \
		  "                           * densenet121-body\n" \
		  "  --model=MODEL        path to custom model to load (caffemodel, uff, or onnx)\n" \
		  "  --prototxt=PROTOTXT  path to custom prototxt to load (for .caffemodel only)\n" \
		  "  --labels=LABELS      path to text file containing the labels for each class\n" \
		  "  --input-blob=INPUT   name of the input layer (default is '" POSENET_DEFAULT_INPUT "')\n" \
		  "  --output-cvg=COVERAGE  name of the coverage output layer (default is '" POSENET_DEFAULT_CMAP "')\n" \
		  "  --output-bbox=BOXES  name of the bounding output layer (default is '" POSENET_DEFAULT_PAF "')\n" \
		  "  --mean-pixel=PIXEL   mean pixel value to subtract from input (default is 0.0)\n" \
		  "  --threshold=THRESHOLD  minimum threshold for detection (default is 0.15)\n" \
		  "  --overlay=OVERLAY    detection overlay flags (e.g. --overlay=links,keypoints)\n" \
		  "                       valid combinations are:  'box', 'links', 'keypoints', 'none'\n" \
		  "  --keypoint-scale=X   radius scale for keypoints, relative to image (default: 0.0052)\n" \
		  "  --link-scale=X       line width scale for links, relative to image (default: 0.0013)\n" \
		  "  --profile            enable layer profiling in TensorRT\n\n"
99
100
105 class poseNet : public tensorNet
106 {
107 public:
112struct ObjectPose
113 {
125 {
129 };
130
131 std::vector<Keypoint> Keypoints;
132 std::vector<std::array<uint32_t, 2>> Links;
135inline int FindKeypoint(uint32_t id) const;
136
138inline int FindLink(uint32_t a, uint32_t b) const;
139 };
140
144enum OverlayFlags
145 {
146OVERLAY_NONE = 0,
147OVERLAY_BOX = (1 << 0),
148OVERLAY_LINKS = (1 << 1),
149OVERLAY_KEYPOINTS = (1 << 2),
150OVERLAY_DEFAULT = OVERLAY_LINKS|OVERLAY_KEYPOINTS,
151 };
152
159static uint32_t OverlayFlagsFromStr( const char* flags );
160
167static poseNet* Create( const char* network="resnet18-body", float threshold=POSENET_DEFAULT_THRESHOLD,
168 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST,
169deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
170
182static poseNet* Create( const char* model_path, const char* topology, const char* colors,
183float threshold=POSENET_DEFAULT_THRESHOLD,
184const char* input = POSENET_DEFAULT_INPUT,
185const char* cmap = POSENET_DEFAULT_CMAP,
186const char* paf = POSENET_DEFAULT_PAF,
187 uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE,
188precisionType precision=TYPE_FASTEST,
189deviceType device=DEVICE_GPU, bool allowGPUFallback=true );
190
194static poseNet* Create( int argc, char** argv );
195
199static poseNet* Create( const commandLine& cmdLine );
200
204static inline const char* Usage() { return POSENET_USAGE_STRING; }
205
209virtual ~poseNet();
210
220template<typename T> bool Process( T* image, uint32_t width, uint32_t height, std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT ) { return Process((void*)image, width, height, imageFormatFromType<T>(), poses, overlay); }
221
231bool Process( void* image, uint32_t width, uint32_t height, imageFormat format, std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT );
232
241template<typename T> bool Process( T* image, uint32_t width, uint32_t height, uint32_t overlay=OVERLAY_DEFAULT ) { return Process((void*)image, width, height, imageFormatFromType<T>(), overlay); }
242
251bool Process( void* image, uint32_t width, uint32_t height, imageFormat format, uint32_t overlay=OVERLAY_DEFAULT );
252
256template<typename T> bool Overlay( T* input, T* output, uint32_t width, uint32_t height, const std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT ) { return Overlay((void*)input, (void*)output, width, height, imageFormatFromType<T>(), overlay); }
257
261bool Overlay( void* input, void* output, uint32_t width, uint32_t height, imageFormat format, const std::vector<ObjectPose>& poses, uint32_t overlay=OVERLAY_DEFAULT );
262
266inline float GetThreshold() const { return mThreshold; }
267
271inline void SetThreshold( float threshold ) { mThreshold = threshold; }
272
276inline const char* GetCategory() const { return mTopology.category.c_str(); }
277
281inline uint32_t GetNumKeypoints() const { return mTopology.keypoints.size(); }
282
286inline const char* GetKeypointName( uint32_t index ) const { return mTopology.keypoints[index].c_str(); }
287
291inline int FindKeypointID( const char* name ) const;
292
296inline float4 GetKeypointColor( uint32_t index ) const { return mKeypointColors[index]; }
297
301inline void SetKeypointColor( uint32_t index, const float4& color ) { mKeypointColors[index] = color; }
302
306inline void SetKeypointAlpha( uint32_t index, float alpha ) { mKeypointColors[index].w = alpha; }
307
311inline void SetKeypointAlpha( float alpha );
312
316inline float GetKeypointScale() const { return mKeypointScale; }
317
322inline void SetKeypointScale( float scale ) { mKeypointScale = scale; }
323
327inline float GetLinkScale() const { return mLinkScale; }
328
333inline void SetLinkScale( float scale ) { mLinkScale = scale; }
334
335 protected:
336
337static const int CMAP_WINDOW_SIZE=5;
338static const int PAF_INTEGRAL_SAMPLES=7;
339static const int MAX_LINKS=100;
340static const int MAX_OBJECTS=100;
341
343 {
345 std::vector<std::string> keypoints;
348 };
349
350// constructor
351poseNet();
352
353bool init( const char* model_path, const char* topology, const char* colors, float threshold,
354const char* input, const char* cmap, const char* paf, uint32_t maxBatchSize,
355precisionType precision, deviceType device, bool allowGPUFallback );
356
357bool postProcess(std::vector<ObjectPose>& poses, uint32_t width, uint32_t height);
358
359bool loadTopology( const char* json_path, Topology* topology );
360bool loadKeypointColors( const char* filename );
361
363
364float mThreshold;
365float mLinkScale;
366float mKeypointScale;
367
368 float4* mKeypointColors;
369
370// post-processing buffers
372int* mPeakCounts;
373int* mConnections;
375intmNumObjects;
376
377float* mRefinedPeaks;
378float* mScoreGraph;
379
380void* mAssignmentWorkspace;
381void* mConnectionWorkspace;
382 };
383
384
385 // FindKeypointID
386 inline int poseNet::FindKeypointID( const char* name ) const
387 {
388if( !name )
389return -1;
390
391const uint32_t numKeypoints = GetNumKeypoints();
392
393for( uint32_t n=0; n < numKeypoints; n++ )
394 {
395if( strcasecmp(GetKeypointName(n), name) == 0 )
396return n;
397 }
398
399return -1;
400 }
401
402 // FindKeypoint
403 inline int poseNet::ObjectPose::FindKeypoint( uint32_t id ) const
404 {
405const uint32_t numKeypoints = Keypoints.size();
406
407for( uint32_t n=0; n < numKeypoints; n++ )
408 {
409if( id == Keypoints[n].ID )
410return n;
411 }
412
413return -1;
414 }
415
416 // FindLink
417 inline int poseNet::ObjectPose::FindLink( uint32_t a, uint32_t b ) const
418 {
419const uint32_t numLinks = Links.size();
420
421for( uint32_t n=0; n < numLinks; n++ )
422 {
423if( a == Keypoints[Links[n][0]].ID && b == Keypoints[Links[n][1]].ID )
424return n;
425 }
426
427return -1;
428 }
429
430 // SetKeypointAlpha
431 inline void poseNet::SetKeypointAlpha( float alpha )
432 {
433const uint32_t numKeypoints = GetNumKeypoints();
434
435for( uint32_t n=0; n < numKeypoints; n++ )
436mKeypointColors[n].w = alpha;
437 }
438
439
440 #endif
void SetLinkScale(float scale)
Set the scale used to calculate the width of link lines.
Definition: poseNet.h:333
int * mObjects
Definition: poseNet.h:374
bool Overlay(T *input, T *output, uint32_t width, uint32_t height, const std::vector< ObjectPose > &poses, uint32_t overlay=OVERLAY_DEFAULT)
Overlay the results on the image.
Definition: poseNet.h:256
bool init(const char *model_path, const char *topology, const char *colors, float threshold, const char *input, const char *cmap, const char *paf, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback)
float Bottom
Bounding box bottom, as determined by the bottom-most keypoint in the pose.
Definition: poseNet.h:119
const char * GetKeypointName(uint32_t index) const
Get the name of a keypoint in the topology by its ID.
Definition: poseNet.h:286
std::string category
Definition: poseNet.h:344
static const int PAF_INTEGRAL_SAMPLES
Definition: poseNet.h:338
@ OVERLAY_KEYPOINTS
Overlay the keypoints (joints) as circles.
Definition: poseNet.h:149
uchar3 color
The RGB color of the point.
Definition: cudaPointCloud.h:11
void * mAssignmentWorkspace
Definition: poseNet.h:380
int * mPeakCounts
Definition: poseNet.h:372
#define POSENET_USAGE_STRING
Standard command-line options able to be passed to poseNet::Create()
Definition: poseNet.h:81
virtual ~poseNet()
Destroy.
float * mScoreGraph
Definition: poseNet.h:378
static uint32_t OverlayFlagsFromStr(const char *flags)
Parse a string sequence into OverlayFlags enum.
bool loadKeypointColors(const char *filename)
float * mRefinedPeaks
Definition: poseNet.h:377
float GetKeypointScale() const
Get the scale used to calculate the radius of keypoints relative to input image dimensions.
Definition: poseNet.h:316
float Top
Bounding box top, as determined by the top-most keypoint in the pose.
Definition: poseNet.h:118
float mThreshold
Definition: poseNet.h:364
@ DEVICE_GPU
GPU (if multiple GPUs are present, a specific GPU can be selected with cudaSetDevice()).
Definition: tensorNet.h:131
bool postProcess(std::vector< ObjectPose > &poses, uint32_t width, uint32_t height)
void SetKeypointScale(float scale)
Set the scale used to calculate the radius of keypoint circles.
Definition: poseNet.h:322
#define POSENET_DEFAULT_PAF
Name of default output blob of the Part Affinity Field (PAF) for pose estimation ONNX model.
Definition: poseNet.h:49
std::vector< std::string > keypoints
Definition: poseNet.h:345
deviceType
Enumeration for indicating the desired device that the network should run on, if available in hardwar...
Definition: tensorNet.h:129
poseNet::ObjectPose::FindKeypoint
int FindKeypoint(uint32_t id) const
Find the index of a keypoint by its ID, or return -1 if not found.
Definition: poseNet.h:403
uint32_t GetNumKeypoints() const
Get the number of keypoints in the topology.
Definition: poseNet.h:281
#define POSENET_DEFAULT_INPUT
Name of default input blob for pose estimation ONNX model.
Definition: poseNet.h:37
float mLinkScale
Definition: poseNet.h:365
OverlayFlags
Overlay flags (can be OR'd together).
Definition: poseNet.h:144
float GetLinkScale() const
Get the scale used to calculate the width of link lines relative to input image dimensions.
Definition: poseNet.h:327
int * mConnections
Definition: poseNet.h:373
@ TYPE_FASTEST
The fastest detected precision should be used (i.e.
Definition: tensorNet.h:105
static const int CMAP_WINDOW_SIZE
Definition: poseNet.h:337
@ OVERLAY_DEFAULT
Definition: poseNet.h:150
void * mConnectionWorkspace
Definition: poseNet.h:381
Topology mTopology
Definition: poseNet.h:362
static const char * Usage()
Usage string for command line arguments to Create()
Definition: poseNet.h:204
int numLinks
Definition: poseNet.h:347
A keypoint or joint in the topology.
Definition: poseNet.h:124
static poseNet * Create(const char *network="resnet18-body", float threshold=POSENET_DEFAULT_THRESHOLD, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true)
Load a pre-trained model.
precisionType
Enumeration for indicating the desired precision that the network should run in, if available in hard...
Definition: tensorNet.h:102
bool Process(T *image, uint32_t width, uint32_t height, uint32_t overlay=OVERLAY_DEFAULT)
Perform pose estimation on the given image, and overlay the results.
Definition: poseNet.h:241
void SetKeypointColor(uint32_t index, const float4 &color)
Set the overlay color for a keypoint.
Definition: poseNet.h:301
int * mPeaks
Definition: poseNet.h:371
float GetThreshold() const
Retrieve the minimum confidence threshold.
Definition: poseNet.h:266
@ OVERLAY_NONE
No overlay.
Definition: poseNet.h:146
float4 GetKeypointColor(uint32_t index) const
Get the overlay color of a keypoint.
Definition: poseNet.h:296
float Right
Bounding box right, as determined by the right-most keypoint in the pose.
Definition: poseNet.h:117
void SetKeypointAlpha(uint32_t index, float alpha)
Set the alpha channel for a keypoint color (between 0-255).
Definition: poseNet.h:306
Abstract class for loading a tensor network with TensorRT.
Definition: tensorNet.h:218
int FindLink(uint32_t a, uint32_t b) const
Find the index of a link between two keypoint IDs, or return -1 if not found.
Definition: poseNet.h:417
static const int MAX_OBJECTS
Definition: poseNet.h:340
const char * GetCategory() const
Get the category of objects that are detected (e.g.
Definition: poseNet.h:276
float Left
Bounding box left, as determined by the left-most keypoint in the pose.
Definition: poseNet.h:116
#define POSENET_DEFAULT_CMAP
Name of default output blob of the confidence map for pose estimation ONNX model.
Definition: poseNet.h:43
bool loadTopology(const char *json_path, Topology *topology)
void SetThreshold(float threshold)
Set the minimum confidence threshold.
Definition: poseNet.h:271
poseNet::ObjectPose::Keypoint::x
float x
The x coordinate of the keypoint.
Definition: poseNet.h:127
bool Process(T *image, uint32_t width, uint32_t height, std::vector< ObjectPose > &poses, uint32_t overlay=OVERLAY_DEFAULT)
Perform pose estimation on the given image, returning object poses, and overlay the results.
Definition: poseNet.h:220
std::vector< std::array< uint32_t, 2 > > Links
List of links in the object.
Definition: poseNet.h:132
#define POSENET_DEFAULT_THRESHOLD
Default value of the minimum confidence threshold.
Definition: poseNet.h:55
uint32_t ID
Object ID in the image frame, starting with 0.
Definition: poseNet.h:114
#define DEFAULT_MAX_BATCH_SIZE
Default maximum batch size.
Definition: tensorNet.h:88
int mNumObjects
Definition: poseNet.h:375
@ OVERLAY_LINKS
Overlay the skeleton links (bones) as lines.
Definition: poseNet.h:148
poseNet::ObjectPose::Keypoint::ID
uint32_t ID
Type ID of the keypoint - the name can be retrieved with poseNet::GetKeypointName()
Definition: poseNet.h:126
int FindKeypointID(const char *name) const
Find the ID of a keypoint by name, or return -1 if not found.
Definition: poseNet.h:386
poseNet()
static const int MAX_LINKS
Definition: poseNet.h:339
float mKeypointScale
Definition: poseNet.h:366
Command line parser for extracting flags, values, and strings.
Definition: commandLine.h:35
float4 * mKeypointColors
Definition: poseNet.h:368
poseNet::ObjectPose::Keypoint::y
float y
The y coordinate of the keypoint.
Definition: poseNet.h:128
Definition: poseNet.h:342
int links[MAX_LINKS *4]
Definition: poseNet.h:346
@ OVERLAY_BOX
Overlay object bounding boxes.
Definition: poseNet.h:147
The pose of an object, composed of links between keypoints.
Definition: poseNet.h:112
imageFormat
The imageFormat enum is used to identify the pixel format and colorspace of an image.
Definition: imageFormat.h:49
Pose estimation models with TensorRT support.
Definition: poseNet.h:105
__device__ cudaVectorTypeInfo< T >::Base alpha(T vec, typename cudaVectorTypeInfo< T >::Base default_alpha=255)
Definition: cudaVector.h:98
poseNet::ObjectPose::Keypoints
std::vector< Keypoint > Keypoints
List of keypoints in the object, which contain the keypoint ID and x/y coordinates.
Definition: poseNet.h:131