docs/partitioner_8hpp_source.html
| | Taskflow: A General-purpose Task-parallel Programming System |
Loading...
Searching...
No Matches
partitioner.hpp
1// reference:
2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c
3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp\_dispatch.cpp
4
5#pragma once
6
11
12namespace tf {
13
19enum class PartitionerType : int {
24};
25
26
27//template <typename C>
28//class PartitionInvoker : public PartitionerBase {
29//
30// protected
31//
32// C _closure;
33//
34// template <typename... ArgsT>
35// auto operator()(ArgsT&&... args) {
36// return std::invoke(closure, std::forward<ArgsT>(args)...);
37// }
38//
39// template <typename... ArgsT>
40// auto operator()(ArgsT&&... args) const {
41// return std::invoke(closure, std::forward<ArgsT>(args)...);
42// }
43//
44//};
45
51class DefaultClosureWrapper {};
52
53
54
55// ----------------------------------------------------------------------------
56// Partitioner Base
57// ----------------------------------------------------------------------------
58
124template <typename C = DefaultClosureWrapper>
125class PartitionerBase {
126
127public:
128
132constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;
133
137using closure_wrapper_type = C;
138
142PartitionerBase() = default;
143
147explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}
148
152PartitionerBase(size_t chunk_size, C&& closure_wrapper) :
153 _chunk_size {chunk_size},
154 _closure_wrapper {std::forward<C>(closure_wrapper)} {
155 }
156
160size_t chunk_size() const { return _chunk_size; }
161
165void chunk_size(size_t cz) { _chunk_size = cz; }
166
170const C& closure_wrapper() const { return _closure_wrapper; }
171
175 C& closure_wrapper() { return _closure_wrapper; }
176
180template <typename F>
181void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }
182
186template <typename F>
187 TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
188if constexpr(is_default_wrapper_v) {
189return std::forward<F>(callable);
190 }
191else {
192// closure wrapper is stateful - capture it by reference
193return this, c=std::forward<F>(callable) mutable { _closure_wrapper(c); };
194 }
195 }
196
197protected:
198
202size_t _chunk_size{0};
203
207 C _closure_wrapper;
208};
209
210// ----------------------------------------------------------------------------
211// Static Partitioner
212// ----------------------------------------------------------------------------
213
261template <typename C = DefaultClosureWrapper>
262class StaticPartitioner : public PartitionerBase<C> {
263
264public:
265
269static constexpr PartitionerType type() { return PartitionerType::STATIC; }
270
274StaticPartitioner() = default;
275
279explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
280
284explicit StaticPartitioner(size_t sz, C&& closure) :
285PartitionerBase<C>(sz, std::forward<C>(closure)) {
286 }
287
295size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
296return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
297 }
298
299// --------------------------------------------------------------------------
300// scheduling methods
301// --------------------------------------------------------------------------
302
306template <typename F>
307void loop(
308size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
309 ) {
310size_t stride = W * chunk_size;
311while(curr_b < N) {
312size_t curr_e = (std::min)(curr_b + chunk_size, N);
313if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
314if(func(curr_b, curr_e)) {
315return;
316 }
317 } else {
318 func(curr_b, curr_e);
319 }
320 curr_b += stride;
321 }
322 }
323
340template <IndexRangesLike R, typename F>
341void loop(const R& range, size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func) const {
342size_t stride = W * chunk_size;
343while(curr_b < N) {
344size_t curr_e = (std::min)(curr_b + chunk_size, N);
345if constexpr (R::rank == 1) {
346if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
347if(func(range.unravel(curr_b, curr_e))) {
348return;
349 }
350 } else {
351 func(range.unravel(curr_b, curr_e));
352 }
353 curr_b = curr_e;
354 } else {
355while(curr_b < curr_e) {
356auto box = range.lower_slice(curr_b, curr_e - curr_b);
357if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
358if(func(box)) {
359return;
360 }
361 } else {
362 func(box);
363 }
364 curr_b += box.size();
365 }
366 }
367 curr_b += stride - chunk_size;
368 }
369 }
370
371};
372
373// ----------------------------------------------------------------------------
374// Guided Partitioner
375// ----------------------------------------------------------------------------
376
416template <typename C = DefaultClosureWrapper>
417class GuidedPartitioner : public PartitionerBase<C> {
418
419public:
420
424static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
425
429GuidedPartitioner() = default;
430
435explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}
436
440explicit GuidedPartitioner(size_t sz, C&& closure) :
441PartitionerBase<C>(sz, std::forward<C>(closure)) {
442 }
443
444// --------------------------------------------------------------------------
445// scheduling methods
446// --------------------------------------------------------------------------
447
451template <typename F>
452void loop(
453size_t N, size_t W, std::atomic<size_t>& next, F&& func
454 ) const {
455
456size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
457
458size_t p1 = 2 * W * (chunk_size + 1);
459float p2 = 0.5f / static_cast<float>(W);
460size_t curr_b = next.load(std::memory_order_relaxed);
461
462while(curr_b < N) {
463
464size_t r = N - curr_b;
465
466// fine-grained
467if(r < p1) {
468while(1) {
469 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
470if(curr_b >= N) {
471return;
472 }
473if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
474if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
475return;
476 }
477 } else {
478 func(curr_b, (std::min)(curr_b + chunk_size, N));
479 }
480 }
481break;
482 }
483// coarse-grained
484else {
485size_t q = static_cast<size_t>(p2 * r);
486if(q < chunk_size) {
487 q = chunk_size;
488 }
489size_t curr_e = (std::min)(curr_b + q, N);
490if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
491 std::memory_order_relaxed)) {
492if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
493if(func(curr_b, curr_e)) {
494return;
495 }
496 } else {
497 func(curr_b, curr_e);
498 }
499 curr_b = curr_e;
500 }
501 }
502 }
503 }
504
508template <IndexRangesLike R, typename F>
509void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {
510
511size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
512size_t p1 = 2 * W * (chunk_size + 1);
513float p2 = 0.5f / static_cast<float>(W);
514size_t curr_b = next.load(std::memory_order_relaxed);
515
516while(curr_b < N) {
517size_t r = N - curr_b;
518size_t csize = (r < p1) ? chunk_size : (std::max)(static_cast<size_t>(p2 * r), chunk_size);
519if constexpr (R::rank == 1) {
520size_t curr_e = (std::min)(curr_b + csize, N);
521if(next.compare_exchange_weak(curr_b, curr_e,
522 std::memory_order_relaxed,
523 std::memory_order_relaxed)) {
524if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
525if(func(range.unravel(curr_b, curr_e))) {
526return;
527 }
528 } else {
529 func(range.unravel(curr_b, curr_e));
530 }
531 curr_b = curr_e;
532 }
533 } else {
534auto box = range.upper_slice(curr_b, csize);
535if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
536 std::memory_order_relaxed,
537 std::memory_order_relaxed)) {
538if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
539if(func(box)) {
540return;
541 }
542 } else {
543 func(box);
544 }
545 curr_b += box.size();
546 }
547 }
548 }
549 }
550
551};
552
553// ----------------------------------------------------------------------------
554// Dynamic Partitioner
555// ----------------------------------------------------------------------------
556
596template <typename C = DefaultClosureWrapper>
597class DynamicPartitioner : public PartitionerBase<C> {
598
599public:
600
604static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
605
609DynamicPartitioner() = default;
610
614explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
615
619explicit DynamicPartitioner(size_t sz, C&& closure) :
620PartitionerBase<C>(sz, std::forward<C>(closure)) {
621 }
622
623// --------------------------------------------------------------------------
624// scheduling methods
625// --------------------------------------------------------------------------
626
630template <typename F>
631void loop(
632size_t N, size_t, std::atomic<size_t>& next, F&& func
633 ) const {
634
635size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
636size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
637
638while(curr_b < N) {
639if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
640if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
641return;
642 }
643 } else {
644 func(curr_b, (std::min)(curr_b + chunk_size, N));
645 }
646 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
647 }
648 }
649
653template <IndexRangesLike R, typename F>
654void loop(const R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {
655size_t curr_b = next.load(std::memory_order_relaxed);
656size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
657
658while(curr_b < N) {
659if constexpr (R::rank == 1) {
660size_t curr_e = (std::min)(curr_b + chunk_size, N);
661if(next.compare_exchange_weak(curr_b, curr_e,
662 std::memory_order_relaxed,
663 std::memory_order_relaxed)) {
664if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
665if(func(range.unravel(curr_b, curr_e))) {
666return;
667 }
668 } else {
669 func(range.unravel(curr_b, curr_e));
670 }
671 curr_b = curr_e;
672 }
673 } else {
674auto box = range.upper_slice(curr_b, chunk_size);
675if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
676 std::memory_order_relaxed,
677 std::memory_order_relaxed)) {
678if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
679if(func(box)) {
680return;
681 }
682 } else {
683 func(box);
684 }
685 curr_b += box.size();
686 }
687 }
688 }
689 }
690
691};
692
693// ----------------------------------------------------------------------------
694// RandomPartitioner
695// ----------------------------------------------------------------------------
696
736template <typename C = DefaultClosureWrapper>
737class RandomPartitioner : public PartitionerBase<C> {
738
739public:
740
744static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
745
749RandomPartitioner() = default;
750
754explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
755
759explicit RandomPartitioner(size_t sz, C&& closure) :
760PartitionerBase<C>(sz, std::forward<C>(closure)) {
761 }
762
766RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}
767
771RandomPartitioner(float alpha, float beta, C&& closure) :
772 _alpha {alpha}, _beta {beta},
773PartitionerBase<C>(0, std::forward<C>(closure)) {
774 }
775
779float alpha() const { return _alpha; }
780
784float beta() const { return _beta; }
785
792 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
793
794size_t b1 = static_cast<size_t>(_alpha * N * W);
795size_t b2 = static_cast<size_t>(_beta * N * W);
796
797if(b1 > b2) {
798 std::swap(b1, b2);
799 }
800
801 b1 = (std::max)(b1, size_t{1});
802 b2 = (std::max)(b2, b1 + 1);
803
804return {b1, b2};
805 }
806
807// --------------------------------------------------------------------------
808// scheduling methods
809// --------------------------------------------------------------------------
810
814template <typename F>
815void loop(
816size_t N, size_t W, std::atomic<size_t>& next, F&& func
817 ) const {
818
819auto [b1, b2] = chunk_size_range(N, W);
820
821 std::default_random_engine engine {std::random_device{}()};
822 std::uniform_int_distribution<size_t> dist(b1, b2);
823
824size_t chunk_size = dist(engine);
825size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
826
827while(curr_b < N) {
828if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
829if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
830return;
831 }
832 } else {
833 func(curr_b, (std::min)(curr_b + chunk_size, N));
834 }
835chunk_size = dist(engine);
836 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
837 }
838 }
839
843template <IndexRangesLike R, typename F>
844void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {
845
846auto [b1, b2] = chunk_size_range(N, W);
847
848 std::default_random_engine engine{std::random_device{}()};
849 std::uniform_int_distribution<size_t> dist(b1, b2);
850
851size_t curr_b = next.load(std::memory_order_relaxed);
852
853while(curr_b < N) {
854if constexpr (R::rank == 1) {
855size_t curr_e = (std::min)(curr_b + dist(engine), N);
856if(next.compare_exchange_weak(curr_b, curr_e,
857 std::memory_order_relaxed,
858 std::memory_order_relaxed)) {
859if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
860if(func(range.unravel(curr_b, curr_e))) {
861return;
862 }
863 } else {
864 func(range.unravel(curr_b, curr_e));
865 }
866 curr_b = curr_e;
867 }
868 } else {
869auto box = range.upper_slice(curr_b, dist(engine));
870if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
871 std::memory_order_relaxed,
872 std::memory_order_relaxed)) {
873if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
874if(func(box)) {
875return;
876 }
877 } else {
878 func(box);
879 }
880 curr_b += box.size();
881 }
882 }
883 }
884 }
885
886private:
887
888float _alpha {0.01f};
889float _beta {0.50f};
890};
891
898using DefaultPartitioner = GuidedPartitioner<>;
899
905template <typename P>
906concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;
907
915template <typename P>
916inline constexpr bool is_partitioner_v = PartitionerLike<P>;
917
918} // end of namespace tf -----------------------------------------------------
class to create a default closure wrapper
Definition partitioner.hpp:51
tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner()=default
default constructor
tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:619
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:604
tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:614
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:417
tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:440
tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:435
tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:424
tf::PartitionerBase::PartitionerBase
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
tf::PartitionerBase::is_default_wrapper_v
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
tf::PartitionerBase::closure_wrapper_type
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
tf::PartitionerBase::chunk_size
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
tf::PartitionerBase::closure_wrapper
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
tf::PartitionerBase::closure_wrapper
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
tf::PartitionerBase::PartitionerBase
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
tf::PartitionerBase::closure_wrapper
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
tf::PartitionerBase::PartitionerBase
PartitionerBase()=default
default constructor
tf::PartitionerBase::chunk_size
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:759
tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:771
tf::RandomPartitioner::chunk_size_range
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:792
tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:766
tf::RandomPartitioner::RandomPartitioner
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:744
float alpha() const
queries the alpha value
Definition partitioner.hpp:779
tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:754
float beta() const
queries the beta value
Definition partitioner.hpp:784
tf::StaticPartitioner::StaticPartitioner
StaticPartitioner()=default
default constructor
tf::StaticPartitioner::adjusted_chunk_size
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295
tf::StaticPartitioner::StaticPartitioner
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279
tf::StaticPartitioner::StaticPartitioner
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269
determines if a type is a partitioner
Definition partitioner.hpp:906
taskflow namespace
Definition small_vector.hpp:20
@ STATIC
static task type
Definition task.hpp:25
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
Definition partitioner.hpp:23
@ STATIC
static partitioner type
Definition partitioner.hpp:21
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:916
GuidedPartitioner<> DefaultPartitioner
default partitioner set to tf::GuidedPartitioner
Definition partitioner.hpp:898