Back to Taskflow

Taskflow: A General

docs/partitioner_8hpp_source.html

4.1.035.4 KB
Original Source

| | Taskflow: A General-purpose Task-parallel Programming System |

Loading...

Searching...

No Matches

partitioner.hpp

1// reference:

2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c

3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp\_dispatch.cpp

4

5#pragma once

6

11

12namespace tf {

13

19enum class PartitionerType : int {

21STATIC,

23DYNAMIC

24};

25

26

27//template <typename C>

28//class PartitionInvoker : public PartitionerBase {

29//

30// protected

31//

32// C _closure;

33//

34// template <typename... ArgsT>

35// auto operator()(ArgsT&&... args) {

36// return std::invoke(closure, std::forward<ArgsT>(args)...);

37// }

38//

39// template <typename... ArgsT>

40// auto operator()(ArgsT&&... args) const {

41// return std::invoke(closure, std::forward<ArgsT>(args)...);

42// }

43//

44//};

45

51class DefaultClosureWrapper {};

52

53

54

55// ----------------------------------------------------------------------------

56// Partitioner Base

57// ----------------------------------------------------------------------------

58

124template <typename C = DefaultClosureWrapper>

125class PartitionerBase {

126

127public:

128

132constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;

133

137using closure_wrapper_type = C;

138

142PartitionerBase() = default;

143

147explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}

148

152PartitionerBase(size_t chunk_size, C&& closure_wrapper) :

153 _chunk_size {chunk_size},

154 _closure_wrapper {std::forward<C>(closure_wrapper)} {

155 }

156

160size_t chunk_size() const { return _chunk_size; }

161

165void chunk_size(size_t cz) { _chunk_size = cz; }

166

170const C& closure_wrapper() const { return _closure_wrapper; }

171

175 C& closure_wrapper() { return _closure_wrapper; }

176

180template <typename F>

181void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }

182

186template <typename F>

187 TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {

188if constexpr(is_default_wrapper_v) {

189return std::forward<F>(callable);

190 }

191else {

192// closure wrapper is stateful - capture it by reference

193return this, c=std::forward<F>(callable) mutable { _closure_wrapper(c); };

194 }

195 }

196

197protected:

198

202size_t _chunk_size{0};

203

207 C _closure_wrapper;

208};

209

210// ----------------------------------------------------------------------------

211// Static Partitioner

212// ----------------------------------------------------------------------------

213

261template <typename C = DefaultClosureWrapper>

262class StaticPartitioner : public PartitionerBase<C> {

263

264public:

265

269static constexpr PartitionerType type() { return PartitionerType::STATIC; }

270

274StaticPartitioner() = default;

275

279explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

280

284explicit StaticPartitioner(size_t sz, C&& closure) :

285PartitionerBase<C>(sz, std::forward<C>(closure)) {

286 }

287

295size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {

296return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);

297 }

298

299// --------------------------------------------------------------------------

300// scheduling methods

301// --------------------------------------------------------------------------

302

306template <typename F>

307void loop(

308size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func

309 ) {

310size_t stride = W * chunk_size;

311while(curr_b < N) {

312size_t curr_e = (std::min)(curr_b + chunk_size, N);

313if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {

314if(func(curr_b, curr_e)) {

315return;

316 }

317 } else {

318 func(curr_b, curr_e);

319 }

320 curr_b += stride;

321 }

322 }

323

340template <IndexRangesLike R, typename F>

341void loop(const R& range, size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func) const {

342size_t stride = W * chunk_size;

343while(curr_b < N) {

344size_t curr_e = (std::min)(curr_b + chunk_size, N);

345if constexpr (R::rank == 1) {

346if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

347if(func(range.unravel(curr_b, curr_e))) {

348return;

349 }

350 } else {

351 func(range.unravel(curr_b, curr_e));

352 }

353 curr_b = curr_e;

354 } else {

355while(curr_b < curr_e) {

356auto box = range.lower_slice(curr_b, curr_e - curr_b);

357if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

358if(func(box)) {

359return;

360 }

361 } else {

362 func(box);

363 }

364 curr_b += box.size();

365 }

366 }

367 curr_b += stride - chunk_size;

368 }

369 }

370

371};

372

373// ----------------------------------------------------------------------------

374// Guided Partitioner

375// ----------------------------------------------------------------------------

376

416template <typename C = DefaultClosureWrapper>

417class GuidedPartitioner : public PartitionerBase<C> {

418

419public:

420

424static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

425

429GuidedPartitioner() = default;

430

435explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}

436

440explicit GuidedPartitioner(size_t sz, C&& closure) :

441PartitionerBase<C>(sz, std::forward<C>(closure)) {

442 }

443

444// --------------------------------------------------------------------------

445// scheduling methods

446// --------------------------------------------------------------------------

447

451template <typename F>

452void loop(

453size_t N, size_t W, std::atomic<size_t>& next, F&& func

454 ) const {

455

456size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

457

458size_t p1 = 2 * W * (chunk_size + 1);

459float p2 = 0.5f / static_cast<float>(W);

460size_t curr_b = next.load(std::memory_order_relaxed);

461

462while(curr_b < N) {

463

464size_t r = N - curr_b;

465

466// fine-grained

467if(r < p1) {

468while(1) {

469 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

470if(curr_b >= N) {

471return;

472 }

473if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {

474if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {

475return;

476 }

477 } else {

478 func(curr_b, (std::min)(curr_b + chunk_size, N));

479 }

480 }

481break;

482 }

483// coarse-grained

484else {

485size_t q = static_cast<size_t>(p2 * r);

486if(q < chunk_size) {

487 q = chunk_size;

488 }

489size_t curr_e = (std::min)(curr_b + q, N);

490if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,

491 std::memory_order_relaxed)) {

492if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {

493if(func(curr_b, curr_e)) {

494return;

495 }

496 } else {

497 func(curr_b, curr_e);

498 }

499 curr_b = curr_e;

500 }

501 }

502 }

503 }

504

508template <IndexRangesLike R, typename F>

509void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

510

511size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

512size_t p1 = 2 * W * (chunk_size + 1);

513float p2 = 0.5f / static_cast<float>(W);

514size_t curr_b = next.load(std::memory_order_relaxed);

515

516while(curr_b < N) {

517size_t r = N - curr_b;

518size_t csize = (r < p1) ? chunk_size : (std::max)(static_cast<size_t>(p2 * r), chunk_size);

519if constexpr (R::rank == 1) {

520size_t curr_e = (std::min)(curr_b + csize, N);

521if(next.compare_exchange_weak(curr_b, curr_e,

522 std::memory_order_relaxed,

523 std::memory_order_relaxed)) {

524if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

525if(func(range.unravel(curr_b, curr_e))) {

526return;

527 }

528 } else {

529 func(range.unravel(curr_b, curr_e));

530 }

531 curr_b = curr_e;

532 }

533 } else {

534auto box = range.upper_slice(curr_b, csize);

535if(next.compare_exchange_weak(curr_b, curr_b + box.size(),

536 std::memory_order_relaxed,

537 std::memory_order_relaxed)) {

538if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

539if(func(box)) {

540return;

541 }

542 } else {

543 func(box);

544 }

545 curr_b += box.size();

546 }

547 }

548 }

549 }

550

551};

552

553// ----------------------------------------------------------------------------

554// Dynamic Partitioner

555// ----------------------------------------------------------------------------

556

596template <typename C = DefaultClosureWrapper>

597class DynamicPartitioner : public PartitionerBase<C> {

598

599public:

600

604static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

605

609DynamicPartitioner() = default;

610

614explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

615

619explicit DynamicPartitioner(size_t sz, C&& closure) :

620PartitionerBase<C>(sz, std::forward<C>(closure)) {

621 }

622

623// --------------------------------------------------------------------------

624// scheduling methods

625// --------------------------------------------------------------------------

626

630template <typename F>

631void loop(

632size_t N, size_t, std::atomic<size_t>& next, F&& func

633 ) const {

634

635size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

636size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

637

638while(curr_b < N) {

639if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {

640if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {

641return;

642 }

643 } else {

644 func(curr_b, (std::min)(curr_b + chunk_size, N));

645 }

646 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

647 }

648 }

649

653template <IndexRangesLike R, typename F>

654void loop(const R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {

655size_t curr_b = next.load(std::memory_order_relaxed);

656size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

657

658while(curr_b < N) {

659if constexpr (R::rank == 1) {

660size_t curr_e = (std::min)(curr_b + chunk_size, N);

661if(next.compare_exchange_weak(curr_b, curr_e,

662 std::memory_order_relaxed,

663 std::memory_order_relaxed)) {

664if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

665if(func(range.unravel(curr_b, curr_e))) {

666return;

667 }

668 } else {

669 func(range.unravel(curr_b, curr_e));

670 }

671 curr_b = curr_e;

672 }

673 } else {

674auto box = range.upper_slice(curr_b, chunk_size);

675if(next.compare_exchange_weak(curr_b, curr_b + box.size(),

676 std::memory_order_relaxed,

677 std::memory_order_relaxed)) {

678if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

679if(func(box)) {

680return;

681 }

682 } else {

683 func(box);

684 }

685 curr_b += box.size();

686 }

687 }

688 }

689 }

690

691};

692

693// ----------------------------------------------------------------------------

694// RandomPartitioner

695// ----------------------------------------------------------------------------

696

736template <typename C = DefaultClosureWrapper>

737class RandomPartitioner : public PartitionerBase<C> {

738

739public:

740

744static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

745

749RandomPartitioner() = default;

750

754explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

755

759explicit RandomPartitioner(size_t sz, C&& closure) :

760PartitionerBase<C>(sz, std::forward<C>(closure)) {

761 }

762

766RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}

767

771RandomPartitioner(float alpha, float beta, C&& closure) :

772 _alpha {alpha}, _beta {beta},

773PartitionerBase<C>(0, std::forward<C>(closure)) {

774 }

775

779float alpha() const { return _alpha; }

780

784float beta() const { return _beta; }

785

792 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {

793

794size_t b1 = static_cast<size_t>(_alpha * N * W);

795size_t b2 = static_cast<size_t>(_beta * N * W);

796

797if(b1 > b2) {

798 std::swap(b1, b2);

799 }

800

801 b1 = (std::max)(b1, size_t{1});

802 b2 = (std::max)(b2, b1 + 1);

803

804return {b1, b2};

805 }

806

807// --------------------------------------------------------------------------

808// scheduling methods

809// --------------------------------------------------------------------------

810

814template <typename F>

815void loop(

816size_t N, size_t W, std::atomic<size_t>& next, F&& func

817 ) const {

818

819auto [b1, b2] = chunk_size_range(N, W);

820

821 std::default_random_engine engine {std::random_device{}()};

822 std::uniform_int_distribution<size_t> dist(b1, b2);

823

824size_t chunk_size = dist(engine);

825size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

826

827while(curr_b < N) {

828if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {

829if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {

830return;

831 }

832 } else {

833 func(curr_b, (std::min)(curr_b + chunk_size, N));

834 }

835chunk_size = dist(engine);

836 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

837 }

838 }

839

843template <IndexRangesLike R, typename F>

844void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

845

846auto [b1, b2] = chunk_size_range(N, W);

847

848 std::default_random_engine engine{std::random_device{}()};

849 std::uniform_int_distribution<size_t> dist(b1, b2);

850

851size_t curr_b = next.load(std::memory_order_relaxed);

852

853while(curr_b < N) {

854if constexpr (R::rank == 1) {

855size_t curr_e = (std::min)(curr_b + dist(engine), N);

856if(next.compare_exchange_weak(curr_b, curr_e,

857 std::memory_order_relaxed,

858 std::memory_order_relaxed)) {

859if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

860if(func(range.unravel(curr_b, curr_e))) {

861return;

862 }

863 } else {

864 func(range.unravel(curr_b, curr_e));

865 }

866 curr_b = curr_e;

867 }

868 } else {

869auto box = range.upper_slice(curr_b, dist(engine));

870if(next.compare_exchange_weak(curr_b, curr_b + box.size(),

871 std::memory_order_relaxed,

872 std::memory_order_relaxed)) {

873if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {

874if(func(box)) {

875return;

876 }

877 } else {

878 func(box);

879 }

880 curr_b += box.size();

881 }

882 }

883 }

884 }

885

886private:

887

888float _alpha {0.01f};

889float _beta {0.50f};

890};

891

898using DefaultPartitioner = GuidedPartitioner<>;

899

905template <typename P>

906concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;

907

915template <typename P>

916inline constexpr bool is_partitioner_v = PartitionerLike<P>;

917

918} // end of namespace tf -----------------------------------------------------

tf::DefaultClosureWrapper

class to create a default closure wrapper

Definition partitioner.hpp:51

tf::DynamicPartitioner::DynamicPartitioner

DynamicPartitioner()=default

default constructor

tf::DynamicPartitioner::DynamicPartitioner

DynamicPartitioner(size_t sz, C &&closure)

construct a dynamic partitioner with the given chunk size and the closure

Definition partitioner.hpp:619

tf::DynamicPartitioner::type

static constexpr PartitionerType type()

queries the partition type (dynamic)

Definition partitioner.hpp:604

tf::DynamicPartitioner::DynamicPartitioner

DynamicPartitioner(size_t sz)

construct a dynamic partitioner with the given chunk size

Definition partitioner.hpp:614

tf::GuidedPartitioner

class to create a guided partitioner for scheduling parallel algorithms

Definition partitioner.hpp:417

tf::GuidedPartitioner::GuidedPartitioner

GuidedPartitioner(size_t sz, C &&closure)

construct a guided partitioner with the given chunk size and the closure

Definition partitioner.hpp:440

tf::GuidedPartitioner::GuidedPartitioner

GuidedPartitioner(size_t sz)

construct a guided partitioner with the given chunk size

Definition partitioner.hpp:435

tf::GuidedPartitioner::GuidedPartitioner

GuidedPartitioner()=default

default constructor

tf::GuidedPartitioner::type

static constexpr PartitionerType type()

queries the partition type (dynamic)

Definition partitioner.hpp:424

tf::PartitionerBase::PartitionerBase

PartitionerBase(size_t chunk_size)

construct a partitioner with the given chunk size

Definition partitioner.hpp:147

tf::PartitionerBase::is_default_wrapper_v

static constexpr bool is_default_wrapper_v

indicating if the given closure wrapper is a default wrapper (i.e., empty)

Definition partitioner.hpp:132

tf::PartitionerBase::closure_wrapper_type

C closure_wrapper_type

the closure type

Definition partitioner.hpp:137

tf::PartitionerBase::chunk_size

void chunk_size(size_t cz)

update the chunk size of this partitioner

Definition partitioner.hpp:165

tf::PartitionerBase::closure_wrapper

const C & closure_wrapper() const

acquire an immutable access to the closure wrapper object

Definition partitioner.hpp:170

tf::PartitionerBase::closure_wrapper

void closure_wrapper(F &&fn)

modify the closure wrapper object

Definition partitioner.hpp:181

tf::PartitionerBase::PartitionerBase

PartitionerBase(size_t chunk_size, C &&closure_wrapper)

construct a partitioner with the given chunk size and closure wrapper

Definition partitioner.hpp:152

tf::PartitionerBase::closure_wrapper

C & closure_wrapper()

acquire a mutable access to the closure wrapper object

Definition partitioner.hpp:175

tf::PartitionerBase::PartitionerBase

PartitionerBase()=default

default constructor

tf::PartitionerBase::chunk_size

size_t chunk_size() const

query the chunk size of this partitioner

Definition partitioner.hpp:160

tf::RandomPartitioner::RandomPartitioner

RandomPartitioner(size_t sz, C &&closure)

construct a random partitioner with the given chunk size and the closure

Definition partitioner.hpp:759

tf::RandomPartitioner::RandomPartitioner

RandomPartitioner(float alpha, float beta, C &&closure)

constructs a random partitioner with the given parameters and the closure

Definition partitioner.hpp:771

tf::RandomPartitioner::chunk_size_range

std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const

queries the range of chunk size

Definition partitioner.hpp:792

tf::RandomPartitioner::RandomPartitioner

RandomPartitioner(float alpha, float beta)

constructs a random partitioner with the given parameters

Definition partitioner.hpp:766

tf::RandomPartitioner::RandomPartitioner

RandomPartitioner()=default

default constructor

tf::RandomPartitioner::type

static constexpr PartitionerType type()

queries the partition type (dynamic)

Definition partitioner.hpp:744

tf::RandomPartitioner::alpha

float alpha() const

queries the alpha value

Definition partitioner.hpp:779

tf::RandomPartitioner::RandomPartitioner

RandomPartitioner(size_t sz)

construct a dynamic partitioner with the given chunk size

Definition partitioner.hpp:754

tf::RandomPartitioner::beta

float beta() const

queries the beta value

Definition partitioner.hpp:784

tf::StaticPartitioner::StaticPartitioner

StaticPartitioner()=default

default constructor

tf::StaticPartitioner::adjusted_chunk_size

size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const

queries the adjusted chunk size

Definition partitioner.hpp:295

tf::StaticPartitioner::StaticPartitioner

StaticPartitioner(size_t sz)

construct a static partitioner with the given chunk size

Definition partitioner.hpp:279

tf::StaticPartitioner::StaticPartitioner

StaticPartitioner(size_t sz, C &&closure)

construct a static partitioner with the given chunk size and the closure

Definition partitioner.hpp:284

tf::StaticPartitioner::type

static constexpr PartitionerType type()

queries the partition type (static)

Definition partitioner.hpp:269

tf::PartitionerLike

determines if a type is a partitioner

Definition partitioner.hpp:906

tf

taskflow namespace

Definition small_vector.hpp:20

tf::TaskType::STATIC

@ STATIC

static task type

Definition task.hpp:25

tf::PartitionerType

PartitionerType

enumeration of all partitioner types

Definition partitioner.hpp:19

tf::PartitionerType::DYNAMIC

@ DYNAMIC

dynamic partitioner type

Definition partitioner.hpp:23

tf::PartitionerType::STATIC

@ STATIC

static partitioner type

Definition partitioner.hpp:21

tf::is_partitioner_v

constexpr bool is_partitioner_v

determines if a type is a partitioner (variable template)

Definition partitioner.hpp:916

tf::DefaultPartitioner

GuidedPartitioner<> DefaultPartitioner

default partitioner set to tf::GuidedPartitioner

Definition partitioner.hpp:898