/** @defgroup Concurrency
 *
 * @brief Concurrent/parallel task execution support and related APIs.
 *
 * libostd provides an elaborate concurrency system that covers multiple
 * schedulers with different characteristics as well as different ways to
 * pass data between tasks.
 *
 * It implements 1:1 (tasks are OS threads), N:1 (tasks are lightweight
 * threads running on a single thread) and M:N (tasks are lightweight
 * threads running on a fixed number of OS threads) scheduling approaches.
 *
 * The system is flexible and extensible; it can be used as a base for higher
 * level systems, and its individual components are independently usable.
 *
 * Typical usage of the concurrency system is as follows:
 *
 * @include concurrency.cc
 *
 * See the examples provided with the library for further information.
 *
 * It also implements all sorts of utilities for dealing with parallel
 * programs and synchronization, including a thread pool and other facilities.
 *
 * @{
 */

/** @file concurrency.hh
 *
 * @brief Different concurrent schedulers and related APIs.
 *
 * This file implements several schedulers as well as APIs to spawn
 * tasks, coroutines and channels that utilize the current scheduler's API.
 *
 * @include concurrency.cc
 *
 * @copyright See COPYING.md in the project tree for further information.
 */
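
/* A minimal usage sketch, for orientation only; it is not the bundled
 * concurrency.cc and assumes the put()/get() interface of <ostd/channel.hh>:
 *
 *     #include <ostd/concurrency.hh>
 *
 *     int main() {
 *         // start() runs the given function as the main task
 *         return ostd::coroutine_scheduler{}.start([]() {
 *             auto c = ostd::make_channel<int>();
 *             // spawn a task on the current scheduler; t is an ostd::tid<int>
 *             auto t = ostd::spawn([c]() mutable {
 *                 c.put(21);
 *                 return 21;
 *             });
 *             int a = c.get();  // receive the value sent by the task
 *             int b = t.get();  // wait for the task, take its return value
 *             return (a + b == 42) ? 0 : 1;
 *         });
 *     }
 */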

#ifndef OSTD_CONCURRENCY_HH
#define OSTD_CONCURRENCY_HH

#include <cstddef>
#include <vector>
#include <list>
#include <thread>
#include <utility>
#include <memory>
#include <stdexcept>
#include <exception>
#include <type_traits>
#include <optional>
#include <functional>
#include <mutex>
#include <condition_variable>

#include <ostd/platform.hh>
#include <ostd/coroutine.hh>
#include <ostd/channel.hh>
#include <ostd/generic_condvar.hh>

namespace ostd {

/** @addtogroup Concurrency
 * @{
 */

struct scheduler;

namespace detail {
    template<typename T>
    struct tid_impl {
        tid_impl() = delete;

        template<typename F>
        tid_impl(F &func): p_lock(), p_eptr(), p_cond(func()) {}

        T get() {
            std::unique_lock<std::mutex> l{p_lock};
            while (!p_stor) {
                p_cond.wait(l);
            }
            if (p_eptr) {
                std::rethrow_exception(std::exchange(p_eptr, nullptr));
            }
            auto ret = std::move(p_stor);
            if constexpr(!std::is_same_v<T, void>) {
                if constexpr(std::is_lvalue_reference_v<T>) {
                    return **ret;
                } else {
                    return std::move(*ret);
                }
            }
        }

        void wait() {
            std::unique_lock<std::mutex> l{p_lock};
            while (!p_stor) {
                p_cond.wait(l);
            }
        }

        template<typename F>
        void set_value(F &func) {
            {
                std::lock_guard<std::mutex> l{p_lock};
                try {
                    if constexpr(std::is_same_v<T, void>) {
                        func();
                        p_stor = true;
                    } else {
                        if constexpr(std::is_lvalue_reference_v<T>) {
                            p_stor = &func();
                        } else {
                            p_stor = std::move(func());
                        }
                    }
                } catch (...) {
                    p_eptr = std::current_exception();
                }
            }
            p_cond.notify_one();
        }

    private:
        using storage = std::conditional_t<
            std::is_same_v<T, void>,
            bool,
            std::optional<std::conditional_t<
                std::is_lvalue_reference_v<T>,
                std::remove_reference_t<T> *,
                std::decay_t<T>
            >>
        >;

        mutable std::mutex p_lock;
        mutable std::exception_ptr p_eptr;
        generic_condvar p_cond;
        storage p_stor = storage{};
    };
}

/** @brief An object that defines a task.
 *
 * This is returned by ostd::spawn() as well as scheduler::spawn(). It
 * represents a spawned task, allowing you to wait for the task to be done
 * as well as retrieve its return value. It also allows sane cross-task
 * exception handling, as any exception is saved inside the internal
 * state and propagated on get().
 *
 * Tids are move constructible and assignable, but not copyable. They also
 * cannot be constructed normally, only returned from the appropriate spawn
 * function. The internal state stays alive for as long as necessary (always
 * at least until the associated task finishes, but it can remain alive after
 * that to either get the return value or propagate an exception).
 *
 * The `T` template parameter is the type of the result. It can be `void`,
 * in which case get() returns nothing, but can still propagate exceptions.
 */
template<typename T>
struct tid {
    friend struct scheduler;

    tid() = delete;
    tid(tid const &) = delete;
    tid &operator=(tid const &) = delete;

    tid(tid &&) = default;
    tid &operator=(tid &&) = default;

    /** @brief Waits for the result and returns it.
     *
     * If `T` is void, this does not return a value. If an exception was
     * thrown by the task, it will be rethrown here, allowing sane exception
     * handling between different tasks and possibly threads.
     *
     * Effectively calls wait() before returning or throwing. After this
     * call is done, valid() will no longer be true. It is undefined to
     * call this when valid() is not true.
     */
    T get() {
        auto p = std::move(p_state);
        return p->get();
    }

    /** @brief Checks if this `tid` points to a valid shared state. */
    bool valid() const {
        return bool(p_state);
    }

    /** @brief Waits for the associated task to finish.
     *
     * The behavior is undefined when valid() is not true.
     */
    void wait() {
        p_state->wait();
    }

private:
    template<typename F>
    tid(F func): p_state(new detail::tid_impl<T>{func}) {}

    std::shared_ptr<detail::tid_impl<T>> p_state;
};
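
/* A short tid usage sketch (illustrative only; it must run inside some
 * scheduler's start(), where ostd::spawn() is valid):
 *
 *     auto t = ostd::spawn([]() -> int {
 *         throw std::runtime_error{"oops"};
 *     });
 *     try {
 *         int r = t.get(); // waits, then rethrows the stored exception
 *     } catch (std::runtime_error const &e) {
 *         // the exception crossed the task boundary safely
 *     }
 */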

/** @brief A base interface for any scheduler.
 *
 * All schedulers derive from this. Its core interface is defined using
 * pure virtual functions, which the schedulers are supposed to implement.
 *
 * Every scheduler is supposed to implement its own method `start`, which
 * takes a function plus arguments and returns the value returned from the
 * given function. The given function will be used as the very first
 * task (the main task), which typically replaces your `main` function.
 *
 * The `start` method will also set the internal current scheduler pointer
 * so that APIs such as ostd::spawn() can work.
 */
struct scheduler {
private:
    struct stack_allocator {
        stack_allocator() = delete;
        stack_allocator(scheduler &s) noexcept: p_sched(&s) {}

        stack_context allocate() {
            return p_sched->allocate_stack();
        }

        void deallocate(stack_context &st) noexcept {
            p_sched->deallocate_stack(st);
        }

    private:
        scheduler *p_sched;
    };

protected:
    /** @brief Does nothing, this base class is empty. */
    scheduler() {}

public:
    /** @brief Does nothing, this base class is empty. */
    virtual ~scheduler();

    scheduler(scheduler const &) = delete;
    scheduler(scheduler &&) = delete;
    scheduler &operator=(scheduler const &) = delete;
    scheduler &operator=(scheduler &&) = delete;

    /** @brief Spawns a task.
     *
     * Spawns a task and schedules it for execution. This is a low level
     * interface function. Typically you will want ostd::spawn().
     * The detailed behavior of the function is completely scheduler dependent.
     *
     * @see ostd::spawn(), spawn()
     */
    virtual void do_spawn(std::function<void()>) = 0;

    /** @brief Tells the scheduler to re-schedule the current task.
     *
     * In ostd::thread_scheduler, this is just a hint, as it uses OS threading
     * facilities. In coroutine based schedulers, this will typically suspend
     * the currently running task and re-schedule it for later.
     *
     * @see ostd::yield()
     */
    virtual void yield() noexcept = 0;

    /** @brief Creates a condition variable using ostd::generic_condvar.
     *
     * A scheduler might be using a custom condition variable type depending
     * on how its tasks are implemented. Other data structures might want to
     * use these condition variables to synchronize (see make_channel() for
     * an example).
     *
     * @see make_channel(), make_coroutine(), make_generator()
     */
    virtual generic_condvar make_condition() = 0;

    /** @brief Allocates a stack suitable for a coroutine.
     *
     * If the scheduler uses coroutine based tasks, this allows us to
     * create coroutines and generators that use the same stacks as tasks.
     * This has benefits particularly when a pooled stack allocator is in
     * use for the tasks.
     *
     * Using get_stack_allocator() you can create an actual stack allocator
     * usable with coroutines that uses this set of methods.
     *
     * @see deallocate_stack(), reserve_stacks(), get_stack_allocator()
     */
    virtual stack_context allocate_stack() = 0;

    /** @brief Deallocates a stack allocated with allocate_stack().
     *
     * @see allocate_stack(), reserve_stacks(), get_stack_allocator()
     */
    virtual void deallocate_stack(stack_context &st) noexcept = 0;

    /** @brief Reserves at least `n` stacks.
     *
     * If the stack allocator used in the scheduler is pooled, this will
     * reserve the given number of stacks (or more). It can, however, be
     * a no-op if the allocator is not pooled.
     *
     * @see allocate_stack(), deallocate_stack(), get_stack_allocator()
     */
    virtual void reserve_stacks(std::size_t n) = 0;

    /** @brief Gets a stack allocator using the scheduler's stack allocation.
     *
     * The stack allocator will use allocate_stack() and deallocate_stack()
     * to perform the allocation and deallocation.
     */
    stack_allocator get_stack_allocator() noexcept {
        return stack_allocator{*this};
    }
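
    /* A sketch of how the allocator interface composes (illustrative only;
     * it assumes ostd::coroutine's (callable, stack allocator) constructor
     * from <ostd/coroutine.hh>):
     *
     *     void example(ostd::scheduler &sched) {
     *         sched.reserve_stacks(8); // may pre-grow a pooled allocator
     *         ostd::coroutine<int(int)> c{
     *             [](auto yield, int n) { return n * 2; },
     *             sched.get_stack_allocator() // stacks shared with tasks
     *         };
     *     }
     */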

    /** @brief Spawns a task using any callable and a set of arguments.
     *
     * Just like do_spawn(), but works for any callable and accepts arguments.
     * If any arguments are provided, they're bound to the callable first.
     * Then the result is converted to the right type for do_spawn() and passed.
     *
     * You can use this to spawn a task directly on a scheduler. However,
     * typically you will not want to pass the scheduler around and instead
     * use the generic ostd::spawn(), which works on any scheduler.
     *
     * @see do_spawn(), ostd::spawn()
     */
    template<typename F, typename ...A>
    tid<std::result_of_t<F(A...)>> spawn(F func, A &&...args) {
        tid<std::result_of_t<F(A...)>> t{[this]() {
            return make_condition();
        }};
        /* private shared state reference */
        auto st = t.p_state;
        if constexpr(sizeof...(A) == 0) {
            do_spawn([lfunc = std::move(func), lst = std::move(st)]() {
                lst->set_value(lfunc);
            });
        } else {
            do_spawn([lfunc = std::bind(
                std::move(func), std::forward<A>(args)...
            ), lst = std::move(st)]() {
                lst->set_value(lfunc);
            });
        }
        return t;
    }
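
    /* For instance (a hedged sketch; any callable plus arguments works):
     *
     *     int mul(int a, int b) { return a * b; }
     *     ...
     *     ostd::tid<int> t = sched.spawn(mul, 6, 7);
     *     int r = t.get(); // 42
     */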

    /** @brief Creates a channel suitable for the scheduler.
     *
     * Returns a channel that uses a condition variable type returned by
     * make_condition(). You can use this to create a channel directly
     * with the scheduler. However, typically you will not want to pass
     * it around, so ostd::make_channel() is a more convenient choice.
     *
     * @tparam T The type of the channel value.
     *
     * @see ostd::make_channel()
     */
    template<typename T>
    channel<T> make_channel() {
        return channel<T>{[this]() {
            return make_condition();
        }};
    }

    /** @brief Creates a coroutine using the scheduler's stack allocator.
     *
     * Using ostd::make_coroutine() will do the same thing, but without
     * the need to explicitly pass the scheduler around.
     *
     * @tparam T The type passed to the coroutine, `Result(Args...)`.
     *
     * @see make_generator(), ostd::make_coroutine()
     */
    template<typename T, typename F>
    coroutine<T> make_coroutine(F &&func) {
        return coroutine<T>{std::forward<F>(func), get_stack_allocator()};
    }

    /** @brief Creates a generator using the scheduler's stack allocator.
     *
     * Using ostd::make_generator() will do the same thing, but without
     * the need to explicitly pass the scheduler around.
     *
     * @tparam T The value type of the generator.
     *
     * @see make_coroutine(), ostd::make_generator()
     */
    template<typename T, typename F>
    generator<T> make_generator(F &&func) {
        return generator<T>{std::forward<F>(func), get_stack_allocator()};
    }
};
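
/* The member factories can also be used with an explicit scheduler
 * reference (a hedged sketch; put()/get() assumed from <ostd/channel.hh>):
 *
 *     void feed(ostd::scheduler &sched) {
 *         auto c = sched.make_channel<int>(); // condvar type matches sched
 *         sched.spawn([c]() mutable { c.put(5); });
 *         int v = c.get();
 *     }
 */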

namespace detail {
    OSTD_EXPORT extern scheduler *current_scheduler;

    struct current_scheduler_owner {
        current_scheduler_owner() = delete;

        template<typename S>
        current_scheduler_owner(S &sched) {
            if (current_scheduler) {
                throw std::logic_error{"another scheduler already running"};
            }
            current_scheduler = &sched;
        }

        current_scheduler_owner(current_scheduler_owner const &) = delete;
        current_scheduler_owner(current_scheduler_owner &&) = delete;
        current_scheduler_owner &operator=(current_scheduler_owner const &) = delete;
        current_scheduler_owner &operator=(current_scheduler_owner &&) = delete;

        ~current_scheduler_owner() {
            current_scheduler = nullptr;
        }
    };
}

/** @brief A scheduler that uses an `std::thread` per each task.
 *
 * This one doesn't actually do any scheduling, it leaves it to the OS.
 * Effectively this implements a 1:1 model.
 *
 * @tparam SA The stack allocator to use when requesting stacks. It's not
 * actually used anywhere else, as thread stacks are managed by the OS.
 * Can be a stack pool and only has to be move constructible.
 */
template<typename SA>
struct basic_thread_scheduler: scheduler {
    /** @brief The stack allocator type. */
    using allocator_type = SA;

    /** @brief Creates the scheduler.
     *
     * @param[in] sa The provided stack allocator.
     */
    basic_thread_scheduler(SA &&sa = SA{}): p_stacks(std::move(sa)) {}

    /** @brief Starts the scheduler given a set of arguments.
     *
     * Sets the internal current scheduler pointer to this scheduler and
     * calls the given function. As it doesn't do any scheduling, it simply
     * calls the function directly. Then it waits for all threads (tasks)
     * it spawned to finish and returns the value returned from the given
     * function, if any.
     *
     * @returns The result of `func`.
     */
    template<typename F, typename ...A>
    auto start(F func, A &&...args) -> std::result_of_t<F(A...)> {
        detail::current_scheduler_owner iface{*this};
        if constexpr(std::is_same_v<std::result_of_t<F(A...)>, void>) {
            func(std::forward<A>(args)...);
            join_all();
        } else {
            auto ret = func(std::forward<A>(args)...);
            join_all();
            return ret;
        }
    }

    void do_spawn(std::function<void()> func) {
        {
            std::lock_guard<std::mutex> l{p_lock};
            p_threads.emplace_front();
            auto it = p_threads.begin();
            *it = std::thread{[this, it, lfunc = std::move(func)]() {
                lfunc();
                remove_thread(it);
            }};
        }
        yield();
    }

    void yield() noexcept {
        std::this_thread::yield();
    }

    generic_condvar make_condition() {
        return generic_condvar{};
    }

    stack_context allocate_stack() {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            return p_stacks.allocate();
        } else {
            return p_stacks.allocate();
        }
    }

    void deallocate_stack(stack_context &st) noexcept {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            p_stacks.deallocate(st);
        } else {
            p_stacks.deallocate(st);
        }
    }

    void reserve_stacks(std::size_t n) {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            p_stacks.reserve(n);
        } else {
            p_stacks.reserve(n);
        }
    }

private:
    void remove_thread(typename std::list<std::thread>::iterator it) {
        std::lock_guard<std::mutex> l{p_lock};
        /* a thread cannot join itself; stash this one in p_dead and
         * reap whichever thread was stashed there previously
         */
        std::thread t{std::exchange(p_dead, std::move(*it))};
        if (t.joinable()) {
            t.join();
        }
        p_threads.erase(it);
    }

    void join_all() {
        /* wait for all threads to finish */
        std::lock_guard<std::mutex> l{p_lock};
        if (p_dead.joinable()) {
            p_dead.join();
        }
        while (!p_threads.empty()) {
            p_threads.front().join();
            p_threads.pop_front();
        }
    }

    SA p_stacks;
    std::list<std::thread> p_threads;
    std::thread p_dead;
    std::mutex p_lock;
};

/** @brief An ostd::basic_thread_scheduler using ostd::stack_pool. */
using thread_scheduler = basic_thread_scheduler<stack_pool>;
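
/* A usage sketch for the 1:1 scheduler (illustrative only):
 *
 *     int main() {
 *         ostd::thread_scheduler sched;
 *         return sched.start([]() {
 *             // the task below runs on its own std::thread
 *             auto t = ostd::spawn([]() { return 40; });
 *             return t.get() + 2 - 42; // 0 on success
 *         });
 *     }
 */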

namespace detail {
    struct csched_task;

    OSTD_EXPORT extern thread_local csched_task *current_csched_task;

    struct csched_task: coroutine_context {
        friend struct coroutine_context;

        csched_task() = delete;
        csched_task(csched_task const &) = delete;
        csched_task(csched_task &&) = delete;
        csched_task &operator=(csched_task const &) = delete;
        csched_task &operator=(csched_task &&) = delete;

        /* empty, for vtable placement */
        virtual ~csched_task();

        template<typename F, typename SA>
        csched_task(F &&f, SA &&sa): p_func(std::forward<F>(f)) {
            if (!p_func) {
                this->set_dead();
                return;
            }
            this->make_context<csched_task>(sa);
        }

        void operator()() {
            this->set_exec();
            csched_task *curr = std::exchange(current_csched_task, this);
            this->coro_jump();
            current_csched_task = curr;
            this->rethrow();
        }

        void yield() noexcept {
            /* we'll yield back to the thread we were scheduled to, which
             * will appropriately notify one or all other waiting threads
             * so we either get re-scheduled or the whole scheduler dies
             */
            this->yield_jump();
        }

        bool dead() const noexcept {
            return this->is_dead();
        }

        static csched_task *current() noexcept {
            return current_csched_task;
        }

    private:
        void resume_call() {
            p_func();
        }

        std::function<void()> p_func;
    };
}

/** @brief A scheduler that uses a coroutine type for tasks on a single thread.
 *
 * Effectively implements the N:1 model. Runs on a single thread, so it doesn't
 * make any use of multicore systems. The tasks bypass the
 * coroutine_context::current() method, so they're completely hidden from the
 * outside code. This also has several advantages for code using coroutines.
 *
 * @tparam SA The stack allocator to use when requesting stacks. Used for
 * the tasks as well as for the stack request methods.
 */
template<typename SA>
struct basic_simple_coroutine_scheduler: scheduler {
    /** @brief The stack allocator type. */
    using allocator_type = SA;

private:
    /* simple one just for channels */
    struct coro_cond {
        coro_cond() = delete;
        coro_cond(coro_cond const &) = delete;
        coro_cond(coro_cond &&) = delete;
        coro_cond &operator=(coro_cond const &) = delete;
        coro_cond &operator=(coro_cond &&) = delete;

        coro_cond(basic_simple_coroutine_scheduler &s): p_sched(s) {}

        template<typename L>
        void wait(L &l) noexcept {
            l.unlock();
            while (!p_notified) {
                p_sched.yield();
            }
            p_notified = false;
            l.lock();
        }

        void notify_one() noexcept {
            p_notified = true;
            p_sched.yield();
        }

        void notify_all() noexcept {
            p_notified = true;
            p_sched.yield();
        }

    private:
        basic_simple_coroutine_scheduler &p_sched;
        bool p_notified = false;
    };

public:
    /** @brief Creates the scheduler.
     *
     * @param[in] sa The provided stack allocator.
     */
    basic_simple_coroutine_scheduler(SA &&sa = SA{}):
        p_stacks(std::move(sa))
    {}

    /** @brief Starts the scheduler given a set of arguments.
     *
     * Sets the internal current scheduler pointer to this scheduler and
     * creates the main task using the separately provided stack allocator.
     * This is useful because the task stacks tend to be rather small and
     * we need a much bigger stack for the main task.
     *
     * After creating the task, starts the dispatcher on the thread. Returns
     * the return value of the provided main task function once it finishes.
     *
     * @returns The result of `func`.
     */
    template<typename TSA, typename F, typename ...A>
    auto start(std::allocator_arg_t, TSA &&sa, F func, A &&...args)
        -> std::result_of_t<F(A...)>
    {
        detail::current_scheduler_owner iface{*this};

        using R = std::result_of_t<F(A...)>;

        if constexpr(std::is_same_v<R, void>) {
            if constexpr(sizeof...(A) == 0) {
                p_coros.emplace_back(std::move(func), std::forward<TSA>(sa));
            } else {
                p_coros.emplace_back(std::bind(
                    std::move(func), std::forward<A>(args)...
                ), std::forward<TSA>(sa));
            }
            dispatch();
        } else {
            R ret;
            if constexpr(sizeof...(A) == 0) {
                p_coros.emplace_back([&ret, lfunc = std::move(func)] {
                    ret = lfunc();
                }, std::forward<TSA>(sa));
            } else {
                p_coros.emplace_back([&ret, lfunc = std::bind(
                    std::move(func), std::forward<A>(args)...
                )]() {
                    ret = lfunc();
                }, std::forward<TSA>(sa));
            }
            dispatch();
            return ret;
        }
    }

    /** @brief Starts the scheduler given a set of arguments.
     *
     * Like start() but uses a fixed size stack that has the same size as
     * the main thread stack.
     *
     * The stack traits type is inherited from `SA`.
     *
     * @returns The result of `func`.
     */
    template<typename F, typename ...A>
    auto start(F func, A &&...args) -> std::result_of_t<F(A...)> {
        basic_fixedsize_stack<typename SA::traits_type, false> sa{
            detail::stack_main_size()
        };
        return start(
            std::allocator_arg, sa, std::move(func), std::forward<A>(args)...
        );
    }

    void do_spawn(std::function<void()> func) {
        p_coros.emplace_back(std::move(func), p_stacks.get_allocator());
        yield();
    }

    void yield() noexcept {
        detail::csched_task::current()->yield();
    }

    generic_condvar make_condition() {
        return generic_condvar{[this]() {
            return coro_cond{*this};
        }};
    }

    stack_context allocate_stack() {
        return p_stacks.allocate();
    }

    void deallocate_stack(stack_context &st) noexcept {
        p_stacks.deallocate(st);
    }

    void reserve_stacks(std::size_t n) {
        p_stacks.reserve(n);
    }

private:
    void dispatch() {
        /* resume tasks round-robin until all of them have died */
        while (!p_coros.empty()) {
            if (p_idx == p_coros.end()) {
                p_idx = p_coros.begin();
            }
            (*p_idx)();
            if (p_idx->dead()) {
                p_idx = p_coros.erase(p_idx);
            } else {
                ++p_idx;
            }
        }
    }

    SA p_stacks;
    std::list<detail::csched_task> p_coros;
    typename std::list<detail::csched_task>::iterator p_idx = p_coros.end();
};

/** @brief An ostd::basic_simple_coroutine_scheduler using ostd::stack_pool. */
using simple_coroutine_scheduler = basic_simple_coroutine_scheduler<stack_pool>;
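
/* A usage sketch for the N:1 scheduler (illustrative; everything runs on
 * one OS thread, so progress relies on cooperative yields):
 *
 *     ostd::simple_coroutine_scheduler sched;
 *     sched.start([]() {
 *         auto t = ostd::spawn([]() {
 *             ostd::yield(); // hand control back to the other task(s)
 *         });
 *         t.wait(); // yields until the task finishes
 *     });
 */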

/** @brief A scheduler that uses a coroutine type for tasks on several threads.
 *
 * Effectively implements the M:N model. Runs on several threads, typically as
 * many as there are physical threads on your CPU(s), so it makes use of
 * multicore systems. The tasks bypass the coroutine_context::current() method,
 * so they're completely hidden from the outside code. This also has several
 * advantages for code using coroutines.
 *
 * @tparam SA The stack allocator to use when requesting stacks. Used for
 * the tasks as well as for the stack request methods.
 */
template<typename SA>
struct basic_coroutine_scheduler: scheduler {
    /** @brief The stack allocator type. */
    using allocator_type = SA;

private:
    struct task_cond;
    struct task;

    using tlist = std::list<task>;
    using titer = typename tlist::iterator;

    struct task {
    private:
        detail::csched_task p_func;

    public:
        task_cond *waiting_on = nullptr;
        task *next_waiting = nullptr;
        titer pos;

        template<typename F, typename TSA>
        task(F &&f, TSA &&sa):
            p_func(std::forward<F>(f), std::forward<TSA>(sa))
        {}

        void operator()() {
            p_func();
        }

        void yield() noexcept {
            p_func.yield();
        }

        bool dead() const noexcept {
            return p_func.dead();
        }

        static task *current() noexcept {
            /* a task begins with its csched_task, so the two pointers
             * are interchangeable here
             */
            return reinterpret_cast<task *>(detail::csched_task::current());
        }
    };

    struct task_cond {
        friend struct basic_coroutine_scheduler;

        task_cond() = delete;
        task_cond(task_cond const &) = delete;
        task_cond(task_cond &&) = delete;
        task_cond &operator=(task_cond const &) = delete;
        task_cond &operator=(task_cond &&) = delete;

        task_cond(basic_coroutine_scheduler &s): p_sched(s) {}

        template<typename L>
        void wait(L &l) noexcept {
            /* lock until the task has been added to the wait queue,
             * that ensures that any notify_one/notify_all has to wait
             * until after the task has fully blocked... we can't
             * use unique_lock or lock_guard because they're scoped
             */
            p_sched.p_lock.lock();
            l.unlock();
            task *curr = task::current();
            curr->waiting_on = this;
            curr->yield();
            l.lock();
        }

        void notify_one() noexcept {
            p_sched.notify_one(p_waiting);
        }

        void notify_all() noexcept {
            p_sched.notify_all(p_waiting);
        }

    private:
        basic_coroutine_scheduler &p_sched;
        task *p_waiting = nullptr;
    };

public:
    /** @brief Creates the scheduler.
     *
     * The number of threads defaults to the number of physical threads.
     *
     * @param[in] thrs The number of threads to use.
     * @param[in] sa The provided stack allocator.
     */
    basic_coroutine_scheduler(
        std::size_t thrs = std::thread::hardware_concurrency(), SA &&sa = SA{}
    ):
        p_threads(thrs), p_stacks(std::move(sa))
    {}

    ~basic_coroutine_scheduler() {}

    /** @brief Starts the scheduler given a set of arguments.
     *
     * Sets the internal current scheduler pointer to this scheduler and
     * creates the main task using the separately provided stack allocator.
     * This is useful because the task stacks tend to be rather small and
     * we need a much bigger stack for the main task.
     *
     * After creating the task, creates the requested number of threads and
     * starts the dispatcher on each. Then it waits for all threads to finish
     * and returns the return value of the provided main task function.
     *
     * @returns The result of `func`.
     */
    template<typename TSA, typename F, typename ...A>
    auto start(std::allocator_arg_t, TSA &&sa, F func, A &&...args)
        -> std::result_of_t<F(A...)>
    {
        detail::current_scheduler_owner iface{*this};

        /* start with one task in the queue, this way we can
         * say we've finished when the task queue becomes empty
         */
        using R = std::result_of_t<F(A...)>;

        if constexpr(std::is_same_v<R, void>) {
            spawn_add(
                std::forward<TSA>(sa), std::move(func),
                std::forward<A>(args)...
            );
            /* actually start the thread pool */
            init();
        } else {
            R ret;
            spawn_add(
                std::forward<TSA>(sa),
                [&ret, func = std::move(func)](auto &&...fargs) {
                    ret = func(std::forward<A>(fargs)...);
                },
                std::forward<A>(args)...
            );
            init();
            return ret;
        }
    }

    /** @brief Starts the scheduler given a set of arguments.
     *
     * Like start() but uses a fixed size stack that has the same size as
     * the main thread stack.
     *
     * The stack traits type is inherited from `SA`.
     *
     * @returns The result of `func`.
     */
    template<typename F, typename ...A>
    auto start(F func, A &&...args) -> std::result_of_t<F(A...)> {
        /* the default 64 KiB stack won't cut it for main, allocate a stack
         * which matches the size of the process stack outside of the pool
         */
        basic_fixedsize_stack<typename SA::traits_type, false> sa{
            detail::stack_main_size()
        };
        return start(
            std::allocator_arg, sa, std::move(func), std::forward<A>(args)...
        );
    }

    void do_spawn(std::function<void()> func) {
        {
            std::lock_guard<std::mutex> l{p_lock};
            spawn_add(p_stacks.get_allocator(), std::move(func));
        }
        p_cond.notify_one();
    }

    void yield() noexcept {
        task::current()->yield();
    }

    generic_condvar make_condition() {
        return generic_condvar{[this]() {
            return task_cond{*this};
        }};
    }

    stack_context allocate_stack() {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            return p_stacks.allocate();
        } else {
            return p_stacks.allocate();
        }
    }

    void deallocate_stack(stack_context &st) noexcept {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            p_stacks.deallocate(st);
        } else {
            p_stacks.deallocate(st);
        }
    }

    void reserve_stacks(std::size_t n) {
        if constexpr(!SA::is_thread_safe) {
            std::lock_guard<std::mutex> l{p_lock};
            p_stacks.reserve(n);
        } else {
            p_stacks.reserve(n);
        }
    }

private:
    template<typename TSA, typename F, typename ...A>
    void spawn_add(TSA &&sa, F &&func, A &&...args) {
        task *t = nullptr;
        if constexpr(sizeof...(A) == 0) {
            t = &p_available.emplace_back(
                std::forward<F>(func),
                std::forward<TSA>(sa)
            );
        } else {
            t = &p_available.emplace_back(
                [lfunc = std::bind(
                    std::forward<F>(func), std::forward<A>(args)...
                )]() mutable {
                    lfunc();
                },
                std::forward<TSA>(sa)
            );
        }
        t->pos = --p_available.end();
    }

    void init() {
        std::size_t size = p_threads;
        std::vector<std::thread> thrs;
        thrs.reserve(size);
        for (std::size_t i = 0; i < size; ++i) {
            thrs.emplace_back([this]() { thread_run(); });
        }
        for (std::size_t i = 0; i < size; ++i) {
            if (thrs[i].joinable()) {
                thrs[i].join();
            }
        }
    }

    void notify_one(task *&wl) {
        std::unique_lock<std::mutex> l{p_lock};
        if (wl == nullptr) {
            return;
        }
        wl->waiting_on = nullptr;
        p_available.splice(p_available.cbegin(), p_waiting, wl->pos);
        wl = std::exchange(wl->next_waiting, nullptr);
        l.unlock();
        p_cond.notify_one();
        task::current()->yield();
    }

    void notify_all(task *&wl) {
        {
            std::unique_lock<std::mutex> l{p_lock};
            while (wl != nullptr) {
                wl->waiting_on = nullptr;
                p_available.splice(p_available.cbegin(), p_waiting, wl->pos);
                wl = std::exchange(wl->next_waiting, nullptr);
                l.unlock();
                p_cond.notify_one();
                l.lock();
            }
        }
        task::current()->yield();
    }

    void thread_run() {
        for (;;) {
            std::unique_lock<std::mutex> l{p_lock};
            /* wait for an item to become available */
            while (p_available.empty()) {
                /* if all lists have become empty, we're done */
                if (p_waiting.empty() && p_running.empty()) {
                    return;
                }
                p_cond.wait(l);
            }
            task_run(l);
        }
    }

    void task_run(std::unique_lock<std::mutex> &l) {
        auto it = p_available.begin();
        p_running.splice(p_running.cend(), p_available, it);
        task &c = *it;
        l.unlock();
        c();
        if (c.dead()) {
            l.lock();
            p_running.erase(it);
            /* we're dead, notify all threads so they can be joined
             * we check all three, saves the other threads some re-waiting
             * when a task or tasks are already running, and those that do
             * will do the final notify by themselves
             */
            if (p_available.empty() && p_waiting.empty() && p_running.empty()) {
                l.unlock();
                p_cond.notify_all();
            }
        } else if (!c.waiting_on) {
            /* reschedule to the end of the queue */
            l.lock();
            p_available.splice(p_available.cend(), p_running, it);
            l.unlock();
            p_cond.notify_one();
        } else {
            p_waiting.splice(p_waiting.cbegin(), p_running, it);
            c.next_waiting = c.waiting_on->p_waiting;
            c.waiting_on->p_waiting = &c;
            /* wait locks the mutex, so manually unlock it here */
            p_lock.unlock();
        }
    }

    std::size_t p_threads;
    std::condition_variable p_cond;
    std::mutex p_lock;
    SA p_stacks;
    tlist p_available;
    tlist p_waiting;
    tlist p_running;
};

/** @brief An ostd::basic_coroutine_scheduler using ostd::stack_pool. */
using coroutine_scheduler = basic_coroutine_scheduler<stack_pool>;
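
/* A usage sketch for the M:N scheduler (illustrative; assumes the
 * put()/get() interface of <ostd/channel.hh>):
 *
 *     ostd::coroutine_scheduler sched; // one dispatcher per hardware thread
 *     int sum = sched.start([]() {
 *         auto c = ostd::make_channel<int>();
 *         for (int i = 0; i < 4; ++i) {
 *             ostd::spawn([c, i]() mutable { c.put(i); });
 *         }
 *         int s = 0;
 *         for (int i = 0; i < 4; ++i) {
 *             s += c.get(); // blocks the task, not the OS thread
 *         }
 *         return s; // 0+1+2+3 == 6
 *     });
 */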

/** @brief Spawns a task on the currently in use scheduler.
 *
 * The arguments are passed to the function. Effectively just calls
 * scheduler::spawn().
 */
template<typename F, typename ...A>
inline tid<std::result_of_t<F(A...)>> spawn(F &&func, A &&...args) {
    return detail::current_scheduler->spawn(
        std::forward<F>(func), std::forward<A>(args)...
    );
}

/** @brief Tells the current scheduler to re-schedule the current task.
 *
 * Effectively calls scheduler::yield().
 */
inline void yield() noexcept {
    detail::current_scheduler->yield();
}

/** @brief Creates a channel with the currently in use scheduler.
 *
 * Effectively calls scheduler::make_channel().
 *
 * @tparam T The type of the channel value.
 */
template<typename T>
inline channel<T> make_channel() {
    return detail::current_scheduler->make_channel<T>();
}

/** @brief Creates a coroutine with the currently in use scheduler.
 *
 * Effectively calls scheduler::make_coroutine().
 *
 * @tparam T The type passed to the coroutine, `Result(Args...)`.
 */
template<typename T, typename F>
inline coroutine<T> make_coroutine(F &&func) {
    return detail::current_scheduler->make_coroutine<T>(std::forward<F>(func));
}
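
/* For example (a hedged sketch; it assumes libostd coroutines pass a yield
 * object as the first argument of the wrapped callable):
 *
 *     auto sqr = ostd::make_coroutine<int(int)>([](auto yield, int n) {
 *         return n * n; // runs on a scheduler-allocated stack
 *     });
 *     int r = sqr(7); // 49
 */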

/** @brief Creates a generator with the currently in use scheduler.
 *
 * Effectively calls scheduler::make_generator().
 *
 * @tparam T The value type of the generator.
 */
template<typename T, typename F>
inline generator<T> make_generator(F &&func) {
    return detail::current_scheduler->make_generator<T>(std::forward<F>(func));
}
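
/* For example (a hedged sketch; it assumes the generator functor receives
 * a yield object and that generators expose an iter() range):
 *
 *     auto g = ostd::make_generator<int>([](auto yield) {
 *         for (int i = 1; i <= 3; ++i) {
 *             yield(i);
 *         }
 *     });
 *     for (int i: g.iter()) { ... } // 1, 2, 3
 */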

/** @brief Reserves stacks with the currently in use scheduler.
 *
 * Effectively calls scheduler::reserve_stacks().
 */
inline void reserve_stacks(std::size_t n) {
    detail::current_scheduler->reserve_stacks(n);
}

/** @} */

} /* namespace ostd */

#endif

/** @} */