From 606e6a56d7212eed882ca3f3515a99b8dd1e7ab7 Mon Sep 17 00:00:00 2001 From: Jens Maurer Date: Fri, 27 Jun 2025 09:27:41 +0200 Subject: [PATCH] P2079R10 Parallel scheduler --- source/exceptions.tex | 5 +- source/exec.tex | 436 ++++++++++++++++++++++++++++++++++++++++++ source/support.tex | 1 + 3 files changed, 441 insertions(+), 1 deletion(-) diff --git a/source/exceptions.tex b/source/exceptions.tex index 79684e4c51..d534360dd8 100644 --- a/source/exceptions.tex +++ b/source/exceptions.tex @@ -1128,8 +1128,11 @@ whose continuation is not a handle to a coroutine whose promise type has an \tcode{unhandled_stopped} member function. +\item% +when \tcode{std::execution::get_parallel_scheduler} is called and +\tcode{std::execution::system_context_replace\-ability::query_parallel_scheduler_backend()} +returns a null pointer value\iref{exec.par.scheduler}. \end{itemize} - \end{note} \pnum diff --git a/source/exec.tex b/source/exec.tex index 8fa3c30a24..bdd46b9f13 100644 --- a/source/exec.tex +++ b/source/exec.tex @@ -709,6 +709,21 @@ template<@\exposconcept{class-type}@ Promise> struct with_awaitable_senders; } + +namespace std::execution { + // \ref{exec.par.scheduler}, parallel scheduler + class @\libglobal{parallel_scheduler}@ { @\unspec@ }; + parallel_scheduler get_parallel_scheduler(); +} + +// \ref{exec.sysctxrepl}, namespace \tcode{system_context_replaceability} +namespace std::execution::@\libglobal{system_context_replaceability}@ { + struct receiver_proxy; + struct bulk_item_receiver_proxy; + struct parallel_scheduler_backend; + + shared_ptr<parallel_scheduler_backend> query_parallel_scheduler_backend(); +} \end{codeblock} \pnum @@ -5672,3 +5687,424 @@ return as_awaitable(std::forward<Value>(value), static_cast<Promise&>(*this)); \end{codeblock} \end{itemdescr} + +\rSec1[exec.par.scheduler]{Parallel scheduler} + +\pnum +\tcode{parallel_scheduler} models \libconcept{scheduler}.
+ +\pnum +Let \tcode{sch} be an object of type \tcode{parallel_scheduler}, and +let \tcode{\exposid{BACKEND-OF}(sch)} be \tcode{*ptr}, +where \tcode{sch} is associated with \tcode{ptr}. + +\pnum +The expression \tcode{get_forward_progress_guarantee(sch)} returns +\tcode{forward_progress_guarantee::paral\-lel}. + +\pnum +Let \tcode{sch2} be an object of type \tcode{parallel_scheduler}. +Two objects \tcode{sch} and \tcode{sch2} compare equal if and only if +\tcode{\exposid{BACKEND-OF}(sch)} and +\tcode{\exposid{BACKEND-OF}(sch2)} refer to the same object. + +\pnum +Let \tcode{rcvr} be a receiver. +A \defn{proxy} for \tcode{rcvr} with base \tcode{B} is +an lvalue \tcode{r} of type \tcode{B} such that +\begin{itemize} +\item +\tcode{r.set_value()} has effects equivalent to +\tcode{set_value(std::move(rcvr))}. +\item +\tcode{r.set_error(e)}, where \tcode{e} is an \tcode{exception_ptr} object, +has effects equivalent to \tcode{set_error(std::move(\brk{}rcvr), std::move(e))}. +\item +\tcode{r.set_stopped()} has effects equivalent to +\tcode{set_stopped(std::move(rcvr))}. +\end{itemize} + +\pnum +A \defn{preallocated backend storage for a proxy} \tcode{r} is +an object \tcode{s} of type \tcode{span<byte>} +such that the range \tcode{s} remains valid and may be overwritten +until one of \tcode{set_value}, \tcode{set_error}, or \tcode{set_stopped} +is called on \tcode{r}. +\begin{note} +The storage referenced by \tcode{s} can be used as temporary storage +for operations launched via calls to \tcode{parallel_scheduler_backend}. +\end{note} + +\pnum +A \defnadj{bulk chunked}{proxy} for \tcode{rcvr} +with callable \tcode{f} and arguments \tcode{args} +is a proxy \tcode{r} for \tcode{rcvr} +with base \tcode{system_context_replaceability::bulk_item_receiver_proxy} +such that +\tcode{r.execute(i, j)} for indices \tcode{i} and \tcode{j} +has effects equivalent to \tcode{f(i, j, args...)}.
+ +\pnum +A \defnadj{bulk unchunked}{proxy} for \tcode{rcvr} +with callable \tcode{f} and arguments \tcode{args} +is a proxy \tcode{r} for \tcode{rcvr} +with base \tcode{system_context_replaceability::bulk_item_receiver_proxy} +such that +\tcode{r.execute(i, i+1)} for index \tcode{i} +has effects equivalent to \tcode{f(i, args...)}. + +\pnum +Let \tcode{b} be \tcode{\exposid{BACKEND-OF}(sch)}, +let \tcode{sndr} be the object returned by \tcode{schedule(sch)}, and +let \tcode{rcvr} be a receiver. +If \tcode{rcvr} is connected to \tcode{sndr} and +the resulting operation state is started, then: +\begin{itemize} +\item +If \tcode{sndr} completes successfully, +then \tcode{b.schedule(r, s)} is called, where +\begin{itemize} +\item +\tcode{r} is a proxy for \tcode{rcvr} +with base \tcode{system_context_replaceability::receiver_proxy} and +\item +\tcode{s} is a preallocated backend storage for \tcode{r}. +\end{itemize} +\item +All other completion operations are forwarded unchanged. +\end{itemize} + +\pnum +\tcode{parallel_scheduler} provides a customized implementation of +the \tcode{bulk_chunked} algorithm\iref{exec.bulk}. +If a receiver \tcode{rcvr} is connected to the sender +returned by \tcode{bulk_chunked(sndr, pol, shape, f)} and +the resulting operation state is started, then: +\begin{itemize} +\item +If \tcode{sndr} completes with values \tcode{vals}, +let \tcode{args} be a pack of lvalue subexpressions designating \tcode{vals}, +then \tcode{b.schedule_bulk_chunked(shape, r, s)} is called, where +\begin{itemize} +\item +\tcode{r} is a bulk chunked proxy for \tcode{rcvr} +with callable \tcode{f} and arguments \tcode{args} and +\item +\tcode{s} is a preallocated backend storage for \tcode{r}. +\end{itemize} +\item +All other completion operations are forwarded unchanged. +\end{itemize} +\begin{note} +Customizing the behavior of \tcode{bulk_chunked} +affects the default implementation of \tcode{bulk}.
+\end{note} + +\pnum +\tcode{parallel_scheduler} provides a customized implementation of +the \tcode{bulk_unchunked} algorithm\iref{exec.bulk}. +If a receiver \tcode{rcvr} is connected to the sender +returned by \tcode{bulk_unchunked(sndr, pol, shape, f)} and +the resulting operation state is started, then: +\begin{itemize} +\item +If \tcode{sndr} completes with values \tcode{vals}, +let \tcode{args} be a pack of lvalue subexpressions designating \tcode{vals}, +then \tcode{b.schedule_bulk_unchunked(shape, r, s)} is called, where +\begin{itemize} +\item +\tcode{r} is a bulk unchunked proxy for \tcode{rcvr} +with callable \tcode{f} and arguments \tcode{args} and +\item +\tcode{s} is a preallocated backend storage for \tcode{r}. +\end{itemize} +\item +All other completion operations are forwarded unchanged. +\end{itemize} + +\indexlibraryglobal{get_parallel_scheduler}% +\begin{itemdecl} +parallel_scheduler get_parallel_scheduler(); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +Let \tcode{eb} be the result of \tcode{system_context_replaceability::query_parallel_scheduler_backend()}. +If \tcode{eb == nullptr} is \tcode{true}, +calls \tcode{terminate}\iref{except.terminate}. +Otherwise, returns a \tcode{parallel_scheduler} object +associated with \tcode{eb}. +\end{itemdescr} + +\rSec1[exec.sysctxrepl]{Namespace \tcode{system_context_replaceability}} + +\rSec2[exec.sysctxrepl.general]{General} + +\pnum +Facilities in the \tcode{system_context_replaceability} namespace +allow users to replace the default implementation of \tcode{parallel_scheduler}. + +\rSec2[exec.sysctxrepl.query]{\tcode{query_parallel_scheduler_backend}} + +\begin{itemdecl} + shared_ptr<parallel_scheduler_backend> query_parallel_scheduler_backend(); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\tcode{query_parallel_scheduler_backend()} returns +the implementation object for a parallel scheduler. + +\pnum +\returns +A non-null shared pointer to an object +that implements the \tcode{parallel_scheduler_backend} interface.
+ +\pnum +\remarks +This function is replaceable\iref{dcl.fct.def.replace}. +\end{itemdescr} + +\begin{codeblock} +namespace std::execution::system_context_replaceability { + struct @\libglobal{receiver_proxy}@ { + virtual ~receiver_proxy() = default; + + protected: + virtual bool @\exposid{query-env}@(@\unspec@) noexcept = 0; // \expos + + public: + virtual void set_value() noexcept = 0; + virtual void set_error(exception_ptr) noexcept = 0; + virtual void set_stopped() noexcept = 0; + + template<class P, @\exposconcept{class-type}@ Query> + optional<P> try_query(Query q) noexcept; + }; + + struct @\libglobal{bulk_item_receiver_proxy}@ : receiver_proxy { + virtual void execute(size_t, size_t) noexcept = 0; + }; +} +\end{codeblock} + +\pnum +\tcode{receiver_proxy} represents a receiver +that will be notified +by the implementations of \tcode{parallel_scheduler_backend} +to trigger the completion operations. +\tcode{bulk_item_receiver_proxy} is derived from \tcode{receiver_proxy} and +is used for \tcode{bulk_chunked} and \tcode{bulk_unchunked} customizations +that will also receive notifications +from implementations of \tcode{parallel_scheduler_backend} +corresponding to different iterations. + +\begin{itemdecl} +template<class P, @\exposconcept{class-type}@ Query> +optional<P> @\libglobal{try_query}@(Query q) noexcept; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\mandates +\tcode{P} is a cv-unqualified non-array object type. + +\pnum +\returns +Let \tcode{env} be the environment of the receiver represented by \tcode{*this}. +If +\begin{itemize} +\item +\tcode{Query} is not a member of an implementation-defined set +of supported queries; or +\item +\tcode{P} is not a member of an implementation-defined set +of supported result types for \tcode{Query}; or +\item +the expression \tcode{q(env)} is not well-formed or +does not have type \cv{} \tcode{P}, +\end{itemize} +then returns \tcode{nullopt}. +Otherwise, returns \tcode{q(env)}. + +\pnum +\remarks +\tcode{get_stop_token_t} is +in the implementation-defined set of supported queries, and +\tcode{inplace_stop_token} is a member +of the implementation-defined set of supported result types +for \tcode{get_stop_token_t}. +\end{itemdescr} + +\rSec2[exec.sysctxrepl.psb]{Class \tcode{parallel_scheduler_backend}} + +\begin{codeblock} +namespace std::execution::system_context_replaceability { + struct parallel_scheduler_backend { + virtual ~parallel_scheduler_backend() = default; + + virtual void schedule(receiver_proxy&, span<byte>) noexcept = 0; + virtual void schedule_bulk_chunked(size_t, bulk_item_receiver_proxy&, + span<byte>) noexcept = 0; + virtual void schedule_bulk_unchunked(size_t, bulk_item_receiver_proxy&, + span<byte>) noexcept = 0; + }; +} +\end{codeblock} + +\indexlibrarymember{schedule}{parallel_scheduler_backend}% +\begin{itemdecl} +virtual void schedule(receiver_proxy& r, span<byte> s) noexcept = 0; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +The ends of +the lifetimes of \tcode{*this}, +the object referred to by \tcode{r}, and +any storage referenced by \tcode{s} +all happen after +the beginning of the evaluation of one of the expressions below.
+ +\pnum +\effects +A derived class shall implement this function such that: +\begin{itemize} +\item +One of the following expressions is evaluated: +\begin{itemize} +\item +\tcode{r.set_value()}, if no error occurs, and the work is successful; +\item +\tcode{r.set_error(eptr)}, if an error occurs, +where \tcode{eptr} is an object of type \tcode{exception_ptr}; +\item +\tcode{r.set_stopped()}, if the work is canceled. +\end{itemize} +\item +Any call to \tcode{r.set_value()} happens on +an execution agent of the execution context represented by \tcode{*this}. +\end{itemize} + +\pnum +\remarks +The storage referenced by \tcode{s} +may be used by \tcode{*this} as temporary storage +for the duration of the operation launched by this call. +\end{itemdescr} + +\indexlibrarymember{schedule_bulk_chunked}{parallel_scheduler_backend}% +\begin{itemdecl} +virtual void schedule_bulk_chunked(size_t n, bulk_item_receiver_proxy& r, + span<byte> s) noexcept = 0; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +The ends of +the lifetimes of \tcode{*this}, +the object referred to by \tcode{r}, and +any storage referenced by \tcode{s} +all happen after +the beginning of the evaluation of one of the expressions below. + +\pnum +\effects +A derived class shall implement this function such that: +\begin{itemize} +\item +Eventually, one of the following expressions is evaluated: +\begin{itemize} +\item +\tcode{r.set_value()}, if no error occurs, and the work is successful; +\item +\tcode{r.set_error(eptr)}, if an error occurs, +where \tcode{eptr} is an object of type \tcode{exception_ptr}; +\item +\tcode{r.set_stopped()}, if the work is canceled. +\end{itemize} +\item +If \tcode{r.execute(b, e)} is called, +then \tcode{b} and \tcode{e} are in the range \crange{0}{n} and +$\tcode{b} < \tcode{e}$. +\item +For each $i$ in \range{0}{n}, +there is at most one call to \tcode{r.execute(b, e)} +such that $i$ is in the range \range{b}{e}.
+\item +If \tcode{r.set_value()} is called, +then for each $i$ in \range{0}{n}, +there is exactly one call to \tcode{r.execute(b, e)} +such that $i$ is in the range \range{b}{e}. +\item +All calls to \tcode{execute} on \tcode{r} happen before +the call to either \tcode{set_value}, \tcode{set_error}, or \tcode{set_stopped} +on \tcode{r}. +\item +All calls to \tcode{execute} and \tcode{set_value} on \tcode{r} are made +on execution agents of the execution context represented by \tcode{*this}. +\end{itemize} + +\pnum +\remarks +The storage referenced by \tcode{s} may be used by \tcode{*this} +as temporary storage for the duration of the operation launched by this call. +\end{itemdescr} + +\indexlibrarymember{schedule_bulk_unchunked}{parallel_scheduler_backend}% +\begin{itemdecl} +virtual void schedule_bulk_unchunked(size_t n, bulk_item_receiver_proxy& r, + span<byte> s) noexcept = 0; +\end{itemdecl} + +\begin{itemdescr} +\pnum +\expects +The ends of +the lifetimes of \tcode{*this}, +the object referred to by \tcode{r}, and +any storage referenced by \tcode{s} +all happen after +the beginning of the evaluation of one of the expressions below. + +\pnum +\effects +A derived class shall implement this function such that: +\begin{itemize} +\item +Eventually, one of the following expressions is evaluated: +\begin{itemize} +\item +\tcode{r.set_value()}, if no error occurs, and the work is successful; +\item +\tcode{r.set_error(eptr)}, if an error occurs, +where \tcode{eptr} is an object of type \tcode{exception_ptr}; +\item +\tcode{r.set_stopped()}, if the work is canceled. +\end{itemize} +\item +If \tcode{r.execute(b, e)} is called, +then \tcode{b} is in the range \range{0}{n} and +\tcode{e} is equal to \tcode{b + 1}. +For each $i$ in \range{0}{n}, +there is at most one call to \tcode{r.execute($i$, $i$ + 1)}. +\item +If \tcode{r.set_value()} is called, +then for each $i$ in \range{0}{n}, +there is exactly one call to \tcode{r.execute($i$, $i$ + 1)}.
+\item +All calls to \tcode{execute} on \tcode{r} happen before +the call to either \tcode{set_value}, \tcode{set_error}, or \tcode{set_stopped} +on \tcode{r}. +\item +All calls to \tcode{execute} and \tcode{set_value} on \tcode{r} are made +on execution agents of the execution context represented by \tcode{*this}. +\end{itemize} + +\pnum +\remarks +The storage referenced by \tcode{s} may be used by \tcode{*this} as temporary storage for the duration of the operation launched by this call. +\end{itemdescr} diff --git a/source/support.tex b/source/support.tex index 95d674aafb..5b717166be 100644 --- a/source/support.tex +++ b/source/support.tex @@ -756,6 +756,7 @@ #define @\defnlibxname{cpp_lib_optional_range_support}@ 202406L // freestanding, also in \libheader{optional} #define @\defnlibxname{cpp_lib_out_ptr}@ 202311L // freestanding, also in \libheader{memory} #define @\defnlibxname{cpp_lib_parallel_algorithm}@ 201603L // also in \libheader{algorithm}, \libheader{numeric} +#define @\defnlibxname{cpp_lib_parallel_scheduler}@ 202506L // also in \libheader{execution} #define @\defnlibxname{cpp_lib_philox_engine}@ 202406L // also in \libheader{random} #define @\defnlibxname{cpp_lib_polymorphic}@ 202502L // also in \libheader{memory} #define @\defnlibxname{cpp_lib_polymorphic_allocator}@ 201902L // also in \libheader{memory_resource}