|
6 | 6 |
|
7 | 7 | use crate::{
|
8 | 8 | bindings,
|
9 |
| - types::{NotThreadSafe, Opaque}, |
| 9 | + pid_namespace::PidNamespace, |
| 10 | + types::{ARef, NotThreadSafe, Opaque}, |
10 | 11 | };
|
11 | 12 | use core::{
|
12 | 13 | cmp::{Eq, PartialEq},
|
@@ -36,6 +37,16 @@ macro_rules! current {
|
36 | 37 | };
|
37 | 38 | }
|
38 | 39 |
|
| 40 | +/// Returns the currently running task's pid namespace. |
| 41 | +#[macro_export] |
| 42 | +macro_rules! current_pid_ns { |
| 43 | + () => { |
| 44 | + // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive |
| 45 | + // the caller. |
| 46 | + unsafe { &*$crate::task::Task::current_pid_ns() } |
| 47 | + }; |
| 48 | +} |
| 49 | + |
39 | 50 | /// Wraps the kernel's `struct task_struct`.
|
40 | 51 | ///
|
41 | 52 | /// # Invariants
|
@@ -145,6 +156,97 @@ impl Task {
|
145 | 156 | }
|
146 | 157 | }
|
147 | 158 |
|
| 159 | + /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace. |
| 160 | + /// |
| 161 | + /// This function can be used to create an unbounded lifetime by e.g., storing the returned |
| 162 | + /// PidNamespace in a global variable which would be a bug. So the recommended way to get the |
| 163 | + /// current task's/thread's pid namespace is to use the [`current_pid_ns`] macro because it is |
| 164 | + /// safe. |
| 165 | + /// |
| 166 | + /// # Safety |
| 167 | + /// |
| 168 | + /// Callers must ensure that the returned object doesn't outlive the current task/thread. |
| 169 | + pub unsafe fn current_pid_ns() -> impl Deref<Target = PidNamespace> { |
| 170 | + struct PidNamespaceRef<'a> { |
| 171 | + task: &'a PidNamespace, |
| 172 | + _not_send: NotThreadSafe, |
| 173 | + } |
| 174 | + |
| 175 | + impl Deref for PidNamespaceRef<'_> { |
| 176 | + type Target = PidNamespace; |
| 177 | + |
| 178 | + fn deref(&self) -> &Self::Target { |
| 179 | + self.task |
| 180 | + } |
| 181 | + } |
| 182 | + |
| 183 | + // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`. |
| 184 | + // |
| 185 | + // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A |
| 186 | + // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect |
| 187 | + // on the calling `Task`'s pid namespace. It will only affect the pid namespace of children |
| 188 | + // created by the calling `Task`. This invariant guarantees that after having acquired a |
| 189 | + // reference to a `Task`'s pid namespace it will remain unchanged. |
| 190 | + // |
| 191 | + // When a task has exited and been reaped `release_task()` will be called. This will set |
| 192 | + // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task |
| 193 | + // that is dead will return `NULL`. Note that neither holding the RCU lock nor holding a |
| 194 | + // reference count to |
| 195 | + // the `Task` will prevent `release_task()` being called. |
| 196 | + // |
| 197 | + // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function |
| 198 | + // can be used. There are two cases to consider: |
| 199 | + // |
| 200 | + // (1) retrieving the `PidNamespace` of the `current` task |
| 201 | + // (2) retrieving the `PidNamespace` of a non-`current` task |
| 202 | + // |
| 203 | + // From system call context retrieving the `PidNamespace` for case (1) is always safe and |
| 204 | + // requires neither RCU locking nor a reference count to be held. Retrieving the |
| 205 | + // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath |
| 206 | + // like that is exposed to Rust. |
| 207 | + // |
| 208 | + // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection. |
| 209 | + // Accessing `PidNamespace` outside of RCU protection requires a reference count that |
| 210 | + // must've been acquired while holding the RCU lock. Note that accessing a non-`current` |
| 211 | + // task means `NULL` can be returned as the non-`current` task could have already passed |
| 212 | + // through `release_task()`. |
| 213 | + // |
| 214 | + // To retrieve (1) the `current_pid_ns!()` macro should be used which ensures that the |
| 215 | + // returned `PidNamespace` cannot outlive the calling scope. The associated |
| 216 | + // `current_pid_ns()` function should not be called directly as it could be abused to |
| 217 | + // create an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows |
| 218 | + // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU |
| 219 | + // protection and without having to acquire a reference count. |
| 220 | + // |
| 221 | + // For (2) the `get_pid_ns()` method must be used. This will always acquire a |
| 222 | + // reference on `PidNamespace` and will return an `Option` to force the caller to |
| 223 | + // explicitly handle the case where `PidNamespace` is `None`, something that tends to be |
| 224 | + // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it |
| 225 | + // difficult to perform operations that are otherwise safe without holding a reference |
| 226 | + // count as long as RCU protection is guaranteed. This is not important currently, but we |
| 227 | + // do want it in the future. |
| 228 | + // |
| 229 | + // Note for (2) the required RCU protection around calling `task_active_pid_ns()` |
| 230 | + // synchronizes against putting the last reference of the associated `struct pid` of |
| 231 | + // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the |
| 232 | + // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be |
| 233 | + // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via |
| 234 | + // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired |
| 235 | + // from `task->thread_pid` to finish. |
| 236 | + // |
| 237 | + // SAFETY: The current task's pid namespace is valid as long as the current task is running. |
| 238 | + let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) }; |
| 239 | + PidNamespaceRef { |
| 240 | + // SAFETY: If the current thread is still running, the current task and its associated |
| 241 | + // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be |
| 242 | + // transferred to another thread (where it could potentially outlive the current |
| 243 | + // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the |
| 244 | + // current task/thread. |
| 245 | + task: unsafe { PidNamespace::from_ptr(pidns) }, |
| 246 | + _not_send: NotThreadSafe, |
| 247 | + } |
| 248 | + } |
| 249 | + |
148 | 250 | /// Returns a raw pointer to the task.
|
149 | 251 | #[inline]
|
150 | 252 | pub fn as_ptr(&self) -> *mut bindings::task_struct {
|
@@ -188,11 +290,32 @@ impl Task {
|
188 | 290 | unsafe { bindings::signal_pending(self.as_ptr()) != 0 }
|
189 | 291 | }
|
190 | 292 |
|
191 |
| - /// Returns the given task's pid in the current pid namespace. |
192 |
| - pub fn pid_in_current_ns(&self) -> Pid { |
193 |
| - // SAFETY: It's valid to pass a null pointer as the namespace (defaults to current |
194 |
| - // namespace). The task pointer is also valid. |
195 |
| - unsafe { bindings::task_tgid_nr_ns(self.as_ptr(), ptr::null_mut()) } |
| 293 | + /// Returns the task's pid namespace with an elevated reference count. |
| 294 | + pub fn get_pid_ns(&self) -> Option<ARef<PidNamespace>> { |
| 295 | + // SAFETY: By the type invariant, we know that `self.0` is valid. |
| 296 | + let ptr = unsafe { bindings::task_get_pid_ns(self.as_ptr()) }; |
| 297 | + if ptr.is_null() { |
| 298 | + None |
| 299 | + } else { |
| 300 | + // SAFETY: `ptr` is non-null (checked above) and valid, as it was returned by |
| 301 | + // `task_get_pid_ns()`, through which we also own a reference count. |
| 302 | + // CAST: `PidNamespace` is a `repr(transparent)` wrapper around `bindings::pid_namespace`. |
| 303 | + Some(unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr.cast::<PidNamespace>())) }) |
| 304 | + } |
| 305 | + } |
| 306 | + |
| 307 | + /// Returns the given task's pid in the provided pid namespace. |
| 308 | + #[doc(alias = "task_tgid_nr_ns")] |
| 309 | + pub fn tgid_nr_ns(&self, pidns: Option<&PidNamespace>) -> Pid { |
| 310 | + let pidns = match pidns { |
| 311 | + Some(pidns) => pidns.as_ptr(), |
| 312 | + None => core::ptr::null_mut(), |
| 313 | + }; |
| 314 | + // SAFETY: By the type invariant, we know that `self.0` is valid. We received a valid |
| 315 | + // `PidNamespace` that we can use as a pointer or we received `None` and |
| 316 | + // thus pass a null pointer. The underlying C function is safe to be used with NULL |
| 317 | + // pointers. |
| 318 | + unsafe { bindings::task_tgid_nr_ns(self.as_ptr(), pidns) } |
196 | 319 | }
|
197 | 320 |
|
198 | 321 | /// Wakes up the task.
|
|
0 commit comments