Skip to content

Commit 54e5b6b

Browse files
youenn98crosvm LUCI
authored andcommitted
device: vhost_user: Enable seccomp filter vhost-user-fs
Vhost-user-fs currently lacks seccomp filter support, which cause security concerns to put into real usage. This change introduces virtio-fs device's seccomp policy filter to vhost-user-fs when sandbox is enabled. When specified path of socket does not exist for vhost-user device, the vhost-user device will call socketpair to create a socket. To support the syscall, the rule allowing socketpair is added to vhost_user.policy. Also, this CL adds disable-sandbox option for vhost-user-fs-device. The option is set to false by default, the vhost-user-fs will enter new mnt/user/pid/net namespace. If the this option is true, the vhost-user-fs device only create a new mount namespace. BUG=b:355159487 TEST=run manual tests TEST=run e2e test in chromium:5746575 Change-Id: I6c18386f690af7b0d2e1550c0b3881d444280a8b Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5741356 Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org> Commit-Queue: Yuan Yao <yuanyaogoog@chromium.org>
1 parent 437d661 commit 54e5b6b

File tree

7 files changed

+95
-44
lines changed

7 files changed

+95
-44
lines changed

devices/src/virtio/vhost/user/device/fs.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,4 +232,11 @@ pub struct Options {
232232
/// gid of the device process in the new user namespace created by minijail.
233233
/// Default: 0.
234234
gid: u32,
235+
#[argh(switch)]
236+
/// disable-sandbox controls whether vhost-user-fs device uses minijail sandbox.
237+
/// By default, it is false, the vhost-user-fs will enter new mnt/user/pid/net
238+
/// namespace. If the this option is true, the vhost-user-fs device only create
239+
/// a new mount namespace and run without seccomp filter.
240+
/// Default: false.
241+
disable_sandbox: bool,
235242
}

devices/src/virtio/vhost/user/device/fs/sys/linux.rs

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use anyhow::Context;
1010
use base::linux::max_open_files;
1111
use base::RawDescriptor;
1212
use cros_async::Executor;
13+
use jail::create_base_minijail;
14+
use jail::set_embedded_bpf_program;
1315
use minijail::Minijail;
1416

1517
use crate::virtio::vhost::user::device::fs::FsBackend;
@@ -37,47 +39,55 @@ fn jail_and_fork(
3739
gid: u32,
3840
uid_map: Option<String>,
3941
gid_map: Option<String>,
42+
disable_sandbox: bool,
4043
) -> anyhow::Result<i32> {
44+
let limit = max_open_files().context("failed to get max open files")?;
4145
// Create new minijail sandbox
42-
let mut j = Minijail::new()?;
43-
44-
j.namespace_pids();
45-
j.namespace_user();
46-
j.namespace_user_disable_setgroups();
47-
if uid != 0 {
48-
j.change_uid(uid);
49-
}
50-
if gid != 0 {
51-
j.change_gid(gid);
52-
}
53-
j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
54-
j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
55-
j.run_as_init();
56-
57-
j.namespace_vfs();
58-
j.namespace_net();
59-
j.no_new_privs();
46+
let jail = if disable_sandbox {
47+
create_base_minijail(dir_path.as_path(), limit)?
48+
} else {
49+
let mut j: Minijail = Minijail::new()?;
50+
j.namespace_pids();
51+
j.namespace_user();
52+
j.namespace_user_disable_setgroups();
53+
if uid != 0 {
54+
j.change_uid(uid);
55+
}
56+
if gid != 0 {
57+
j.change_gid(gid);
58+
}
59+
j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
60+
j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
61+
j.run_as_init();
6062

61-
// Only pivot_root if we are not re-using the current root directory.
62-
if dir_path != Path::new("/") {
63-
// It's safe to call `namespace_vfs` multiple times.
6463
j.namespace_vfs();
65-
j.enter_pivot_root(&dir_path)?;
66-
}
67-
j.set_remount_mode(libc::MS_SLAVE);
64+
j.namespace_net();
65+
j.no_new_privs();
66+
67+
// Only pivot_root if we are not re-using the current root directory.
68+
if dir_path != Path::new("/") {
69+
// It's safe to call `namespace_vfs` multiple times.
70+
j.namespace_vfs();
71+
j.enter_pivot_root(&dir_path)?;
72+
}
73+
j.set_remount_mode(libc::MS_SLAVE);
6874

69-
let limit = max_open_files().context("failed to get max open files")?;
70-
j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
71-
// vvu locks around 512k memory. Just give 1M.
72-
j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
75+
j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
76+
// vvu locks around 512k memory. Just give 1M.
77+
j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
78+
#[cfg(not(feature = "seccomp_trace"))]
79+
set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;
80+
j.use_seccomp_filter();
81+
j
82+
};
7383

7484
// Make sure there are no duplicates in keep_rds
7585
keep_rds.sort_unstable();
7686
keep_rds.dedup();
7787

7888
// fork on the jail here
7989
// SAFETY: trivially safe
80-
let pid = unsafe { j.fork(Some(&keep_rds))? };
90+
let pid = unsafe { jail.fork(Some(&keep_rds))? };
8191

8292
if pid > 0 {
8393
// Current FS driver jail does not use seccomp and jail_and_fork() does not have other
@@ -113,6 +123,7 @@ pub fn start_device(opts: Options) -> anyhow::Result<()> {
113123
opts.gid,
114124
opts.uid_map,
115125
opts.gid_map,
126+
opts.disable_sandbox,
116127
)?;
117128

118129
// Parent, nothing to do but wait and then exit
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright 2024 The ChromiumOS Authors
2+
# Use of this source code is governed by a BSD-style license that can be
3+
# found in the LICENSE file.
4+
5+
@include /usr/share/policy/crosvm/vhost_user.policy
6+
7+
@include /usr/share/policy/crosvm/fs_device.policy
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright 2024 The ChromiumOS Authors
2+
# Use of this source code is governed by a BSD-style license that can be
3+
# found in the LICENSE file.
4+
5+
# Policy file for the vhost-user transport over a socket.
6+
7+
# FIONBIO: for setting non-blocking mode over the socket.
8+
# TCGETS/TCSETS: used on FD 0, probably for serial.
9+
# b/239779171: try moving this to the serial device once we can extend ioctls across policy files.
10+
ioctl: arg1 == FIONBIO || arg1 == TCGETS || arg1 == TCSETS
11+
# For accepting a client connection over the socket.
12+
accept4: 1
13+
# For creating a socket if the specified socket path does not exits
14+
socketpair: arg0 == AF_UNIX
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright 2024 The ChromiumOS Authors
2+
# Use of this source code is governed by a BSD-style license that can be
3+
# found in the LICENSE file.
4+
5+
@include /usr/share/policy/crosvm/vhost_user.policy
6+
7+
@include /usr/share/policy/crosvm/fs_device.policy

jail/seccomp/x86_64/vhost_user.policy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@
1010
ioctl: arg1 == FIONBIO || arg1 == TCGETS || arg1 == TCSETS
1111
# For accepting a client connection over the socket.
1212
accept4: 1
13+
# For creating a socket if the specified socket path does not exits
14+
socketpair: arg0 == AF_UNIX

jail/src/helpers.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ use zerocopy::AsBytes;
2828

2929
use crate::config::JailConfig;
3030

31-
#[cfg(not(feature = "seccomp_trace"))]
3231
static EMBEDDED_BPFS: Lazy<std::collections::HashMap<&str, Vec<u8>>> =
3332
Lazy::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));
3433

@@ -288,20 +287,7 @@ pub fn create_sandbox_minijail(
288287
})?;
289288
}
290289
} else {
291-
let bpf_program = EMBEDDED_BPFS
292-
.get(&config.seccomp_policy_name)
293-
.with_context(|| {
294-
format!(
295-
"failed to find embedded seccomp policy: {}",
296-
&config.seccomp_policy_name
297-
)
298-
})?;
299-
jail.parse_seccomp_bytes(bpf_program).with_context(|| {
300-
format!(
301-
"failed to parse embedded seccomp policy: {}",
302-
&config.seccomp_policy_name
303-
)
304-
})?;
290+
set_embedded_bpf_program(&mut jail, config.seccomp_policy_name)?;
305291
}
306292

307293
jail.use_seccomp_filter();
@@ -482,3 +468,20 @@ fn add_current_user_to_jail(jail: &mut Minijail) -> Result<()> {
482468
}
483469
Ok(())
484470
}
471+
472+
/// Set the seccomp policy for a jail from embedded bpfs
473+
pub fn set_embedded_bpf_program(jail: &mut Minijail, seccomp_policy_name: &str) -> Result<()> {
474+
let bpf_program = EMBEDDED_BPFS.get(seccomp_policy_name).with_context(|| {
475+
format!(
476+
"failed to find embedded seccomp policy: {}",
477+
seccomp_policy_name
478+
)
479+
})?;
480+
jail.parse_seccomp_bytes(bpf_program).with_context(|| {
481+
format!(
482+
"failed to parse embedded seccomp policy: {}",
483+
seccomp_policy_name
484+
)
485+
})?;
486+
Ok(())
487+
}

0 commit comments

Comments
 (0)