virtio-fs: Unshare fs attributes to avoid locking

Calling `unshare(CLONE_FS)` from worker threads means we can freely
modify the current working directory and umask without having to guard
theses attributes with locks.

BUG=none
TEST=./tools/presubmit --quick

Change-Id: I29144b3d233b84e761c11a5e46efe541117e7f2a
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3263932
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Christian Blichmann <cblichmann@google.com>
Auto-Submit: Christian Blichmann <cblichmann@google.com>
Reviewed-by: Chirantan Ekbote <chirantan@chromium.org>
This commit is contained in:
Christian Blichmann 2021-11-05 14:19:13 +01:00 committed by Commit Bot
parent ef5b1e711b
commit e32b55670b
3 changed files with 33 additions and 47 deletions

View file

@ -77,7 +77,7 @@ pub enum Error {
FuseError(fuse::Error),
/// Failed to get the securebits for the worker thread.
#[error("failed to get securebits for the worker thread: {0}")]
GetSecurebits(io::Error),
GetSecurebits(SysError),
/// The `len` field of the header is too small.
#[error("DescriptorChain is invalid: {0}")]
InvalidDescriptorChain(DescriptorError),
@ -92,13 +92,16 @@ pub enum Error {
ReadQueueEvent(SysError),
/// Failed to set the securebits for the worker thread.
#[error("failed to set securebits for the worker thread: {0}")]
SetSecurebits(io::Error),
SetSecurebits(SysError),
/// Failed to signal the virio used queue.
#[error("failed to signal used queue: {0}")]
SignalUsedQueue(SysError),
/// The tag for the Fs device was too long to fit in the config space.
#[error("Fs device tag is too long: len = {0}, max = {}", FS_MAX_TAG_LEN)]
TagTooLong(usize),
/// Calling unshare to disassociate FS attributes from parent failed.
#[error("failed to unshare fs from parent: {0}")]
UnshareFromParent(SysError),
/// Error while polling for events.
#[error("failed to wait for events: {0}")]
WaitError(SysError),

View file

@ -299,13 +299,22 @@ fn set_creds(
ScopedGid::new(gid, oldgid).and_then(|gid| Ok((ScopedUid::new(uid, olduid)?, gid)))
}
struct ScopedUmask<'a> {
struct ScopedUmask {
old: libc::mode_t,
mask: libc::mode_t,
_factory: &'a mut Umask,
}
impl<'a> Drop for ScopedUmask<'a> {
impl ScopedUmask {
fn new(mask: libc::mode_t) -> ScopedUmask {
ScopedUmask {
// Safe because this doesn't modify any memory and always succeeds.
old: unsafe { libc::umask(mask) },
mask,
}
}
}
impl Drop for ScopedUmask {
fn drop(&mut self) {
// Safe because this doesn't modify any memory and always succeeds.
let previous = unsafe { libc::umask(self.old) };
@ -316,19 +325,6 @@ impl<'a> Drop for ScopedUmask<'a> {
}
}
struct Umask;
impl Umask {
fn set(&mut self, mask: libc::mode_t) -> ScopedUmask {
ScopedUmask {
// Safe because this doesn't modify any memory and always succeeds.
old: unsafe { libc::umask(mask) },
mask,
_factory: self,
}
}
}
struct ScopedFsetid(Caps);
impl Drop for ScopedFsetid {
fn drop(&mut self) {
@ -563,14 +559,6 @@ pub struct PassthroughFs {
// Whether zero message opendir is supported by the kernel driver.
zero_message_opendir: AtomicBool,
// Used to ensure that only one thread at a time uses chdir(). Since chdir() affects the
// process-wide CWD, we cannot allow more than one thread to do it at the same time.
chdir_mutex: Mutex<()>,
// Used when creating files / directories / nodes. Since the umask is process-wide, we can only
// allow one thread at a time to change it.
umask: Mutex<Umask>,
// Used to communicate with other processes using D-Bus.
#[cfg(feature = "chromeos")]
dbus_connection: Option<Mutex<dbus::blocking::Connection>>,
@ -626,9 +614,6 @@ impl PassthroughFs {
zero_message_open: AtomicBool::new(false),
zero_message_opendir: AtomicBool::new(false),
chdir_mutex: Mutex::new(()),
umask: Mutex::new(Umask),
#[cfg(feature = "chromeos")]
dbus_connection,
#[cfg(feature = "chromeos")]
@ -895,7 +880,6 @@ impl PassthroughFs {
F: FnOnce() -> T,
{
let root = self.find_inode(ROOT_ID).expect("failed to find root inode");
let chdir_lock = self.chdir_mutex.lock();
// Safe because this doesn't modify any memory and we check the return value. Since the
// fchdir should never fail we just use debug_asserts.
@ -919,7 +903,6 @@ impl PassthroughFs {
io::Error::last_os_error()
);
mem::drop(chdir_lock);
res
}
@ -1499,12 +1482,12 @@ impl FileSystem for PassthroughFs {
let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
{
let mut um = self.umask.lock();
let _scoped_umask = um.set(umask);
let _scoped_umask = ScopedUmask::new(umask);
// Safe because this doesn't modify any memory and we check the return value.
syscall!(unsafe { libc::mkdirat(data.as_raw_descriptor(), name.as_ptr(), mode) })?;
}
self.do_lookup(&data, name)
}
@ -1582,8 +1565,7 @@ impl FileSystem for PassthroughFs {
let current_dir = unsafe { CStr::from_bytes_with_nul_unchecked(b".\0") };
let fd = {
let mut um = self.umask.lock();
let _scoped_umask = um.set(umask);
let _scoped_umask = ScopedUmask::new(umask);
// Safe because this doesn't modify any memory and we check the return value.
syscall!(unsafe {
@ -1620,8 +1602,7 @@ impl FileSystem for PassthroughFs {
(flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW) & !libc::O_DIRECT;
let fd = {
let mut um = self.umask.lock();
let _scoped_umask = um.set(umask);
let _scoped_umask = ScopedUmask::new(umask);
// Safe because this doesn't modify any memory and we check the return value. We don't
// really check `flags` because if the kernel can't handle poorly specified flags then
@ -1916,8 +1897,7 @@ impl FileSystem for PassthroughFs {
let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
{
let mut um = self.umask.lock();
let _scoped_umask = um.set(umask);
let _scoped_umask = ScopedUmask::new(umask);
// Safe because this doesn't modify any memory and we check the return value.
syscall!(unsafe {
@ -1929,6 +1909,7 @@ impl FileSystem for PassthroughFs {
)
})?;
}
self.do_lookup(&data, name)
}

View file

@ -11,6 +11,7 @@ use std::sync::Arc;
use base::{error, Event, PollToken, SafeDescriptor, Tube, WaitContext};
use fuse::filesystem::{FileSystem, ZeroCopyReader, ZeroCopyWriter};
use sync::Mutex;
use sys_util::syscall;
use vm_control::{FsMappingRequest, VmResponse};
use vm_memory::GuestMemory;
@ -191,20 +192,21 @@ impl<F: FileSystem + Sync> Worker<F> {
#[cfg(target_os = "linux")]
{
// Safe because this doesn't modify any memory and we check the return value.
let mut securebits = unsafe { libc::prctl(libc::PR_GET_SECUREBITS) };
if securebits < 0 {
return Err(Error::GetSecurebits(io::Error::last_os_error()));
}
let mut securebits = syscall!(unsafe { libc::prctl(libc::PR_GET_SECUREBITS) })
.map_err(Error::GetSecurebits)?;
securebits |= SECBIT_NO_SETUID_FIXUP;
// Safe because this doesn't modify any memory and we check the return value.
let ret = unsafe { libc::prctl(libc::PR_SET_SECUREBITS, securebits) };
if ret < 0 {
return Err(Error::SetSecurebits(io::Error::last_os_error()));
}
syscall!(unsafe { libc::prctl(libc::PR_SET_SECUREBITS, securebits) })
.map_err(Error::SetSecurebits)?;
}
// To avoid extra locking, unshare filesystem attributes from parent. This includes the
// current working directory and umask.
// Safe because this doesn't modify any memory and we check the return value.
syscall!(unsafe { libc::unshare(libc::CLONE_FS) }).map_err(Error::UnshareFromParent)?;
#[derive(PollToken)]
enum Token {
// A request is ready on the queue.