Add fs device to --shared-dir

Expand the `--shared-dir` option to allow callers to select between 9p
and virtio-fs for sharing directories.

BUG=b:136128319
TEST=start a VM with a virtio-fs based shared directory

Change-Id: Ie8afc1965b693805dd6000f0157786317aab060d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1705656
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Chirantan Ekbote <chirantan@chromium.org>
This commit is contained in:
Chirantan Ekbote 2019-07-17 10:50:30 +09:00 committed by Commit Bot
parent 9093c002fe
commit bd4723b218
3 changed files with 227 additions and 10 deletions

View file

@ -14,8 +14,11 @@ use std::collections::BTreeMap;
use std::net;
use std::os::unix::io::RawFd;
use std::path::PathBuf;
use std::str::FromStr;
use devices::virtio::fs::passthrough;
use devices::SerialParameters;
use libc::{getegid, geteuid};
static SECCOMP_POLICY_DIR: &str = "/usr/share/policy/crosvm";
@ -69,6 +72,52 @@ impl TouchDeviceOption {
}
}
pub enum SharedDirKind {
FS,
P9,
}
impl FromStr for SharedDirKind {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
use SharedDirKind::*;
match s {
"fs" | "FS" => Ok(FS),
"9p" | "9P" | "p9" | "P9" => Ok(P9),
_ => Err("invalid file system type"),
}
}
}
impl Default for SharedDirKind {
fn default() -> SharedDirKind {
SharedDirKind::P9
}
}
pub struct SharedDir {
pub src: PathBuf,
pub tag: String,
pub kind: SharedDirKind,
pub uid_map: String,
pub gid_map: String,
pub cfg: passthrough::Config,
}
impl Default for SharedDir {
fn default() -> SharedDir {
SharedDir {
src: Default::default(),
tag: Default::default(),
kind: Default::default(),
uid_map: format!("0 {} 1", unsafe { geteuid() }),
gid_map: format!("0 {} 1", unsafe { getegid() }),
cfg: Default::default(),
}
}
}
/// Aggregate of all configurable options for a running VM.
pub struct Config {
pub vcpu_count: Option<u32>,
@ -93,7 +142,7 @@ pub struct Config {
pub wayland_socket_path: Option<PathBuf>,
pub wayland_dmabuf: bool,
pub x_display: Option<String>,
pub shared_dirs: Vec<(PathBuf, String)>,
pub shared_dirs: Vec<SharedDir>,
pub sandbox: bool,
pub seccomp_policy_dir: PathBuf,
pub seccomp_log_failures: bool,

View file

@ -14,6 +14,7 @@ use std::mem;
use std::net::Ipv4Addr;
#[cfg(feature = "gpu")]
use std::num::NonZeroU8;
use std::num::ParseIntError;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::os::unix::net::UnixStream;
use std::path::{Path, PathBuf};
@ -58,7 +59,7 @@ use vm_control::{
VmRunMode,
};
use crate::{Config, DiskOption, Executable, TouchDeviceOption};
use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage};
@ -101,6 +102,8 @@ pub enum Error {
Disk(io::Error),
DiskImageLock(sys_util::Error),
DropCapabilities(sys_util::Error),
FsDeviceNew(virtio::fs::Error),
GetMaxOpenFiles(io::Error),
InputDeviceNew(virtio::InputError),
InputEventsOpen(std::io::Error),
InvalidFdPath,
@ -114,6 +117,7 @@ pub enum Error {
OpenKernel(PathBuf, io::Error),
OpenVinput(PathBuf, io::Error),
P9DeviceNew(virtio::P9Error),
ParseMaxOpenFiles(ParseIntError),
PivotRootDoesntExist(&'static str),
PmemDeviceImageTooBig,
PmemDeviceNew(sys_util::Error),
@ -135,6 +139,7 @@ pub enum Error {
ResetTimerFd(sys_util::Error),
RngDeviceNew(virtio::RngError),
SettingGidMap(io_jail::Error),
SettingMaxOpenFiles(io_jail::Error),
SettingUidMap(io_jail::Error),
SignalFd(sys_util::SignalFdError),
SpawnVcpu(io::Error),
@ -183,6 +188,8 @@ impl Display for Error {
Disk(e) => write!(f, "failed to load disk image: {}", e),
DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
@ -201,6 +208,7 @@ impl Display for Error {
OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
PmemDeviceImageTooBig => {
write!(f, "failed to create pmem device: pmem device image too big")
@ -232,6 +240,7 @@ impl Display for Error {
ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
@ -278,6 +287,15 @@ impl AsRawFd for TaggedControlSocket {
}
}
fn get_max_open_files() -> Result<libc::rlim_t> {
let mut buf = String::with_capacity(32);
File::open("/proc/sys/fs/file-max")
.and_then(|mut f| f.read_to_string(&mut buf))
.map_err(Error::GetMaxOpenFiles)?;
Ok(buf.trim().parse().map_err(Error::ParseMaxOpenFiles)?)
}
fn create_base_minijail(
root: &Path,
log_failures: bool,
@ -721,6 +739,65 @@ fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> Devic
})
}
fn create_fs_device(
cfg: &Config,
uid_map: &str,
gid_map: &str,
src: &Path,
tag: &str,
fs_cfg: virtio::fs::passthrough::Config,
) -> DeviceResult {
let mut j = Minijail::new().map_err(Error::DeviceJail)?;
if cfg.sandbox {
j.namespace_pids();
j.namespace_user();
j.namespace_user_disable_setgroups();
j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
// Run in an empty network namespace.
j.namespace_net();
j.no_new_privs();
// TODO(chirantan): Enable seccomp
// Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
// the entire device process if a worker thread commits a seccomp violation.
// let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device.policy");
// j.set_seccomp_filter_tsync();
// if cfg.seccomp_log_failures {
// j.log_seccomp_filter_failures();
// }
// j.parse_seccomp_filters(&seccomp_policy)
// .map_err(Error::DeviceJail)?;
// j.use_seccomp_filter();
// Don't do init setup.
j.run_as_init();
}
// Create a new mount namespace with the source directory as the root. We need this even when
// sandboxing is disabled as the server relies on the host kernel to prevent path traversals
// from leaking out of the shared directory.
j.namespace_vfs();
j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?;
// The file server opens a lot of fds and needs a really high open file limit.
let max_open_files = get_max_open_files()?;
j.set_rlimit(libc::RLIMIT_NOFILE, max_open_files, max_open_files)
.map_err(Error::SettingMaxOpenFiles)?;
// TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
// when num_queues > 1.
let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
Ok(VirtioDeviceStub {
dev: Box::new(dev),
jail: Some(j),
})
}
fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
Some(mut jail) => {
@ -926,9 +1003,21 @@ fn create_virtio_devices(
}
let chronos = get_chronos_ids();
for shared_dir in &cfg.shared_dirs {
let SharedDir {
src,
tag,
kind,
uid_map,
gid_map,
cfg: fs_cfg,
} = shared_dir;
for (src, tag) in &cfg.shared_dirs {
devs.push(create_9p_device(cfg, chronos, src, tag)?);
let dev = match kind {
SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
SharedDirKind::P9 => create_9p_device(cfg, chronos, src, tag)?,
};
devs.push(dev);
}
Ok(devs)

View file

@ -17,7 +17,7 @@ use std::time::Duration;
use crosvm::{
argument::{self, print_help, set_arguments, Argument},
linux, BindMount, Config, DiskOption, Executable, GidMap, TouchDeviceOption,
linux, BindMount, Config, DiskOption, Executable, GidMap, SharedDir, TouchDeviceOption,
};
use devices::{SerialParameters, SerialType};
use msg_socket::{MsgReceiver, MsgSender, MsgSocket};
@ -495,9 +495,20 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
);
}
"shared-dir" => {
// Formatted as <src:tag>.
// This is formatted as multiple fields, each separated by ":". The first 2 fields are
// fixed (src:tag). The rest may appear in any order:
//
// * type=TYPE - must be one of "p9" or "fs" (default: p9)
// * uidmap=UIDMAP - a uid map in the format "inner outer count[,inner outer count]"
// (default: "0 <current euid> 1")
// * gidmap=GIDMAP - a gid map in the same format as uidmap
// (default: "0 <current egid> 1")
// * timeout=TIMEOUT - a timeout value in seconds, which indicates how long attributes
// and directory contents should be considered valid (default: 5)
// * cache=CACHE - one of "never", "always", or "auto" (default: auto)
// * writeback=BOOL - indicates whether writeback caching should be enabled (default: false)
let param = value.unwrap();
let mut components = param.splitn(2, ':');
let mut components = param.split(':');
let src =
PathBuf::from(
components
@ -522,7 +533,66 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
});
}
cfg.shared_dirs.push((src, tag));
let mut shared_dir = SharedDir {
src,
tag,
..Default::default()
};
for opt in components {
let mut o = opt.splitn(2, '=');
let kind = o.next().ok_or_else(|| argument::Error::InvalidValue {
value: opt.to_owned(),
expected: "`shared-dir` options must not be empty",
})?;
let value = o.next().ok_or_else(|| argument::Error::InvalidValue {
value: opt.to_owned(),
expected: "`shared-dir` options must be of the form `kind=value`",
})?;
match kind {
"type" => {
shared_dir.kind =
value.parse().map_err(|_| argument::Error::InvalidValue {
value: value.to_owned(),
expected: "`type` must be one of `fs` or `9p`",
})?
}
"uidmap" => shared_dir.uid_map = value.into(),
"gidmap" => shared_dir.gid_map = value.into(),
"timeout" => {
let seconds = value.parse().map_err(|_| argument::Error::InvalidValue {
value: value.to_owned(),
expected: "`timeout` must be an integer",
})?;
let dur = Duration::from_secs(seconds);
shared_dir.cfg.entry_timeout = dur.clone();
shared_dir.cfg.attr_timeout = dur;
}
"cache" => {
let policy = value.parse().map_err(|_| argument::Error::InvalidValue {
value: value.to_owned(),
expected: "`cache` must be one of `never`, `always`, or `auto`",
})?;
shared_dir.cfg.cache_policy = policy;
}
"writeback" => {
let writeback =
value.parse().map_err(|_| argument::Error::InvalidValue {
value: value.to_owned(),
expected: "`writeback` must be a boolean",
})?;
shared_dir.cfg.writeback = writeback;
}
_ => {
return Err(argument::Error::InvalidValue {
value: kind.to_owned(),
expected: "unrecognized option for `shared-dir`",
})
}
}
}
cfg.shared_dirs.push(shared_dir);
}
"seccomp-policy-dir" => {
// `value` is Some because we are in this match so it's safe to unwrap.
@ -813,8 +883,17 @@ fn run_vm(args: std::env::Args) -> std::result::Result<(), ()> {
"Path to put the control socket. If PATH is a directory, a name will be generated."),
Argument::flag("disable-sandbox", "Run all devices in one, non-sandboxed process."),
Argument::value("cid", "CID", "Context ID for virtual sockets."),
Argument::value("shared-dir", "PATH:TAG",
"Directory to be shared with a VM as a source:tag pair. Can be given more than once."),
Argument::value("shared-dir", "PATH:TAG[:type=TYPE:writeback=BOOL:timeout=SECONDS:uidmap=UIDMAP:gidmap=GIDMAP:cache=CACHE]",
"Colon-separated options for configuring a directory to be shared with the VM.
The first field is the directory to be shared and the second field is the tag that the VM can use to identify the device.
The remaining fields are key=value pairs that may appear in any order. Valid keys are:
type=(p9, fs) - Indicates whether the directory should be shared via virtio-9p or virtio-fs (default: p9).
uidmap=UIDMAP - The uid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 <current euid> 1).
gidmap=GIDMAP - The gid map to use for the device's jail in the format \"inner outer count[,inner outer count]\" (default: 0 <current egid> 1).
cache=(never, auto, always) - Indicates whether the VM can cache the contents of the shared directory (default: auto). When set to \"auto\" and the type is \"fs\", the VM will use close-to-open consistency for file contents.
timeout=SECONDS - How long the VM should consider file attributes and directory entries to be valid (default: 5). If the VM has exclusive access to the directory, then this should be a large value. If the directory can be modified by other processes, then this should be 0.
writeback=BOOL - Indicates whether the VM can use writeback caching (default: false). This is only safe to do when the VM has exclusive access to the files in a directory. Additionally, the server should have read permission for all files as the VM may issue read requests even for files that are opened write-only.
"),
Argument::value("seccomp-policy-dir", "PATH", "Path to seccomp .policy files."),
Argument::flag("seccomp-log-failures", "Instead of seccomp filter failures being fatal, they will be logged instead."),
#[cfg(feature = "plugin")]