Implement virtio-vsock

Implement the virtual sockets device using the vhost subsystem of the host
kernel to handle data transfer.

BUG=chromium:708267
TEST=build and run maitred in guest VM without issue

Change-Id: I35b542c0fc7e0fd9296f7ba3e1dfce60bf524d15
Signed-off-by: Chirantan Ekbote <chirantan@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/638838
Reviewed-by: Stephen Barber <smbarber@chromium.org>
This commit is contained in:
Chirantan Ekbote 2017-08-28 09:51:18 -07:00 committed by chrome-bot
parent 270f7b6a16
commit 88f9cba448
9 changed files with 465 additions and 16 deletions

View file

@ -0,0 +1,43 @@
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
close: 1
exit_group: 1
futex: 1
# Whitelist vhost_vsock ioctls only.
# arg1 == VHOST_GET_FEATURES ||
# arg1 == VHOST_SET_FEATURES ||
# arg1 == VHOST_SET_OWNER ||
# arg1 == VHOST_RESET_OWNER ||
# arg1 == VHOST_SET_MEM_TABLE ||
# arg1 == VHOST_SET_LOG_BASE ||
# arg1 == VHOST_SET_LOG_FD ||
# arg1 == VHOST_SET_VRING_NUM ||
# arg1 == VHOST_SET_VRING_ADDR ||
# arg1 == VHOST_SET_VRING_BASE ||
# arg1 == VHOST_GET_VRING_BASE ||
# arg1 == VHOST_SET_VRING_KICK ||
# arg1 == VHOST_SET_VRING_CALL ||
# arg1 == VHOST_SET_VRING_ERR ||
# arg1 == VHOST_VSOCK_SET_GUEST_CID ||
# arg1 == VHOST_VSOCK_SET_RUNNING
ioctl: arg1 == 0x8008af00 || arg1 == 0x4008af00 || arg1 == 0x0000af01 || arg1 == 0x0000af02 || arg1 == 0x4008af03 || arg1 == 0x4008af04 || arg1 == 0x4004af07 || arg1 == 0x4008af10 || arg1 == 0x4028af11 || arg1 == 0x4008af12 || arg1 == 0xc008af12 || arg1 == 0x4008af20 || arg1 == 0x4008af21 || arg1 == 0x4008af22 || arg1 == 0x4008af60 || arg1 == 0x4004af61
# Disallow mmap with PROT_EXEC set. The syntax here doesn't allow bit
# negation, thus the manually negated mask constant.
mmap: arg2 in 0xfffffffb
mprotect: arg2 in 0xfffffffb
munmap: 1
poll: 1
read: 1
connect: 1
sendto: 1
recvfrom: 1
sched_getaffinity: 1
set_robust_list: 1
sigaltstack: 1
# Disallow clones other than new threads (0x00010000 is CLONE_THREAD).
# arg0 is flags: the raw kernel syscall takes the flags first, unlike the libc wrapper.
clone: arg0 & 0x00010000
write: 1
getpid: 1

View file

@ -30,6 +30,7 @@ const DEVICE_FAILED: u32 = 0x80;
const TYPE_NET: u32 = 1;
const TYPE_BLOCK: u32 = 2;
const TYPE_RNG: u32 = 4;
const TYPE_VSOCK: u32 = 19;
const TYPE_WL: u32 = 30;
const INTERRUPT_STATUS_USED_RING: u32 = 0x1;

View file

@ -11,9 +11,11 @@ use sys_util::Error as SysError;
use vhost::Error as VhostError;
mod net;
mod vsock;
mod worker;
pub use self::net::Net;
pub use self::vsock::Vsock;
#[derive(Debug)]
pub enum Error {
@ -57,6 +59,10 @@ pub enum Error {
VhostSetVringKick(VhostError),
/// Net set backend failed.
VhostNetSetBackend(VhostError),
/// Failed to set CID for guest.
VhostVsockSetCid(VhostError),
/// Failed to start vhost-vsock driver.
VhostVsockStart(VhostError),
/// Failed to create vhost eventfd.
VhostIrqCreate(SysError),
/// Failed to read vhost eventfd.

View file

@ -0,0 +1,280 @@
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use std::thread::spawn;
use byteorder::{ByteOrder, LittleEndian};
use sys_util::{EventFd, GuestMemory};
use vhost::Vsock as VhostVsockHandle;
use virtio_sys::vhost;
use super::{Error, Result};
use super::super::{Queue, VirtioDevice, TYPE_VSOCK};
use super::worker::Worker;
// Maximum size advertised for each virtqueue.
const QUEUE_SIZE: u16 = 256;
// Number of virtqueues the device exposes. `activate` hands the first two to the vhost worker;
// the third is an event-only queue that the vhost subsystem does not handle.
const NUM_QUEUES: usize = 3;
// Per-queue maximum sizes returned by `queue_max_sizes`: every queue uses `QUEUE_SIZE`.
const QUEUE_SIZES: &'static [u16] = &[QUEUE_SIZE; NUM_QUEUES];
/// Virtio vsock device whose data transfer is handled by the host kernel's vhost subsystem.
///
/// The `Option` fields are `Some` after `new` and `None` after `new_for_testing`; several of
/// them are moved out of the device when `activate` starts the worker thread.
pub struct Vsock {
// Kill eventfd clone destined for the worker thread; taken (left `None`) by `activate`.
worker_kill_evt: Option<EventFd>,
// Kill eventfd kept by the device; `Drop` writes to it once the worker has claimed its clone.
kill_evt: Option<EventFd>,
// Handle to the vhost-vsock instance; moved into the worker thread by `activate`.
vhost_handle: Option<VhostVsockHandle>,
// Context ID of the guest, exposed through `read_config`.
cid: u64,
// Eventfd created for the vhost interrupt (see `Error::VhostIrqCreate`); moved into the
// worker thread by `activate`.
interrupt: Option<EventFd>,
// Feature bits offered to the guest.
avail_features: u64,
// Subset of `avail_features` that the guest has acknowledged so far.
acked_features: u64,
}
impl Vsock {
/// Create a new virtio-vsock device with the given VM cid.
pub fn new(cid: u64, mem: &GuestMemory) -> Result<Vsock> {
let kill_evt = EventFd::new().map_err(Error::CreateKillEventFd)?;
let handle = VhostVsockHandle::new(mem).map_err(Error::VhostOpen)?;
let avail_features =
1 << vhost::VIRTIO_F_NOTIFY_ON_EMPTY | 1 << vhost::VIRTIO_RING_F_INDIRECT_DESC |
1 << vhost::VIRTIO_RING_F_EVENT_IDX | 1 << vhost::VHOST_F_LOG_ALL |
1 << vhost::VIRTIO_F_ANY_LAYOUT | 1 << vhost::VIRTIO_F_VERSION_1;
Ok(Vsock {
worker_kill_evt: Some(kill_evt.try_clone().map_err(Error::CloneKillEventFd)?),
kill_evt: Some(kill_evt),
vhost_handle: Some(handle),
cid: cid,
interrupt: Some(EventFd::new().map_err(Error::VhostIrqCreate)?),
avail_features: avail_features,
acked_features: 0,
})
}
pub fn new_for_testing(cid: u64, features: u64) -> Vsock {
Vsock {
worker_kill_evt: None,
kill_evt: None,
vhost_handle: None,
cid: cid,
interrupt: None,
avail_features: features,
acked_features: 0,
}
}
pub fn acked_features(&self) -> u64 {
self.acked_features
}
}
impl Drop for Vsock {
    /// Signals the worker thread to exit, but only if `activate` handed it a kill eventfd.
    fn drop(&mut self) {
        // The worker never claimed its eventfd, so there is nothing to kill.
        if self.worker_kill_evt.is_some() {
            return;
        }

        if let Some(ref evt) = self.kill_evt {
            // A failed write cannot be handled meaningfully here, so the result is discarded.
            let _ = evt.write(1);
        }
    }
}
impl VirtioDevice for Vsock {
fn keep_fds(&self) -> Vec<RawFd> {
let mut keep_fds = Vec::new();
if let Some(ref handle) = self.vhost_handle {
keep_fds.push(handle.as_raw_fd());
}
if let Some(ref interrupt) = self.interrupt {
keep_fds.push(interrupt.as_raw_fd());
}
if let Some(ref worker_kill_evt) = self.worker_kill_evt {
keep_fds.push(worker_kill_evt.as_raw_fd());
}
keep_fds
}
fn device_type(&self) -> u32 {
TYPE_VSOCK
}
fn queue_max_sizes(&self) -> &[u16] {
QUEUE_SIZES
}
fn features(&self, page: u32) -> u32 {
match page {
// Get the lower 32-bits of the features bitfield.
0 => self.avail_features as u32,
// Get the upper 32-bits of the features bitfield.
1 => (self.avail_features >> 32) as u32,
_ => {
warn!(
"vsock: virtio-vsock got request for features page: {}",
page
);
0u32
},
}
}
fn read_config(&self, offset: u64, data: &mut [u8]) {
match offset {
0 if data.len() == 8 => LittleEndian::write_u64(data, self.cid),
0 if data.len() == 4 => LittleEndian::write_u32(data, (self.cid & 0xffffffff) as u32),
4 if data.len() == 4 => {
LittleEndian::write_u32(data, ((self.cid >> 32) & 0xffffffff) as u32)
},
_ => warn!(
"vsock: virtio-vsock received invalid read request of {} bytes at offset {}",
data.len(),
offset
),
}
}
fn ack_features(&mut self, page: u32, value: u32) {
let mut v = match page {
0 => value as u64,
1 => (value as u64) << 32,
_ => {
warn!(
"vsock: virtio-vsock device cannot ack unknown feature page: {}",
page
);
0u64
},
};
// Check if the guest is ACK'ing a feature that we didn't claim to have.
let unrequested_features = v & !self.avail_features;
if unrequested_features != 0 {
warn!("vsock: virtio-vsock got unknown feature ack: {:x}", v);
// Don't count these features as acked.
v &= !unrequested_features;
}
self.acked_features |= v;
}
fn activate(
&mut self,
_: GuestMemory,
interrupt_evt: EventFd,
status: Arc<AtomicUsize>,
queues: Vec<Queue>,
queue_evts: Vec<EventFd>,
) {
if queues.len() != NUM_QUEUES || queue_evts.len() != NUM_QUEUES {
error!("net: expected {} queues, got {}", NUM_QUEUES, queues.len());
return;
}
if let Some(vhost_handle) = self.vhost_handle.take() {
if let Some(interrupt) = self.interrupt.take() {
if let Some(kill_evt) = self.worker_kill_evt.take() {
let acked_features = self.acked_features;
let cid = self.cid;
spawn(move || {
// The third vq is an event-only vq that is not handled by the vhost
// subsystem (but still needs to exist). Split it off here.
let vhost_queues = queues[..2].to_vec();
let mut worker = Worker::new(
vhost_queues,
vhost_handle,
interrupt,
status,
interrupt_evt,
acked_features,
);
let activate_vqs = |handle: &VhostVsockHandle| -> Result<()> {
handle.set_cid(cid).map_err(Error::VhostVsockSetCid)?;
handle.start().map_err(Error::VhostVsockStart)?;
Ok(())
};
let result = worker.run(queue_evts, QUEUE_SIZES, kill_evt, activate_vqs);
if let Err(e) = result {
error!("vsock worker thread exited with error: {:?}", e);
}
});
}
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use byteorder::{ByteOrder, LittleEndian};

    /// Acked features accumulate across pages, and bits that were never offered are dropped.
    #[test]
    fn ack_features() {
        let cid = 5;
        let offered: u64 = (1 << 20) | (1 << 49) | (1 << 2) | (1 << 19);
        let mut requested: u64 = 0;
        let mut rejected: u64 = 0;

        let mut dev = Vsock::new_for_testing(cid, offered);
        assert_eq!(requested, dev.acked_features());

        // An offered bit in the low page is accepted.
        requested |= 1 << 2;
        dev.ack_features(0, (requested & 0xffffffff) as u32);
        assert_eq!(requested, dev.acked_features());

        // An offered bit in the high page is accepted.
        requested |= 1 << 49;
        dev.ack_features(1, (requested >> 32) as u32);
        assert_eq!(requested, dev.acked_features());

        // A bit in the high page that was not offered is ignored.
        requested |= 1 << 60;
        rejected |= 1 << 60;
        dev.ack_features(1, (requested >> 32) as u32);
        assert_eq!(requested & !rejected, dev.acked_features());

        // A bit in the low page that was not offered is ignored as well.
        requested |= 1 << 1;
        rejected |= 1 << 1;
        dev.ack_features(0, (requested & 0xffffffff) as u32);
        assert_eq!(requested & !rejected, dev.acked_features());
    }

    /// The CID can be read whole or in 32-bit halves; invalid reads leave the buffer alone.
    #[test]
    fn read_config() {
        let cid = 0xfca9a559fdcb9756;
        let dev = Vsock::new_for_testing(cid, 0);
        let mut bytes = [0 as u8; 8];

        // Full 64-bit read.
        dev.read_config(0, &mut bytes);
        assert_eq!(cid, LittleEndian::read_u64(&bytes));

        // Low half.
        dev.read_config(0, &mut bytes[..4]);
        assert_eq!((cid & 0xffffffff) as u32, LittleEndian::read_u32(&bytes[..4]));

        // High half.
        dev.read_config(4, &mut bytes[..4]);
        assert_eq!((cid >> 32) as u32, LittleEndian::read_u32(&bytes[..4]));

        // A read at an unsupported offset must not modify the caller's buffer.
        let sentinel: [u8; 8] = [8, 226, 5, 46, 159, 59, 89, 77];
        bytes.copy_from_slice(&sentinel);
        dev.read_config(12, &mut bytes);
        assert_eq!(&bytes, &sentinel);
    }

    /// Feature pages 0 and 1 return the two halves of the bitfield; other pages return 0.
    #[test]
    fn features() {
        let cid = 5;
        let offered: u64 = 0xfc195ae8db88cff9;
        let dev = Vsock::new_for_testing(cid, offered);

        assert_eq!((offered & 0xffffffff) as u32, dev.features(0));
        assert_eq!((offered >> 32) as u32, dev.features(1));
        assert_eq!(0, dev.features(559));
        assert_eq!(0, dev.features(3));
    }
}

View file

@ -60,11 +60,14 @@ enum Error {
VhostNetDeviceNew(hw::virtio::vhost::Error),
NetDeviceNew(hw::virtio::NetError),
NetDeviceRootSetup(sys_util::Error),
VhostVsockDeviceNew(hw::virtio::vhost::Error),
VsockDeviceRootSetup(sys_util::Error),
DeviceJail(io_jail::Error),
DevicePivotRoot(io_jail::Error),
RegisterBlock(device_manager::Error),
RegisterNet(device_manager::Error),
RegisterWayland(device_manager::Error),
RegisterVsock(device_manager::Error),
Cmdline(kernel_cmdline::Error),
MissingWayland(PathBuf),
RegisterIrqfd(sys_util::Error),
@ -114,12 +117,17 @@ impl fmt::Display for Error {
}
&Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e),
&Error::VhostNetDeviceNew(ref e) => write!(f, "failed to set up vhost networking: {:?}", e),
&Error::RegisterVsock(ref e) => write!(f, "error registering virtual socket device: {:?}", e),
&Error::NetDeviceNew(ref e) => write!(f, "failed to set up virtio networking: {:?}", e),
&Error::NetDeviceRootSetup(ref e) => {
write!(f, "failed to create root directory for a net device: {:?}", e)
}
&Error::DeviceJail(ref e) => write!(f, "failed to jail device: {}", e),
&Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {}", e),
&Error::VhostVsockDeviceNew(ref e) => write!(f, "failed to set up virtual socket device: {:?}", e),
&Error::VsockDeviceRootSetup(ref e) => {
write!(f, "failed to create root directory for a vsock device: {:?}", e)
}
&Error::RegisterNet(ref e) => write!(f, "error registering net device: {:?}", e),
&Error::RegisterRng(ref e) => write!(f, "error registering rng device: {:?}", e),
&Error::RngDeviceNew(ref e) => write!(f, "failed to set up rng: {:?}", e),
@ -190,6 +198,7 @@ struct Config {
socket_path: Option<PathBuf>,
multiprocess: bool,
warn_unknown_ports: bool,
cid: Option<u64>,
}
const KERNEL_START_OFFSET: usize = 0x200000;
@ -383,6 +392,24 @@ fn run_config(cfg: Config) -> Result<()> {
}
}
let vsock_root = TempDir::new(&PathBuf::from("/tmp/vsock_root"))
.map_err(Error::VsockDeviceRootSetup)?;
if let Some(cid) = cfg.cid {
let vsock_box = Box::new(hw::virtio::vhost::Vsock::new(cid, &guest_mem)
.map_err(|e| Error::VhostVsockDeviceNew(e))?);
let jail = if cfg.multiprocess {
let root_path = vsock_root.as_path().unwrap();
let policy_path = Path::new("vhost_vsock_device.policy");
Some(create_base_minijail(root_path, policy_path)?)
} else {
None
};
device_manager.register_mmio(vsock_box, jail, &mut cmdline).map_err(Error::RegisterVsock)?;
}
if !cfg.params.is_empty() {
cmdline
.insert_str(cfg.params)
@ -856,6 +883,17 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
"multiprocess" => {
cfg.multiprocess = true;
}
"cid" => {
// Reject a repeated `cid` flag instead of silently overriding the earlier value.
if cfg.cid.is_some() {
    return Err(argument::Error::TooManyArguments("`cid` already given".to_owned()));
}
cfg.cid = Some(value.unwrap().parse().map_err(|_| {
argument::Error::InvalidValue {
value: value.unwrap().to_owned(),
expected: "this value for `cid` must be an unsigned integer",
}
})?);
}
"help" => return Err(argument::Error::PrintHelp),
_ => unreachable!(),
}
@ -892,6 +930,7 @@ fn run_vm(args: std::env::Args) {
"PATH",
"Path to put the control socket. If PATH is a directory, a name will be generated."),
Argument::short_flag('u', "multiprocess", "Run each device in a child process."),
Argument::value("cid", "CID", "Context ID for virtual sockets"),
Argument::short_flag('h', "help", "Print help message.")];
let mut cfg = Config::default();

View file

@ -8,7 +8,10 @@ extern crate sys_util;
extern crate virtio_sys;
pub mod net;
mod vsock;
pub use net::Net;
pub use vsock::Vsock;
use std::io::Error as IoError;
use std::mem;

View file

@ -4,10 +4,9 @@
use libc;
use net_util;
use std::ffi::CString;
use std::fs::File;
use std::io::Error as IoError;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::fs::{File, OpenOptions};
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, RawFd};
use virtio_sys;
use sys_util::{ioctl_with_ref, GuestMemory};
@ -43,19 +42,13 @@ impl Net {
/// # Arguments
/// * `mem` - Guest memory mapping.
pub fn new(mem: &GuestMemory) -> Result<Net> {
// Open calls are safe because we give a constant nul-terminated
// string and verify the result. The CString unwrap is safe because
// DEVICE does not have any embedded '\0' characters.
let fd = unsafe {
libc::open(CString::new(DEVICE).unwrap().as_ptr(),
libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC)
};
if fd < 0 {
return Err(Error::VhostOpen(IoError::last_os_error()));
}
Ok(Net {
// There are no other users of this fd, so this is safe.
fd: unsafe { File::from_raw_fd(fd) },
fd: OpenOptions::new()
.read(true)
.write(true)
.custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
.open(DEVICE)
.map_err(Error::VhostOpen)?,
mem: mem.clone(),
})
}

82
vhost/src/vsock.rs Normal file
View file

@ -0,0 +1,82 @@
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use libc;
use std::fs::{File, OpenOptions};
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, RawFd};
use sys_util::{ioctl_with_ref, GuestMemory};
use virtio_sys::{VHOST_VSOCK_SET_GUEST_CID, VHOST_VSOCK_SET_RUNNING};
use super::{ioctl_result, Error, Result, Vhost};
// Path of the vhost-vsock device node opened by `Vsock::new`.
static DEVICE: &'static str = "/dev/vhost-vsock";
/// Handle for running VHOST_VSOCK ioctls.
pub struct Vsock {
// Open vhost-vsock device file; the ioctls are issued on this descriptor.
fd: File,
// Clone of the guest memory passed to `new`, returned by `Vhost::mem`.
mem: GuestMemory,
}
impl Vsock {
/// Open a handle to a new VHOST_VSOCK instance.
pub fn new(mem: &GuestMemory) -> Result<Vsock> {
Ok(Vsock {
fd: OpenOptions::new()
.read(true)
.write(true)
.custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
.open(DEVICE)
.map_err(Error::VhostOpen)?,
mem: mem.clone(),
})
}
/// Set the CID for the guest. This number is used for routing all data destined for
/// programs
/// running in the guest.
///
/// # Arguments
/// * `cid` - CID to assign to the guest
pub fn set_cid(&self, cid: u64) -> Result<()> {
let ret = unsafe { ioctl_with_ref(&self.fd, VHOST_VSOCK_SET_GUEST_CID(), &cid) };
if ret < 0 {
return ioctl_result();
}
Ok(())
}
/// Tell the VHOST driver to start performing data transfer.
pub fn start(&self) -> Result<()> {
self.set_running(true)
}
/// Tell the VHOST driver to stop performing data transfer.
pub fn stop(&self) -> Result<()> {
self.set_running(false)
}
fn set_running(&self, running: bool) -> Result<()> {
let on: ::std::os::raw::c_int = if running { 1 } else { 0 };
let ret = unsafe { ioctl_with_ref(&self.fd, VHOST_VSOCK_SET_RUNNING(), &on) };
if ret < 0 {
return ioctl_result();
}
Ok(())
}
}
// Only `mem` is supplied here; NOTE(review): the remaining `Vhost` methods presumably come
// from default implementations on the trait, which is defined outside this file.
impl Vhost for Vsock {
// Returns the guest memory this handle was created with.
fn mem(&self) -> &GuestMemory {
&self.mem
}
}
impl AsRawFd for Vsock {
// Exposes the descriptor of the underlying vhost-vsock device file. Ownership stays with
// this handle.
fn as_raw_fd(&self) -> RawFd {
self.fd.as_raw_fd()
}
}

View file

@ -41,3 +41,5 @@ ioctl_iow_nr!(VHOST_SCSI_CLEAR_ENDPOINT, VHOST, 0x41, vhost_scsi_target);
ioctl_iow_nr!(VHOST_SCSI_GET_ABI_VERSION, VHOST, 0x42, ::std::os::raw::c_int);
ioctl_iow_nr!(VHOST_SCSI_SET_EVENTS_MISSED, VHOST, 0x43, ::std::os::raw::c_uint);
ioctl_iow_nr!(VHOST_SCSI_GET_EVENTS_MISSED, VHOST, 0x44, ::std::os::raw::c_uint);
ioctl_iow_nr!(VHOST_VSOCK_SET_GUEST_CID, VHOST, 0x60, ::std::os::raw::c_ulonglong);
ioctl_iow_nr!(VHOST_VSOCK_SET_RUNNING, VHOST, 0x61, ::std::os::raw::c_int);