crosvm: add vhost_net virtio device

vhost_net is an in-kernel implementation of a virtio_net device. This device
implementation is responsible only for negotiating features and setting up
vhost_net to point to the virtqueues in the guest.

Signed-off-by: Stephen Barber <smbarber@chromium.org>

BUG=chromium:738639
TEST=boot with networking enabled and ping host

Change-Id: I9192765b9348528a8a7a7f273e9522c2373d22fb
Reviewed-on: https://chromium-review.googlesource.com/538106
Commit-Ready: Stephen Barber <smbarber@chromium.org>
Tested-by: Stephen Barber <smbarber@chromium.org>
Reviewed-by: Zach Reizner <zachr@chromium.org>
This commit is contained in:
Stephen Barber 2017-06-21 15:16:11 -07:00 committed by chrome-bot
parent d02ae30698
commit 2cfc2056aa
6 changed files with 450 additions and 1 deletions

View file

@ -15,6 +15,11 @@ x86_64 = { path = "x86_64" }
kernel_loader = { path = "kernel_loader" } kernel_loader = { path = "kernel_loader" }
libc = "0.2.21" libc = "0.2.21"
byteorder = "1" byteorder = "1"
syscall_defines = { path = "syscall_defines" }
net_sys = { path = "net_sys" }
net_util = { path = "net_util" }
vhost = { path = "vhost" }
virtio_sys = { path = "virtio_sys" }
[dependencies.clap] [dependencies.clap]
version = "*" version = "*"

35
net_device.policy Normal file
View file

@ -0,0 +1,35 @@
close: 1
exit_group: 1
futex: 1
# Whitelist vhost_net ioctls only.
# arg1 == VHOST_GET_FEATURES ||
# arg1 == VHOST_SET_FEATURES ||
# arg1 == VHOST_SET_OWNER ||
# arg1 == VHOST_RESET_OWNER ||
# arg1 == VHOST_SET_MEM_TABLE ||
# arg1 == VHOST_SET_LOG_BASE ||
# arg1 == VHOST_SET_LOG_FD ||
# arg1 == VHOST_SET_VRING_NUM ||
# arg1 == VHOST_SET_VRING_ADDR ||
# arg1 == VHOST_SET_VRING_BASE ||
# arg1 == VHOST_GET_VRING_BASE ||
# arg1 == VHOST_SET_VRING_KICK ||
# arg1 == VHOST_SET_VRING_CALL ||
# arg1 == VHOST_SET_VRING_ERR ||
# arg1 == VHOST_NET_SET_BACKEND
ioctl: arg1 == 0x8008af00 || arg1 == 0x4008af00 || arg1 == 0x0000af01 || arg1 == 0x0000af02 || arg1 == 0x4008af03 || arg1 == 0x4008af04 || arg1 == 0x4004af07 || arg1 == 0x4008af10 || arg1 == 0x4028af11 || arg1 == 0x4008af12 || arg1 == 0xc008af12 || arg1 == 0x4008af20 || arg1 == 0x4008af21 || arg1 == 0x4008af22 || arg1 == 0x4008af30
# Disallow mmap with PROT_EXEC set. The syntax here doesn't allow bit
# negation, thus the manually negated mask constant.
mmap: arg2 in 0xfffffffb
mprotect: arg2 in 0xfffffffb
munmap: 1
poll: 1
read: 1
recvfrom: 1
sched_getaffinity: 1
set_robust_list: 1
sigaltstack: 1
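# Disallow clones other than new threads.
# arg0 is the flags argument of the raw clone syscall (unlike the libc
# wrapper, where flags come later); 0x00010000 is CLONE_THREAD.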
clone: arg0 & 0x00010000
write: 1

View file

@ -7,10 +7,12 @@
mod queue; mod queue;
mod mmio; mod mmio;
mod block; mod block;
mod net;
pub use self::queue::*; pub use self::queue::*;
pub use self::mmio::*; pub use self::mmio::*;
pub use self::block::*; pub use self::block::*;
pub use self::net::*;
const DEVICE_ACKNOWLEDGE: u32 = 0x01; const DEVICE_ACKNOWLEDGE: u32 = 0x01;
const DEVICE_DRIVER: u32 = 0x02; const DEVICE_DRIVER: u32 = 0x02;
@ -18,6 +20,7 @@ const DEVICE_DRIVER_OK: u32 = 0x04;
const DEVICE_FEATURES_OK: u32 = 0x08; const DEVICE_FEATURES_OK: u32 = 0x08;
const DEVICE_FAILED: u32 = 0x80; const DEVICE_FAILED: u32 = 0x80;
const TYPE_NET: u32 = 1;
const TYPE_BLOCK: u32 = 2; const TYPE_BLOCK: u32 = 2;
const INTERRUPT_STATUS_USED_RING: u32 = 0x1; const INTERRUPT_STATUS_USED_RING: u32 = 0x1;

340
src/hw/virtio/net.rs Normal file
View file

@ -0,0 +1,340 @@
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::mem;
use std::net::Ipv4Addr;
use std::os::raw::*;
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread::spawn;
use net_sys;
use net_util::{Tap, Error as TapError};
use sys_util::{Error as SysError, EventFd, GuestMemory, Poller};
use vhost::{VhostNet, Error as VhostError};
use virtio_sys::{vhost, virtio_net};
use virtio_sys::virtio_net::virtio_net_hdr_mrg_rxbuf;
use super::{VirtioDevice, Queue, INTERRUPT_STATUS_USED_RING, TYPE_NET};
/// Maximum number of descriptors offered for each virtqueue of this device.
const QUEUE_SIZE: u16 = 256;
/// Maximum sizes of the device's two virtqueues, indexed by queue number.
/// Returned verbatim from `queue_max_sizes` and also passed to vhost as the
/// per-queue maximum when the vring addresses are configured.
const QUEUE_SIZES: &'static [u16] = &[QUEUE_SIZE, QUEUE_SIZE];
/// Errors that can occur while creating the vhost-backed virtio net device or
/// while its worker thread is running.
///
/// Each variant wraps the error returned by the step that failed, so the
/// variant name identifies the failing operation.
#[derive(Debug)]
pub enum NetError {
/// Creating the kill eventfd failed.
CreateKillEventFd(SysError),
/// Cloning the kill eventfd (for the worker thread) failed.
CloneKillEventFd(SysError),
/// Opening the tap device failed.
TapOpen(TapError),
/// Setting the tap interface's IP address failed.
TapSetIp(TapError),
/// Setting the tap interface's netmask failed.
TapSetNetmask(TapError),
/// Setting the tap interface's offload flags failed.
TapSetOffload(TapError),
/// Setting the tap interface's vnet header size failed.
TapSetVnetHdrSize(TapError),
/// Enabling the tap interface failed.
TapEnable(TapError),
/// Opening the vhost-net device failed.
VhostOpen(VhostError),
/// The vhost "set owner" call failed.
VhostSetOwner(VhostError),
/// The vhost "get features" call failed.
VhostGetFeatures(VhostError),
/// The vhost "set features" call failed.
VhostSetFeatures(VhostError),
/// The vhost "set mem table" call failed.
VhostSetMemTable(VhostError),
/// The vhost "set vring num" call failed.
VhostSetVringNum(VhostError),
/// The vhost "set vring addr" call failed.
VhostSetVringAddr(VhostError),
/// The vhost "set vring base" call failed.
VhostSetVringBase(VhostError),
/// The vhost "set vring call" call failed.
VhostSetVringCall(VhostError),
/// The vhost "set vring kick" call failed.
VhostSetVringKick(VhostError),
/// The vhost-net "set backend" call failed.
VhostNetSetBackend(VhostError),
/// Failed to create the eventfd used for vhost interrupts.
VhostIrqCreate(SysError),
/// Failed to read the eventfd used for vhost interrupts.
VhostIrqRead(SysError),
/// Error while polling for events in the worker loop.
PollError(SysError),
}
/// State owned by the worker thread: everything needed to configure vhost-net
/// for the activated queues and to relay vhost's interrupts afterwards.
struct Worker {
/// Virtqueues handed to the device at activation.
queues: Vec<Queue>,
/// Tap interface installed as the vhost-net backend for every queue.
tap: Tap,
/// Handle on which the vhost / vhost-net configuration calls are issued.
vhost_net: VhostNet,
/// Eventfd registered with vhost as each vring's "call" fd; polled and read
/// by the worker loop.
vhost_interrupt: EventFd,
/// Shared interrupt status word; the used-ring bit is OR'd in before the
/// interrupt eventfd is written.
interrupt_status: Arc<AtomicUsize>,
/// Eventfd written to raise the device interrupt (presumably towards the
/// guest -- confirm against the caller of `activate`).
interrupt_evt: EventFd,
/// Feature bits acknowledged by the driver; intersected with the features
/// vhost reports before being set on vhost.
acked_features: u64,
}
impl Worker {
fn signal_used_queue(&self) {
self.interrupt_status
.fetch_or(INTERRUPT_STATUS_USED_RING as usize, Ordering::SeqCst);
self.interrupt_evt.write(1).unwrap();
}
fn run(&mut self, queue_evts: Vec<EventFd>, kill_evt: EventFd) -> Result<(), NetError> {
// Preliminary setup for vhost net.
self.vhost_net.set_owner().map_err(NetError::VhostSetOwner)?;
let avail_features = self.vhost_net
.get_features()
.map_err(NetError::VhostGetFeatures)?;
let features: c_ulonglong = self.acked_features & avail_features;
self.vhost_net
.set_features(features)
.map_err(NetError::VhostSetFeatures)?;
self.vhost_net
.set_mem_table()
.map_err(NetError::VhostSetMemTable)?;
for (queue_index, ref queue) in self.queues.iter().enumerate() {
self.vhost_net
.set_vring_num(queue_index, queue.max_size)
.map_err(NetError::VhostSetVringNum)?;
self.vhost_net
.set_vring_addr(QUEUE_SIZES[queue_index],
queue.actual_size(),
queue_index,
0,
queue.desc_table,
queue.used_ring,
queue.avail_ring,
None)
.map_err(NetError::VhostSetVringAddr)?;
self.vhost_net
.set_vring_base(queue_index, 0)
.map_err(NetError::VhostSetVringBase)?;
self.vhost_net
.set_vring_call(queue_index, &self.vhost_interrupt)
.map_err(NetError::VhostSetVringCall)?;
self.vhost_net
.set_vring_kick(queue_index, &queue_evts[queue_index])
.map_err(NetError::VhostSetVringKick)?;
self.vhost_net
.net_set_backend(queue_index, &self.tap)
.map_err(NetError::VhostNetSetBackend)?;
}
const VHOST_IRQ: u32 = 1;
const KILL: u32 = 2;
let mut poller = Poller::new(2);
'poll: loop {
let tokens =
match poller.poll(&[(VHOST_IRQ, &self.vhost_interrupt), (KILL, &kill_evt)]) {
Ok(v) => v,
Err(e) => return Err(NetError::PollError(e))
};
let mut needs_interrupt = false;
for &token in tokens {
match token {
VHOST_IRQ => {
needs_interrupt = true;
self.vhost_interrupt.read().map_err(NetError::VhostIrqRead)?;
}
KILL => break 'poll,
_ => unreachable!(),
}
}
if needs_interrupt {
self.signal_used_queue();
}
}
Ok(())
}
}
/// A virtio net device whose data path is handled by vhost-net.
///
/// The optional fields hold resources that are moved into the worker thread
/// when the device is activated; they are `None` afterwards.
pub struct Net {
/// Clone of `kill_evt` that is handed to the worker thread on activation.
/// Still `Some` if no worker was ever started.
workers_kill_evt: Option<EventFd>,
/// Written on drop to tell the worker thread to exit.
kill_evt: EventFd,
/// Tap interface configured in `new`; moved into the worker on activation.
tap: Option<Tap>,
/// vhost-net handle; moved into the worker on activation.
vhost_net: Option<VhostNet>,
/// Eventfd for vhost interrupts; moved into the worker on activation.
vhost_interrupt: Option<EventFd>,
/// Feature bits this device offers to the driver.
avail_features: u64,
/// Subset of `avail_features` that the driver has acknowledged so far.
acked_features: u64,
}
impl Net {
/// Create a new virtio network device with the given IP address and
/// netmask.
pub fn new(ip_addr: Ipv4Addr,
netmask: Ipv4Addr,
mem: &GuestMemory) -> Result<Net, NetError> {
let kill_evt = EventFd::new().map_err(NetError::CreateKillEventFd)?;
let tap = Tap::new().map_err(NetError::TapOpen)?;
tap.set_ip_addr(ip_addr).map_err(NetError::TapSetIp)?;
tap.set_netmask(netmask).map_err(NetError::TapSetNetmask)?;
// Set offload flags to match the virtio features below.
tap.set_offload(net_sys::TUN_F_CSUM |
net_sys::TUN_F_UFO |
net_sys::TUN_F_TSO4 |
net_sys::TUN_F_TSO6)
.map_err(NetError::TapSetOffload)?;
// We declare VIRTIO_NET_F_MRG_RXBUF, so set the vnet hdr size to match.
let vnet_hdr_size = mem::size_of::<virtio_net_hdr_mrg_rxbuf>() as i32;
tap.set_vnet_hdr_size(vnet_hdr_size)
.map_err(NetError::TapSetVnetHdrSize)?;
tap.enable().map_err(NetError::TapEnable)?;
let vhost_net = VhostNet::new(mem).map_err(NetError::VhostOpen)?;
let avail_features =
1 << virtio_net::VIRTIO_NET_F_GUEST_CSUM |
1 << virtio_net::VIRTIO_NET_F_CSUM |
1 << virtio_net::VIRTIO_NET_F_GUEST_TSO4 |
1 << virtio_net::VIRTIO_NET_F_GUEST_UFO |
1 << virtio_net::VIRTIO_NET_F_HOST_TSO4 |
1 << virtio_net::VIRTIO_NET_F_HOST_UFO |
1 << virtio_net::VIRTIO_NET_F_MRG_RXBUF |
1 << vhost::VIRTIO_RING_F_INDIRECT_DESC |
1 << vhost::VIRTIO_RING_F_EVENT_IDX |
1 << vhost::VIRTIO_F_NOTIFY_ON_EMPTY |
1 << vhost::VIRTIO_F_VERSION_1;
Ok(Net {
workers_kill_evt: Some(kill_evt.try_clone().map_err(NetError::CloneKillEventFd)?),
kill_evt: kill_evt,
tap: Some(tap),
vhost_net: Some(vhost_net),
vhost_interrupt: Some(EventFd::new().map_err(NetError::VhostIrqCreate)?),
avail_features: avail_features,
acked_features: 0u64,
})
}
}
impl Drop for Net {
    /// Asks the worker thread to exit, if one was ever started.
    fn drop(&mut self) {
        // The worker's kill eventfd is only taken out of `self` when a worker
        // is spawned; if it is still here there is no thread to stop.
        if self.workers_kill_evt.is_some() {
            return;
        }
        // Best effort: nothing useful can be done if the write fails.
        let _ = self.kill_evt.write(1);
    }
}
impl VirtioDevice for Net {
    /// Lists the raw fds of every resource the device still holds: tap,
    /// vhost-net handle, vhost interrupt eventfd and the worker's kill
    /// eventfd, in that order.
    fn keep_fds(&self) -> Vec<RawFd> {
        let mut fds = Vec::new();
        fds.extend(self.tap.iter().map(|tap| tap.as_raw_fd()));
        fds.extend(self.vhost_net.iter().map(|vhost_net| vhost_net.as_raw_fd()));
        fds.extend(self.vhost_interrupt.iter().map(|evt| evt.as_raw_fd()));
        fds.extend(self.workers_kill_evt.iter().map(|evt| evt.as_raw_fd()));
        fds
    }

    /// Reports this device as a virtio net device.
    fn device_type(&self) -> u32 {
        TYPE_NET
    }

    /// Returns the maximum size of each of the device's queues.
    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    /// Returns the requested 32-bit page of the offered feature bits: page 0
    /// is the low half, page 1 the high half. Any other page logs a warning
    /// and yields 0.
    fn features(&self, page: u32) -> u32 {
        if page == 0 {
            self.avail_features as u32
        } else if page == 1 {
            (self.avail_features >> 32) as u32
        } else {
            warn!("net: virtio net got request for features page: {}", page);
            0u32
        }
    }

    /// Records the feature bits the driver acknowledges for the given 32-bit
    /// page. Bits that were never offered are logged and ignored, as is an
    /// unknown page.
    fn ack_features(&mut self, page: u32, value: u32) {
        // Position the 32-bit page inside the 64-bit feature word.
        let requested = match page {
            0 => value as u64,
            1 => (value as u64) << 32,
            _ => {
                warn!("net: virtio net device cannot ack unknown feature page: {}",
                      page);
                0u64
            }
        };

        // Warn if the guest is ACK'ing something we never claimed to have.
        if (requested & !self.avail_features) != 0 {
            warn!("net: virtio net got unknown feature ack: {:x}", requested);
        }

        // Only offered features count as acked.
        self.acked_features |= requested & self.avail_features;
    }

    /// Starts the worker thread that configures vhost-net for the given
    /// queues and relays its interrupts.
    ///
    /// Does nothing beyond logging if the queue count is not 2, and returns
    /// silently if the device's resources were already handed to a worker.
    fn activate(&mut self,
                _: GuestMemory,
                interrupt_evt: EventFd,
                status: Arc<AtomicUsize>,
                queues: Vec<Queue>,
                queue_evts: Vec<EventFd>) {
        if queues.len() != 2 || queue_evts.len() != 2 {
            error!("net: expected 2 queues, got {}", queues.len());
            return;
        }

        // Move the device's resources out of `self`, stopping at the first
        // one that is no longer available.
        let vhost_net = match self.vhost_net.take() {
            Some(vhost_net) => vhost_net,
            None => return,
        };
        let tap = match self.tap.take() {
            Some(tap) => tap,
            None => return,
        };
        let vhost_interrupt = match self.vhost_interrupt.take() {
            Some(evt) => evt,
            None => return,
        };
        let kill_evt = match self.workers_kill_evt.take() {
            Some(evt) => evt,
            None => return,
        };

        let mut worker = Worker {
            queues: queues,
            tap: tap,
            vhost_net: vhost_net,
            vhost_interrupt: vhost_interrupt,
            interrupt_status: status,
            interrupt_evt: interrupt_evt,
            acked_features: self.acked_features,
        };

        spawn(move || {
            if let Err(e) = worker.run(queue_evts, kill_evt) {
                error!("net worker thread exited with error: {:?}",
                       e);
            }
        });
    }
}

View file

@ -198,7 +198,9 @@ impl Queue {
} }
} }
fn actual_size(&self) -> u16 { /// Return the actual size of the queue, as the driver may not set up a
/// queue as big as the device allows.
pub fn actual_size(&self) -> u16 {
min(self.size, self.max_size) min(self.size, self.max_size)
} }

View file

@ -12,11 +12,16 @@ extern crate x86_64;
extern crate kernel_loader; extern crate kernel_loader;
extern crate byteorder; extern crate byteorder;
#[macro_use] extern crate sys_util; #[macro_use] extern crate sys_util;
extern crate net_sys;
extern crate net_util;
extern crate vhost;
extern crate virtio_sys;
use std::ffi::{CString, CStr}; use std::ffi::{CString, CStr};
use std::fmt; use std::fmt;
use std::fs::File; use std::fs::File;
use std::io::{stdin, stdout}; use std::io::{stdin, stdout};
use std::net;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::ptr; use std::ptr;
use std::string::String; use std::string::String;
@ -45,9 +50,14 @@ enum Error {
Disk(std::io::Error), Disk(std::io::Error),
BlockDeviceNew(sys_util::Error), BlockDeviceNew(sys_util::Error),
BlockDeviceRootSetup(sys_util::Error), BlockDeviceRootSetup(sys_util::Error),
NetDeviceNew(hw::virtio::NetError),
NetDeviceRootSetup(sys_util::Error),
MacAddressNeedsNetConfig,
NetMissingConfig,
DeviceJail(io_jail::Error), DeviceJail(io_jail::Error),
DevicePivotRoot(io_jail::Error), DevicePivotRoot(io_jail::Error),
RegisterBlock(device_manager::Error), RegisterBlock(device_manager::Error),
RegisterNet(device_manager::Error),
Cmdline(kernel_cmdline::Error), Cmdline(kernel_cmdline::Error),
RegisterIoevent(sys_util::Error), RegisterIoevent(sys_util::Error),
RegisterIrqfd(sys_util::Error), RegisterIrqfd(sys_util::Error),
@ -89,8 +99,15 @@ impl fmt::Display for Error {
write!(f, "failed to create root directory for a block device: {:?}", e) write!(f, "failed to create root directory for a block device: {:?}", e)
} }
&Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e), &Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e),
&Error::NetDeviceNew(ref e) => write!(f, "failed to set up networking: {:?}", e),
&Error::NetDeviceRootSetup(ref e) => {
write!(f, "failed to create root directory for a net device: {:?}", e)
}
&Error::MacAddressNeedsNetConfig => write!(f, "MAC address can only be specified when host IP and netmask are provided"),
&Error::NetMissingConfig => write!(f, "networking requires both host IP and netmask specified"),
&Error::DeviceJail(ref e) => write!(f, "failed to jail device: {:?}", e), &Error::DeviceJail(ref e) => write!(f, "failed to jail device: {:?}", e),
&Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {:?}", e), &Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {:?}", e),
&Error::RegisterNet(ref e) => write!(f, "error registering net device: {:?}", e),
&Error::Cmdline(ref e) => write!(f, "the given kernel command line was invalid: {}", e), &Error::Cmdline(ref e) => write!(f, "the given kernel command line was invalid: {}", e),
&Error::RegisterIoevent(ref e) => write!(f, "error registering ioevent: {:?}", e), &Error::RegisterIoevent(ref e) => write!(f, "error registering ioevent: {:?}", e),
&Error::RegisterIrqfd(ref e) => write!(f, "error registering irqfd: {:?}", e), &Error::RegisterIrqfd(ref e) => write!(f, "error registering irqfd: {:?}", e),
@ -114,6 +131,9 @@ struct Config {
memory: Option<usize>, memory: Option<usize>,
kernel_image: File, kernel_image: File,
params: Option<String>, params: Option<String>,
host_ip: Option<net::Ipv4Addr>,
netmask: Option<net::Ipv4Addr>,
mac_address: Option<String>,
socket_path: Option<String>, socket_path: Option<String>,
multiprocess: bool, multiprocess: bool,
warn_unknown_ports: bool, warn_unknown_ports: bool,
@ -185,6 +205,15 @@ fn wait_all_children() -> bool {
} }
fn run_config(cfg: Config) -> Result<()> { fn run_config(cfg: Config) -> Result<()> {
if cfg.mac_address.is_some() &&
(cfg.netmask.is_none() || cfg.host_ip.is_none()) {
return Err(Error::MacAddressNeedsNetConfig);
}
if cfg.netmask.is_some() != cfg.host_ip.is_some() {
return Err(Error::NetMissingConfig);
}
let socket = if let Some(ref socket_path) = cfg.socket_path { let socket = if let Some(ref socket_path) = cfg.socket_path {
Some(ControlSocketRecv::new(socket_path) Some(ControlSocketRecv::new(socket_path)
.map_err(|e| Error::Socket(e))?) .map_err(|e| Error::Socket(e))?)
@ -226,6 +255,26 @@ fn run_config(cfg: Config) -> Result<()> {
.map_err(Error::RegisterBlock)?; .map_err(Error::RegisterBlock)?;
} }
// We checked above that if the IP is defined, then the netmask is, too.
let net_root = TempDir::new(&PathBuf::from("/tmp/net_root"))
.map_err(Error::NetDeviceRootSetup)?;
if let Some(host_ip) = cfg.host_ip {
if let Some(netmask) = cfg.netmask {
let net_box = Box::new(hw::virtio::Net::new(host_ip, netmask, &guest_mem)
.map_err(|e| Error::NetDeviceNew(e))?);
let jail = if cfg.multiprocess {
let net_root_path = net_root.as_path().unwrap(); // Won't fail if new succeeded.
Some(create_base_minijail(net_root_path, Path::new("net_device.policy"))?)
}
else {
None
};
device_manager.register_mmio(net_box, jail, &mut cmdline).map_err(Error::RegisterNet)?;
}
}
if let Some(params) = cfg.params { if let Some(params) = cfg.params {
cmdline cmdline
.insert_str(params) .insert_str(params)
@ -586,6 +635,18 @@ fn main() {
.short("u") .short("u")
.long("multiprocess") .long("multiprocess")
.help("run the devices in a child process")) .help("run the devices in a child process"))
.arg(Arg::with_name("host_ip")
.long("host_ip")
.value_name("HOST_IP")
.help("IP address to assign to host tap interface"))
.arg(Arg::with_name("netmask")
.long("netmask")
.value_name("NETMASK")
.help("netmask for VM subnet"))
.arg(Arg::with_name("mac")
.long("mac")
.value_name("MAC")
.help("mac address for VM"))
.arg(Arg::with_name("socket") .arg(Arg::with_name("socket")
.short("s") .short("s")
.long("socket") .long("socket")
@ -624,6 +685,9 @@ fn main() {
.expect("Expected kernel image path to be valid"), .expect("Expected kernel image path to be valid"),
params: matches.value_of("params").map(|s| s.to_string()), params: matches.value_of("params").map(|s| s.to_string()),
multiprocess: matches.is_present("multiprocess"), multiprocess: matches.is_present("multiprocess"),
host_ip: matches.value_of("host_ip").and_then(|v| v.parse().ok()),
netmask: matches.value_of("netmask").and_then(|v| v.parse().ok()),
mac_address: matches.value_of("mac").map(|s| s.to_string()),
socket_path: matches.value_of("socket").map(|s| s.to_string()), socket_path: matches.value_of("socket").map(|s| s.to_string()),
warn_unknown_ports: matches.is_present("warn-unknown-ports"), warn_unknown_ports: matches.is_present("warn-unknown-ports"),
}; };