crosvm: refactor linux vm running sequence

The old sequence used tail cails to foward resources to the next stage
of the process, making it cumbersome to add new resources to earlier
stages. Certain tasks, such as IO port bus creation or irqfd
registration, were delayed to late stages for no good reason, causing
the stage names to have no correspondence with their contents.

The new sequence organizes setup into logical functions that only receive
and return resources that make sense for their operations.

BUG=None
TEST=./build_test; cargo run

Change-Id: Idaf1488da515e2473125333bf997638da60fa992
Reviewed-on: https://chromium-review.googlesource.com/822927
Commit-Ready: Zach Reizner <zachr@chromium.org>
Tested-by: Zach Reizner <zachr@chromium.org>
Reviewed-by: Zach Reizner <zachr@chromium.org>
This commit is contained in:
Zach Reizner 2017-12-13 16:04:39 -08:00 committed by chrome-bot
parent d3a7a1f63e
commit 8fb5211c3b

View file

@ -6,10 +6,9 @@ use std;
use std::ffi::{CString, CStr};
use std::fmt;
use std::fs::{File, OpenOptions, remove_file};
use std::io::{stdin, stdout};
use std::io::{self, stdin, stdout};
use std::os::unix::net::UnixDatagram;
use std::path::{Path, PathBuf};
use std::string::String;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, Barrier};
use std::thread;
@ -17,7 +16,7 @@ use std::thread::JoinHandle;
use libc;
use device_manager::{self, DeviceManager};
use device_manager;
use devices;
use io_jail::{self, Minijail};
use kernel_cmdline;
@ -34,99 +33,118 @@ use x86_64;
pub enum Error {
BalloonDeviceNew(devices::virtio::BalloonError),
OpenKernel(PathBuf, std::io::Error),
Socket(std::io::Error),
Disk(std::io::Error),
BlockDeviceNew(sys_util::Error),
VhostNetDeviceNew(devices::virtio::vhost::Error),
NetDeviceNew(devices::virtio::NetError),
NoVarEmpty,
VhostVsockDeviceNew(devices::virtio::vhost::Error),
ChownWaylandRoot(sys_util::Error),
CloneEventFd(sys_util::Error),
Cmdline(kernel_cmdline::Error),
CreateEventFd(sys_util::Error),
CreateGuestMemory(sys_util::GuestMemoryError),
CreateIrqChip(sys_util::Error),
CreateKvm(sys_util::Error),
CreatePit(sys_util::Error),
CreateSignalFd(sys_util::SignalFdError),
CreateSocket(io::Error),
CreateVcpu(sys_util::Error),
CreateVm(sys_util::Error),
DeviceJail(io_jail::Error),
DevicePivotRoot(io_jail::Error),
Disk(io::Error),
GetWaylandGroup(sys_util::Error),
LoadCmdline(kernel_loader::Error),
LoadKernel(kernel_loader::Error),
NetDeviceNew(devices::virtio::NetError),
NoVarEmpty,
OpenKernel(PathBuf, io::Error),
RegisterBalloon(device_manager::Error),
RegisterBlock(device_manager::Error),
RegisterNet(device_manager::Error),
RegisterWayland(device_manager::Error),
RegisterVsock(device_manager::Error),
Cmdline(kernel_cmdline::Error),
GetWaylandGroup(sys_util::Error),
SettingUidMap(io_jail::Error),
SettingGidMap(io_jail::Error),
ChownWaylandRoot(sys_util::Error),
RegisterIrqfd(sys_util::Error),
RegisterNet(device_manager::Error),
RegisterRng(device_manager::Error),
RegisterVsock(device_manager::Error),
RegisterWayland(device_manager::Error),
RngDeviceNew(devices::virtio::RngError),
KernelLoader(kernel_loader::Error),
SettingGidMap(io_jail::Error),
SettingUidMap(io_jail::Error),
SetTssAddr(sys_util::Error),
SignalFd(sys_util::SignalFdError),
SpawnVcpu(io::Error),
VhostNetDeviceNew(devices::virtio::vhost::Error),
VhostVsockDeviceNew(devices::virtio::vhost::Error),
WaylandDeviceNew(sys_util::Error),
WaylandTempDir(sys_util::Error),
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
ConfigureSystem(x86_64::Error),
EventFd(sys_util::Error),
SignalFd(sys_util::SignalFdError),
Kvm(sys_util::Error),
Vm(sys_util::Error),
Vcpu(sys_util::Error),
SpawnVcpu(std::io::Error),
Sys(sys_util::Error),
}
impl std::convert::From<kernel_loader::Error> for Error {
fn from(e: kernel_loader::Error) -> Error {
Error::KernelLoader(e)
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
impl std::convert::From<x86_64::Error> for Error {
fn from(e: x86_64::Error) -> Error {
Error::ConfigureSystem(e)
}
}
impl std::convert::From<sys_util::Error> for Error {
fn from(e: sys_util::Error) -> Error {
Error::Sys(e)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
ConfigureVcpu(x86_64::Error),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
&Error::BalloonDeviceNew(ref e) => write!(f, "failed to create balloon: {:?}", e),
&Error::OpenKernel(ref p, ref e) => write!(f, "failed to open kernel image {:?}: {}", p, e),
&Error::Socket(ref e) => write!(f, "failed to create socket: {}", e),
&Error::Disk(ref e) => write!(f, "failed to load disk image: {}", e),
&Error::BlockDeviceNew(ref e) => write!(f, "failed to create block device: {:?}", e),
&Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e),
&Error::VhostNetDeviceNew(ref e) => write!(f, "failed to set up vhost networking: {:?}", e),
&Error::RegisterVsock(ref e) => write!(f, "error registering virtual socket device: {:?}", e),
&Error::NetDeviceNew(ref e) => write!(f, "failed to set up virtio networking: {:?}", e),
&Error::NoVarEmpty => write!(f, "/var/empty doesn't exist, can't jail devices."),
&Error::ChownWaylandRoot(ref e) => {
write!(f, "error chowning wayland root directory: {:?}", e)
}
&Error::CloneEventFd(ref e) => write!(f, "failed to clone eventfd: {:?}", e),
&Error::Cmdline(ref e) => write!(f, "the given kernel command line was invalid: {}", e),
&Error::CreateEventFd(ref e) => write!(f, "failed to create eventfd: {:?}", e),
&Error::CreateGuestMemory(ref e) => write!(f, "failed to create guest memory: {:?}", e),
&Error::CreateIrqChip(ref e) => {
write!(f, "failed to create in-kernel IRQ chip: {:?}", e)
}
&Error::CreateKvm(ref e) => write!(f, "failed to open /dev/kvm: {:?}", e),
&Error::CreatePit(ref e) => write!(f, "failed to create in-kernel PIT: {:?}", e),
&Error::CreateSignalFd(ref e) => write!(f, "failed to create signalfd: {:?}", e),
&Error::CreateSocket(ref e) => write!(f, "failed to create socket: {}", e),
&Error::CreateVcpu(ref e) => write!(f, "failed to create VCPU: {:?}", e),
&Error::CreateVm(ref e) => write!(f, "failed to create KVM VM object: {:?}", e),
&Error::DeviceJail(ref e) => write!(f, "failed to jail device: {}", e),
&Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {}", e),
&Error::VhostVsockDeviceNew(ref e) => write!(f, "failed to set up virtual socket device: {:?}", e),
&Error::Disk(ref e) => write!(f, "failed to load disk image: {}", e),
&Error::GetWaylandGroup(ref e) => {
write!(f, "could not find gid for wayland group: {:?}", e)
}
&Error::LoadCmdline(ref e) => write!(f, "error loading kernel command line: {:?}", e),
&Error::LoadKernel(ref e) => write!(f, "error loading kernel: {:?}", e),
&Error::NetDeviceNew(ref e) => write!(f, "failed to set up virtio networking: {:?}", e),
&Error::NoVarEmpty => write!(f, "/var/empty doesn't exist, can't jail devices."),
&Error::OpenKernel(ref p, ref e) => {
write!(f, "failed to open kernel image {:?}: {}", p, e)
}
&Error::RegisterBalloon(ref e) => {
write!(f, "error registering balloon device: {:?}", e)
},
&Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e),
&Error::RegisterIrqfd(ref e) => write!(f, "error registering irqfd: {:?}", e),
&Error::RegisterNet(ref e) => write!(f, "error registering net device: {:?}", e),
&Error::RegisterRng(ref e) => write!(f, "error registering rng device: {:?}", e),
&Error::RngDeviceNew(ref e) => write!(f, "failed to set up rng: {:?}", e),
&Error::RegisterVsock(ref e) => {
write!(f, "error registering virtual socket device: {:?}", e)
}
&Error::RegisterWayland(ref e) => write!(f, "error registering wayland device: {}", e),
&Error::SettingUidMap(ref e) => write!(f, "error setting UID map: {}", e),
&Error::RngDeviceNew(ref e) => write!(f, "failed to set up rng: {:?}", e),
&Error::SettingGidMap(ref e) => write!(f, "error setting GID map: {}", e),
&Error::ChownWaylandRoot(ref e) => write!(f, "error chowning wayland root directory: {:?}", e),
&Error::Cmdline(ref e) => write!(f, "the given kernel command line was invalid: {}", e),
&Error::GetWaylandGroup(ref e) => write!(f, "could not find gid for wayland group: {:?}", e),
&Error::RegisterIrqfd(ref e) => write!(f, "error registering irqfd: {:?}", e),
&Error::KernelLoader(ref e) => write!(f, "error loading kernel: {:?}", e),
&Error::SettingUidMap(ref e) => write!(f, "error setting UID map: {}", e),
&Error::SetTssAddr(ref e) => write!(f, "failed to set TSS address: {:?}", e),
&Error::SignalFd(ref e) => write!(f, "failed to read signal fd: {:?}", e),
&Error::SpawnVcpu(ref e) => write!(f, "failed to spawn VCPU thread: {:?}", e),
&Error::VhostNetDeviceNew(ref e) => {
write!(f, "failed to set up vhost networking: {:?}", e)
}
&Error::VhostVsockDeviceNew(ref e) => {
write!(f, "failed to set up virtual socket device: {:?}", e)
}
&Error::WaylandDeviceNew(ref e) => {
write!(f, "failed to create wayland device: {:?}", e)
}
&Error::WaylandTempDir(ref e) => {
write!(f, "failed to create wayland device jail directroy: {:?}", e)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
&Error::ConfigureSystem(ref e) => write!(f, "error configuring system: {:?}", e),
&Error::EventFd(ref e) => write!(f, "error creating EventFd: {:?}", e),
&Error::SignalFd(ref e) => write!(f, "error with SignalFd: {:?}", e),
&Error::Kvm(ref e) => write!(f, "error creating Kvm: {:?}", e),
&Error::Vm(ref e) => write!(f, "error creating Vm: {:?}", e),
&Error::Vcpu(ref e) => write!(f, "error creating Vcpu: {:?}", e),
&Error::SpawnVcpu(ref e) => write!(f, "error creating spawning Vcpu: {}", e),
&Error::Sys(ref e) => write!(f, "error with system call: {:?}", e),
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
&Error::ConfigureVcpu(ref e) => write!(f, "failed to configure vcpu: {:?}", e),
}
}
}
@ -181,15 +199,86 @@ fn create_base_minijail(root: &Path, seccomp_policy: &Path) -> Result<Minijail>
Ok(j)
}
fn setup_mmio_bus(vm: &mut Vm,
guest_mem: &GuestMemory,
cfg: Config,
fn setup_memory(memory: Option<usize>) -> Result<GuestMemory> {
let mem_size = memory.unwrap_or(256) << 20;
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
let arch_mem_regions = vec![(GuestAddress(0), mem_size)];
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let arch_mem_regions = x86_64::arch_memory_regions(mem_size);
GuestMemory::new(&arch_mem_regions).map_err(Error::CreateGuestMemory)
}
fn setup_io_bus(vm: &mut Vm,
exit_evt: EventFd)
-> Result<(devices::Bus, Arc<Mutex<devices::Serial>>)> {
struct NoDevice;
impl devices::BusDevice for NoDevice {}
let mut io_bus = devices::Bus::new();
let com_evt_1_3 = EventFd::new().map_err(Error::CreateEventFd)?;
let com_evt_2_4 = EventFd::new().map_err(Error::CreateEventFd)?;
let stdio_serial =
Arc::new(Mutex::new(devices::Serial::new_out(com_evt_1_3
.try_clone()
.map_err(Error::CloneEventFd)?,
Box::new(stdout()))));
let nul_device = Arc::new(Mutex::new(NoDevice));
io_bus.insert(stdio_serial.clone(), 0x3f8, 0x8).unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_2_4
.try_clone()
.map_err(Error::CloneEventFd)?))),
0x2f8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_1_3
.try_clone()
.map_err(Error::CloneEventFd)?))),
0x3e8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_2_4
.try_clone()
.map_err(Error::CloneEventFd)?))),
0x2e8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Cmos::new())), 0x70, 0x2)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::I8042Device::new(exit_evt
.try_clone()
.map_err(Error::CloneEventFd)?))),
0x061,
0x4)
.unwrap();
io_bus.insert(nul_device.clone(), 0x040, 0x8).unwrap(); // ignore pit
io_bus.insert(nul_device.clone(), 0x0ed, 0x1).unwrap(); // most likely this one does nothing
io_bus.insert(nul_device.clone(), 0x0f0, 0x2).unwrap(); // ignore fpu
io_bus.insert(nul_device.clone(), 0xcf8, 0x8).unwrap(); // ignore pci
vm.register_irqfd(&com_evt_1_3, 4)
.map_err(Error::RegisterIrqfd)?;
vm.register_irqfd(&com_evt_2_4, 3)
.map_err(Error::RegisterIrqfd)?;
Ok((io_bus, stdio_serial))
}
fn setup_mmio_bus(cfg: &Config,
vm: &mut Vm,
mem: &GuestMemory,
cmdline: &mut kernel_cmdline::Cmdline,
control_sockets: &mut Vec<UnlinkUnixDatagram>,
balloon_device_socket: UnixDatagram)
-> Result<devices::Bus> {
static DEFAULT_PIVOT_ROOT: &'static str = "/var/empty";
let mut device_manager = DeviceManager::new(vm, guest_mem.clone(), 0x1000, 0xd0000000, 5);
let mut device_manager =
device_manager::DeviceManager::new(vm, mem.clone(), 0x1000, 0xd0000000, 5);
// An empty directory for jailed device's pivot root.
let empty_root_path = Path::new(DEFAULT_PIVOT_ROOT);
@ -197,11 +286,11 @@ fn setup_mmio_bus(vm: &mut Vm,
return Err(Error::NoVarEmpty);
}
for disk in cfg.disks {
for disk in &cfg.disks {
let disk_image = OpenOptions::new()
.read(true)
.write(disk.writable)
.open(disk.path)
.open(&disk.path)
.map_err(|e| Error::Disk(e))?;
let block_box = Box::new(devices::virtio::Block::new(disk_image)
@ -245,8 +334,8 @@ fn setup_mmio_bus(vm: &mut Vm,
if let Some(host_ip) = cfg.host_ip {
if let Some(netmask) = cfg.netmask {
let net_box: Box<devices::virtio::VirtioDevice> = if cfg.vhost_net {
Box::new(devices::virtio::vhost::Net::new(host_ip, netmask, &guest_mem)
.map_err(|e| Error::VhostNetDeviceNew(e))?)
Box::new(devices::virtio::vhost::Net::new(host_ip, netmask, &mem)
.map_err(Error::VhostNetDeviceNew)?)
} else {
Box::new(devices::virtio::Net::new(host_ip, netmask)
.map_err(|e| Error::NetDeviceNew(e))?)
@ -271,18 +360,19 @@ fn setup_mmio_bus(vm: &mut Vm,
}
}
let wl_root = TempDir::new(&PathBuf::from("/tmp/wl_root"))?;
if let Some(wayland_socket_path) = cfg.wayland_socket_path {
let wl_root = TempDir::new(&PathBuf::from("/tmp/wl_root")).map_err(Error::WaylandTempDir)?;
if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
let jailed_wayland_path = Path::new("/wayland-0");
let (host_socket, device_socket) = UnixDatagram::pair().map_err(Error::Socket)?;
let (host_socket, device_socket) = UnixDatagram::pair().map_err(Error::CreateSocket)?;
control_sockets.push(UnlinkUnixDatagram(host_socket));
let wl_box = Box::new(devices::virtio::Wl::new(if cfg.multiprocess {
&jailed_wayland_path
} else {
wayland_socket_path.as_path()
},
device_socket)?);
&jailed_wayland_path
} else {
wayland_socket_path.as_path()
},
device_socket)
.map_err(Error::WaylandDeviceNew)?);
let jail = if cfg.multiprocess {
let wl_root_path = wl_root.as_path().unwrap(); // Won't fail if new succeeded.
@ -296,8 +386,8 @@ fn setup_mmio_bus(vm: &mut Vm,
// Set the uid/gid for the jailed process, and give a basic id map. This
// is required for the above bind mount to work.
let wayland_group = cfg.wayland_group.unwrap_or(String::from("wayland"));
let wayland_cstr = CString::new(wayland_group.into_bytes()).unwrap();
let wayland_group = cfg.wayland_group.as_ref().map(|s| s.as_str()).unwrap_or("wayland");
let wayland_cstr = CString::new(wayland_group).unwrap();
let wayland_gid = get_group_id(&wayland_cstr)
.map_err(Error::GetWaylandGroup)?;
@ -339,8 +429,8 @@ fn setup_mmio_bus(vm: &mut Vm,
}
if let Some(cid) = cfg.cid {
let vsock_box = Box::new(devices::virtio::vhost::Vsock::new(cid, &guest_mem)
.map_err(|e| Error::VhostVsockDeviceNew(e))?);
let vsock_box = Box::new(devices::virtio::vhost::Vsock::new(cid, &mem)
.map_err(Error::VhostVsockDeviceNew)?);
let jail = if cfg.multiprocess {
let policy_path: PathBuf = cfg.seccomp_policy_dir.join("vhost_vsock_device.policy");
@ -355,171 +445,66 @@ fn setup_mmio_bus(vm: &mut Vm,
.map_err(Error::RegisterVsock)?;
}
if !cfg.params.is_empty() {
cmdline
.insert_str(cfg.params)
.map_err(|e| Error::Cmdline(e))?;
}
Ok(device_manager.bus)
}
pub fn run_config(cfg: Config) -> Result<()> {
if cfg.multiprocess {
// Printing something to the syslog before entering minijail so that libc's syslogger has a
// chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
// access to those files will not be possible.
info!("crosvm entering multiprocess mode");
}
let kernel_image = File::open(cfg.kernel_path.as_path())
.map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?;
let mut control_sockets = Vec::new();
if let Some(ref path) = cfg.socket_path {
let path = Path::new(path);
let control_socket = UnixDatagram::bind(path).map_err(|e| Error::Socket(e))?;
control_sockets.push(UnlinkUnixDatagram(control_socket));
}
let mem_size = cfg.memory.unwrap_or(256) << 20;
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
let arch_mem_regions = vec![(GuestAddress(0), mem_size)];
fn setup_system_memory(mem: &GuestMemory,
vcpu_count: u32,
mut kernel_image: File,
cmdline: &CStr)
-> Result<()> {
let kernel_start_addr = GuestAddress(KERNEL_START_OFFSET);
let cmdline_addr = GuestAddress(CMDLINE_OFFSET);
kernel_loader::load_kernel(mem, kernel_start_addr, &mut kernel_image)
.map_err(Error::LoadKernel)?;
kernel_loader::load_cmdline(mem, cmdline_addr, cmdline)
.map_err(Error::LoadCmdline)?;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let arch_mem_regions = x86_64::arch_memory_regions(mem_size);
let guest_mem = GuestMemory::new(&arch_mem_regions).expect("new mmap failed");
x86_64::configure_system(mem,
kernel_start_addr,
cmdline_addr,
cmdline.to_bytes().len() + 1,
vcpu_count as u8)
.map_err(Error::ConfigureSystem)?;
Ok(())
}
let kvm = Kvm::new().map_err(Error::Kvm)?;
let mut vm = Vm::new(&kvm, guest_mem.clone()).map_err(Error::Vm)?;
fn setup_vm(kvm: &Kvm, mem: GuestMemory) -> Result<Vm> {
let vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
let tss_addr = GuestAddress(0xfffbd000);
vm.set_tss_addr(tss_addr).expect("set tss addr failed");
vm.create_pit().expect("create pit failed");
}
vm.create_irq_chip().expect("create irq chip failed");
let mut cmdline = kernel_cmdline::Cmdline::new(CMDLINE_MAX_SIZE);
cmdline
.insert_str("console=ttyS0 noacpi reboot=k panic=1 pci=off")
.unwrap();
let vcpu_count = cfg.vcpu_count.unwrap_or(1);
let (balloon_host_socket, balloon_device_socket) = UnixDatagram::pair().map_err(Error::Socket)?;
let mmio_bus = setup_mmio_bus(&mut vm, &guest_mem, cfg, &mut cmdline, &mut control_sockets, balloon_device_socket)?;
run_kvm(kvm,
vm,
kernel_image,
&CString::new(cmdline).unwrap(),
vcpu_count,
mmio_bus,
control_sockets,
balloon_host_socket)
vm.create_irq_chip().map_err(Error::CreateIrqChip)?;
Ok(vm)
}
fn run_kvm(kvm: Kvm,
vm: Vm,
mut kernel_image: File,
cmdline: &CStr,
vcpu_count: u32,
mmio_bus: devices::Bus,
control_sockets: Vec<UnlinkUnixDatagram>,
balloon_host_socket: UnixDatagram)
-> Result<()> {
fn setup_vcpu(kvm: &Kvm,
vm: &Vm,
cpu_id: u32,
vcpu_count: u32,
start_barrier: Arc<Barrier>,
exit_evt: EventFd,
kill_signaled: Arc<AtomicBool>,
io_bus: devices::Bus,
mmio_bus: devices::Bus)
-> Result<JoinHandle<()>> {
let kernel_start_addr = GuestAddress(KERNEL_START_OFFSET);
let cmdline_addr = GuestAddress(CMDLINE_OFFSET);
kernel_loader::load_kernel(vm.get_memory(), kernel_start_addr, &mut kernel_image)?;
kernel_loader::load_cmdline(vm.get_memory(), cmdline_addr, cmdline)?;
let vcpu = Vcpu::new(cpu_id as libc::c_ulong, &kvm, &vm)
.map_err(Error::CreateVcpu)?;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
x86_64::configure_system(vm.get_memory(),
kernel_start_addr,
cmdline_addr,
cmdline.to_bytes().len() + 1,
vcpu_count as u8)?;
let mut io_bus = devices::Bus::new();
let exit_evt = EventFd::new().expect("failed to create exit eventfd");
// Masking signals is inherently dangerous, since this can persist across
// clones/execs. Do this after any jailed devices have been spawned, but
// before the vcpus spawn so they also inherit the masking for SIGCHLD.
let sigchld_fd = SignalFd::new(libc::SIGCHLD)
.expect("failed to create child signalfd");
struct NoDevice;
impl devices::BusDevice for NoDevice {}
let com_evt_1_3 = EventFd::new().map_err(Error::EventFd)?;
let com_evt_2_4 = EventFd::new().map_err(Error::EventFd)?;
let stdio_serial =
Arc::new(Mutex::new(
devices::Serial::new_out(com_evt_1_3.try_clone().map_err(Error::EventFd)?,
Box::new(stdout()))));
let nul_device = Arc::new(Mutex::new(NoDevice));
io_bus.insert(stdio_serial.clone(), 0x3f8, 0x8).unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_2_4
.try_clone()
.map_err(Error::EventFd)?))),
0x2f8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_1_3
.try_clone()
.map_err(Error::EventFd)?))),
0x3e8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Serial::new_sink(com_evt_2_4
.try_clone()
.map_err(Error::EventFd)?))),
0x2e8,
0x8)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::Cmos::new())), 0x70, 0x2)
.unwrap();
io_bus
.insert(Arc::new(Mutex::new(devices::I8042Device::new(exit_evt
.try_clone()
.map_err(Error::EventFd)?))),
0x061,
0x4)
.unwrap();
io_bus.insert(nul_device.clone(), 0x040, 0x8).unwrap(); // ignore pit
io_bus.insert(nul_device.clone(), 0x0ed, 0x1).unwrap(); // most likely this one does nothing
io_bus.insert(nul_device.clone(), 0x0f0, 0x2).unwrap(); // ignore fpu
io_bus.insert(nul_device.clone(), 0xcf8, 0x8).unwrap(); // ignore pci
vm.register_irqfd(&com_evt_1_3, 4)
.map_err(Error::RegisterIrqfd)?;
vm.register_irqfd(&com_evt_2_4, 3)
.map_err(Error::RegisterIrqfd)?;
let kill_signaled = Arc::new(AtomicBool::new(false));
let mut vcpu_handles = Vec::with_capacity(vcpu_count as usize);
let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count + 1) as usize));
for cpu_id in 0..vcpu_count {
let mmio_bus = mmio_bus.clone();
let io_bus = io_bus.clone();
let kill_signaled = kill_signaled.clone();
let vcpu_thread_barrier = vcpu_thread_barrier.clone();
let vcpu_exit_evt = exit_evt.try_clone().map_err(Error::EventFd)?;
let vcpu = Vcpu::new(cpu_id as libc::c_ulong, &kvm, &vm).map_err(Error::Vcpu)?;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
x86_64::configure_vcpu(vm.get_memory(),
kernel_start_addr,
&kvm,
&vcpu,
cpu_id as u64,
vcpu_count as u64)?;
vcpu_handles.push(thread::Builder::new()
.name(format!("crosvm_vcpu{}", cpu_id))
.spawn(move || {
x86_64::configure_vcpu(vm.get_memory(),
kernel_start_addr,
&kvm,
&vcpu,
cpu_id as u64,
vcpu_count as u64)
.map_err(Error::ConfigureVcpu)?;
thread::Builder::new()
.name(format!("crosvm_vcpu{}", cpu_id))
.spawn(move || {
unsafe {
extern "C" fn handle_signal() {}
// Our signal handler does nothing and is trivially async signal safe.
@ -527,7 +512,7 @@ fn run_kvm(kvm: Kvm,
.expect("failed to register vcpu signal handler");
}
vcpu_thread_barrier.wait();
start_barrier.wait();
loop {
let run_res = vcpu.run();
match run_res {
@ -564,26 +549,16 @@ fn run_kvm(kvm: Kvm,
break;
}
}
vcpu_exit_evt
exit_evt
.write(1)
.expect("failed to signal vcpu exit eventfd");
}).map_err(Error::SpawnVcpu)?);
}
vcpu_thread_barrier.wait();
run_control(vm,
control_sockets,
stdio_serial,
exit_evt,
sigchld_fd,
kill_signaled,
vcpu_handles,
balloon_host_socket)
})
.map_err(Error::SpawnVcpu)
}
fn run_control(mut vm: Vm,
fn run_control(vm: &mut Vm,
control_sockets: Vec<UnlinkUnixDatagram>,
next_dev_pfn: &mut u64,
stdio_serial: Arc<Mutex<devices::Serial>>,
exit_evt: EventFd,
sigchld_fd: SignalFd,
@ -598,8 +573,6 @@ fn run_control(mut vm: Vm,
const CHILD_SIGNAL: u32 = 2;
const VM_BASE: u32 = 3;
let mut next_dev_pfn = BASE_DEV_MEMORY_PFN;
let stdin_handle = stdin();
let stdin_lock = stdin_handle.lock();
stdin_lock
@ -673,7 +646,7 @@ fn run_control(mut vm: Vm,
Ok(request) => {
let mut running = true;
let response =
request.execute(&mut vm, &mut next_dev_pfn,
request.execute(vm, next_dev_pfn,
&mut running, &balloon_host_socket);
if let Err(e) = response.send(&mut scm, socket.as_ref()) {
error!("failed to send VmResponse: {:?}", e);
@ -711,3 +684,89 @@ fn run_control(mut vm: Vm,
Ok(())
}
pub fn run_config(cfg: Config) -> Result<()> {
if cfg.multiprocess {
// Printing something to the syslog before entering minijail so that libc's syslogger has a
// chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
// access to those files will not be possible.
info!("crosvm entering multiprocess mode");
}
// Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
// before any jailed devices have been spawned, so that we can catch any of them that fail very
// quickly.
let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
let mut control_sockets = Vec::new();
if let Some(ref path) = cfg.socket_path {
let path = Path::new(path);
let control_socket = UnixDatagram::bind(path).map_err(Error::CreateSocket)?;
control_sockets.push(UnlinkUnixDatagram(control_socket));
}
let kill_signaled = Arc::new(AtomicBool::new(false));
let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
let mem = setup_memory(cfg.memory)?;
let kvm = Kvm::new().map_err(Error::CreateKvm)?;
let mut vm = setup_vm(&kvm, mem.clone())?;
let mut cmdline = kernel_cmdline::Cmdline::new(CMDLINE_MAX_SIZE);
cmdline
.insert_str("console=ttyS0 noacpi reboot=k panic=1 pci=off")
.unwrap();
let mut next_dev_pfn = BASE_DEV_MEMORY_PFN;
let mut control_sockets = Vec::new();
let (io_bus, stdio_serial) = setup_io_bus(&mut vm,
exit_evt.try_clone().map_err(Error::CloneEventFd)?)?;
let (balloon_host_socket, balloon_device_socket) = UnixDatagram::pair()
.map_err(Error::CreateSocket)?;
let mmio_bus = setup_mmio_bus(&cfg,
&mut vm,
&mem,
&mut cmdline,
&mut control_sockets,
balloon_device_socket)?;
if !cfg.params.is_empty() {
cmdline.insert_str(&cfg.params).map_err(Error::Cmdline)?;
}
let vcpu_count = cfg.vcpu_count.unwrap_or(1);
let kernel_image = File::open(cfg.kernel_path.as_path())
.map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?;
setup_system_memory(&mem,
vcpu_count,
kernel_image,
&CString::new(cmdline).unwrap())?;
let vcpu_handles = Vec::with_capacity(vcpu_count as usize);
let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count + 1) as usize));
for cpu_id in 0..vcpu_count {
let exit_evt = exit_evt.try_clone().map_err(Error::CloneEventFd)?;
setup_vcpu(&kvm,
&vm,
cpu_id,
vcpu_count,
vcpu_thread_barrier.clone(),
exit_evt,
kill_signaled.clone(),
io_bus.clone(),
mmio_bus.clone())?;
}
vcpu_thread_barrier.wait();
run_control(&mut vm,
control_sockets,
&mut next_dev_pfn,
stdio_serial,
exit_evt,
sigchld_fd,
kill_signaled,
vcpu_handles,
balloon_host_socket)
}