diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs
index 0a8d5c1f94..06f59157b9 100644
--- a/kvm/src/lib.rs
+++ b/kvm/src/lib.rs
@@ -11,12 +11,13 @@ use std::cmp::{min, Ordering};
 use std::collections::{BinaryHeap, HashMap};
 use std::fs::File;
 use std::mem::size_of;
+use std::ops::{Deref, DerefMut};
 use std::os::raw::*;
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::ptr::copy_nonoverlapping;
 
 use libc::sigset_t;
-use libc::{open, EINVAL, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR};
+use libc::{open, EBUSY, EINVAL, ENOENT, ENOSPC, EOVERFLOW, O_CLOEXEC, O_RDWR};
 
 use kvm_sys::*;
 
@@ -1148,6 +1149,8 @@ pub enum VcpuExit {
 }
 
 /// A wrapper around creating and using a VCPU.
+/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
+/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
 pub struct Vcpu {
     vcpu: File,
     run_mmap: MemoryMapping,
@@ -1156,7 +1159,7 @@ pub struct Vcpu {
 
 pub struct VcpuThread {
     run: *mut kvm_run,
-    signal_num: c_int,
+    signal_num: Option<c_int>,
 }
 
 thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = RefCell::new(None));
@@ -1190,33 +1193,39 @@ impl Vcpu {
         })
     }
 
-    /// Sets the thread id for the vcpu and stores it in a hash map that can be used
-    /// by signal handlers to call set_local_immediate_exit(). Signal
-    /// number (if provided, otherwise use -1) will be temporily blocked when the vcpu
-    /// is added to the map, or later destroyed/removed from the map.
+    /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
+    /// guest.
+    /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
+    /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
+    /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
+    /// destroyed.
+    ///
+    /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
     #[allow(clippy::cast_ptr_alignment)]
-    pub fn set_thread_id(&mut self, signal_num: c_int) {
+    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
         // Block signal while we add -- if a signal fires (very unlikely,
         // as this means something is trying to pause the vcpu before it has
         // even started) it'll try to grab the read lock while this write
         // lock is grabbed and cause a deadlock.
-        let mut unblock = false;
-        if signal_num >= 0 {
-            unblock = true;
-            // Assuming that a failure to block means it's already blocked.
-            if block_signal(signal_num).is_err() {
-                unblock = false;
-            }
-        }
+        // Assuming that a failure to block means it's already blocked.
+        let _blocked_signal = signal_num.map(BlockedSignal::new);
+
         VCPU_THREAD.with(|v| {
-            *v.borrow_mut() = Some(VcpuThread {
-                run: self.run_mmap.as_ptr() as *mut kvm_run,
-                signal_num,
-            });
-        });
-        if unblock {
-            let _ = unblock_signal(signal_num).expect("failed to restore signal mask");
-        }
+            if v.borrow().is_none() {
+                *v.borrow_mut() = Some(VcpuThread {
+                    run: self.run_mmap.as_ptr() as *mut kvm_run,
+                    signal_num,
+                });
+                Ok(())
+            } else {
+                Err(Error::new(EBUSY))
+            }
+        })?;
+
+        Ok(RunnableVcpu {
+            vcpu: self,
+            phantom: Default::default(),
+        })
     }
 
     /// Gets a reference to the guest memory owned by this VM of this VCPU.
@@ -1297,99 +1306,6 @@ impl Vcpu {
         });
     }
 
-    /// Runs the VCPU until it exits, returning the reason.
-    ///
-    /// Note that the state of the VCPU and associated VM must be setup first for this to do
-    /// anything useful.
-    #[allow(clippy::cast_ptr_alignment)]
-    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
-    // allow attribute.
-    pub fn run(&self) -> Result<VcpuExit> {
-        // Safe because we know that our file is a VCPU fd and we verify the return result.
-        let ret = unsafe { ioctl(self, KVM_RUN()) };
-        if ret == 0 {
-            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
-            // kernel told us how large it was.
-            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
-            match run.exit_reason {
-                KVM_EXIT_IO => {
-                    // Safe because the exit_reason (which comes from the kernel) told us which
-                    // union field to use.
-                    let io = unsafe { run.__bindgen_anon_1.io };
-                    let port = io.port;
-                    let size = (io.count as usize) * (io.size as usize);
-                    match io.direction as u32 {
-                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
-                        KVM_EXIT_IO_OUT => {
-                            let mut data = [0; 8];
-                            let run_start = run as *const kvm_run as *const u8;
-                            // The data_offset is defined by the kernel to be some number of bytes
-                            // into the kvm_run structure, which we have fully mmap'd.
-                            unsafe {
-                                let data_ptr = run_start.offset(io.data_offset as isize);
-                                copy_nonoverlapping(
-                                    data_ptr,
-                                    data.as_mut_ptr(),
-                                    min(size, data.len()),
-                                );
-                            }
-                            Ok(VcpuExit::IoOut { port, size, data })
-                        }
-                        _ => Err(Error::new(EINVAL)),
-                    }
-                }
-                KVM_EXIT_MMIO => {
-                    // Safe because the exit_reason (which comes from the kernel) told us which
-                    // union field to use.
-                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
-                    let address = mmio.phys_addr;
-                    let size = min(mmio.len as usize, mmio.data.len());
-                    if mmio.is_write != 0 {
-                        Ok(VcpuExit::MmioWrite {
-                            address,
-                            size,
-                            data: mmio.data,
-                        })
-                    } else {
-                        Ok(VcpuExit::MmioRead { address, size })
-                    }
-                }
-                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
-                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
-                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
-                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
-                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
-                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
-                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
-                KVM_EXIT_FAIL_ENTRY => Ok(VcpuExit::FailEntry),
-                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
-                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
-                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
-                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
-                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
-                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
-                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
-                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
-                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
-                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
-                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
-                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
-                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
-                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
-                KVM_EXIT_SYSTEM_EVENT => {
-                    // Safe because we know the exit reason told us this union
-                    // field is valid
-                    let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
-                    let event_flags = unsafe { run.__bindgen_anon_1.system_event.flags };
-                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
-                }
-                r => panic!("unknown kvm exit reason: {}", r),
-            }
-        } else {
-            errno_result()
-        }
-    }
-
     /// Gets the VCPU registers.
     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
     pub fn get_regs(&self) -> Result<kvm_regs> {
@@ -1787,35 +1703,151 @@ impl Vcpu {
     }
 }
 
-impl Drop for Vcpu {
-    fn drop(&mut self) {
-        VCPU_THREAD.with(|v| {
-            let mut unblock = false;
-            let mut signal_num: c_int = -1;
-            if let Some(state) = &(*v.borrow()) {
-                if state.signal_num >= 0 {
-                    unblock = true;
-                    signal_num = state.signal_num;
-                    // Assuming that a failure to block means it's already blocked.
-                    if block_signal(signal_num).is_err() {
-                        unblock = false;
-                    }
-                }
-            };
-            *v.borrow_mut() = None;
-            if unblock {
-                let _ = unblock_signal(signal_num).expect("failed to restore signal mask");
-            }
-        });
-    }
-}
-
 impl AsRawFd for Vcpu {
     fn as_raw_fd(&self) -> RawFd {
         self.vcpu.as_raw_fd()
     }
 }
 
+/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
+/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
+/// function to execute the guest.
+pub struct RunnableVcpu {
+    vcpu: Vcpu,
+    // vcpus must stay on the same thread once they start.
+    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
+    phantom: std::marker::PhantomData<*mut u8>,
+}
+
+impl RunnableVcpu {
+    /// Runs the VCPU until it exits, returning the reason for the exit.
+    ///
+    /// Note that the state of the VCPU and associated VM must be setup first for this to do
+    /// anything useful.
+    #[allow(clippy::cast_ptr_alignment)]
+    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
+    // allow attribute.
+    pub fn run(&self) -> Result<VcpuExit> {
+        // Safe because we know that our file is a VCPU fd and we verify the return result.
+        let ret = unsafe { ioctl(self, KVM_RUN()) };
+        if ret == 0 {
+            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
+            // kernel told us how large it was.
+            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
+            match run.exit_reason {
+                KVM_EXIT_IO => {
+                    // Safe because the exit_reason (which comes from the kernel) told us which
+                    // union field to use.
+                    let io = unsafe { run.__bindgen_anon_1.io };
+                    let port = io.port;
+                    let size = (io.count as usize) * (io.size as usize);
+                    match io.direction as u32 {
+                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
+                        KVM_EXIT_IO_OUT => {
+                            let mut data = [0; 8];
+                            let run_start = run as *const kvm_run as *const u8;
+                            // The data_offset is defined by the kernel to be some number of bytes
+                            // into the kvm_run structure, which we have fully mmap'd.
+                            unsafe {
+                                let data_ptr = run_start.offset(io.data_offset as isize);
+                                copy_nonoverlapping(
+                                    data_ptr,
+                                    data.as_mut_ptr(),
+                                    min(size, data.len()),
+                                );
+                            }
+                            Ok(VcpuExit::IoOut { port, size, data })
+                        }
+                        _ => Err(Error::new(EINVAL)),
+                    }
+                }
+                KVM_EXIT_MMIO => {
+                    // Safe because the exit_reason (which comes from the kernel) told us which
+                    // union field to use.
+                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
+                    let address = mmio.phys_addr;
+                    let size = min(mmio.len as usize, mmio.data.len());
+                    if mmio.is_write != 0 {
+                        Ok(VcpuExit::MmioWrite {
+                            address,
+                            size,
+                            data: mmio.data,
+                        })
+                    } else {
+                        Ok(VcpuExit::MmioRead { address, size })
+                    }
+                }
+                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
+                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
+                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
+                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
+                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
+                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
+                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
+                KVM_EXIT_FAIL_ENTRY => Ok(VcpuExit::FailEntry),
+                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
+                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
+                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
+                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
+                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
+                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
+                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
+                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
+                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
+                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
+                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
+                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
+                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
+                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
+                KVM_EXIT_SYSTEM_EVENT => {
+                    // Safe because we know the exit reason told us this union
+                    // field is valid
+                    let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
+                    let event_flags = unsafe { run.__bindgen_anon_1.system_event.flags };
+                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
+                }
+                r => panic!("unknown kvm exit reason: {}", r),
+            }
+        } else {
+            errno_result()
+        }
+    }
+}
+
+impl Deref for RunnableVcpu {
+    type Target = Vcpu;
+    fn deref(&self) -> &Self::Target {
+        &self.vcpu
+    }
+}
+
+impl DerefMut for RunnableVcpu {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.vcpu
+    }
+}
+
+impl AsRawFd for RunnableVcpu {
+    fn as_raw_fd(&self) -> RawFd {
+        self.vcpu.as_raw_fd()
+    }
+}
+
+impl Drop for RunnableVcpu {
+    fn drop(&mut self) {
+        VCPU_THREAD.with(|v| {
+            // This assumes that a failure in `BlockedSignal::new` means the signal is already
+            // blocked and therefore it should not be unblocked on exit.
+            let _blocked_signal = &(*v.borrow())
+                .as_ref()
+                .and_then(|state| state.signal_num)
+                .map(BlockedSignal::new);
+
+            *v.borrow_mut() = None;
+        });
+    }
+}
+
 /// Wrapper for kvm_cpuid2 which has a zero length array at the end.
 /// Hides the zero length array behind a bounds check.
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
@@ -1858,6 +1890,28 @@ impl CpuId {
     }
 }
 
+// Represents a temporarily blocked signal. It will unblock the signal when dropped.
+struct BlockedSignal {
+    signal_num: c_int,
+}
+
+impl BlockedSignal {
+    // Returns a `BlockedSignal` if the specified signal can be blocked, otherwise None.
+    fn new(signal_num: c_int) -> Option<BlockedSignal> {
+        if block_signal(signal_num).is_ok() {
+            Some(BlockedSignal { signal_num })
+        } else {
+            None
+        }
+    }
+}
+
+impl Drop for BlockedSignal {
+    fn drop(&mut self) {
+        let _ = unblock_signal(self.signal_num).expect("failed to restore signal mask");
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/kvm/tests/dirty_log.rs b/kvm/tests/dirty_log.rs
index 527af462df..1efe135a7b 100644
--- a/kvm/tests/dirty_log.rs
+++ b/kvm/tests/dirty_log.rs
@@ -52,8 +52,9 @@ fn test_run() {
     )
     .expect("failed to register memory");
 
+    let runnable_vcpu = vcpu.to_runnable(None).unwrap();
     loop {
-        match vcpu.run().expect("run failed") {
+        match runnable_vcpu.run().expect("run failed") {
             VcpuExit::Hlt => break,
             r => panic!("unexpected exit reason: {:?}", r),
         }
diff --git a/kvm/tests/read_only_memory.rs b/kvm/tests/read_only_memory.rs
index 5aa7bcc78f..d8c0e1d905 100644
--- a/kvm/tests/read_only_memory.rs
+++ b/kvm/tests/read_only_memory.rs
@@ -74,8 +74,9 @@ fn test_run() {
 
     // Ensure we get exactly 1 exit from attempting to write to read only memory.
     let mut exits = 0;
+    let runnable_vcpu = vcpu.to_runnable(None).unwrap();
     loop {
-        match vcpu.run().expect("run failed") {
+        match runnable_vcpu.run().expect("run failed") {
             VcpuExit::Hlt => break,
             VcpuExit::MmioWrite {
                 address,
diff --git a/kvm/tests/real_run_adder.rs b/kvm/tests/real_run_adder.rs
index a419ad92fc..60869ed2c1 100644
--- a/kvm/tests/real_run_adder.rs
+++ b/kvm/tests/real_run_adder.rs
@@ -49,8 +49,9 @@ fn test_run() {
     vcpu.set_regs(&vcpu_regs).expect("set regs failed");
 
     let mut out = String::new();
+    let runnable_vcpu = vcpu.to_runnable(None).unwrap();
     loop {
-        match vcpu.run().expect("run failed") {
+        match runnable_vcpu.run().expect("run failed") {
             VcpuExit::IoOut {
                 port: 0x3f8,
                 size,
diff --git a/src/linux.rs b/src/linux.rs
index 1dd10048de..f692ea04f4 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -1207,8 +1207,38 @@ impl VcpuRunMode {
     }
 }
 
+// Converts a vcpu into a runnable vcpu if possible. On failure, returns `None`.
+fn runnable_vcpu(vcpu: Vcpu, use_kvm_signals: bool, cpu_id: u32) -> Option<RunnableVcpu> {
+    if use_kvm_signals {
+        match get_blocked_signals() {
+            Ok(mut v) => {
+                v.retain(|&x| x != SIGRTMIN() + 0);
+                if let Err(e) = vcpu.set_signal_mask(&v) {
+                    error!(
+                        "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
+                        cpu_id, e
+                    );
+                    return None;
+                }
+            }
+            Err(e) => {
+                error!("Failed to retrieve signal mask for vcpu {} : {}", cpu_id, e);
+                return None;
+            }
+        };
+    }
+
+    match vcpu.to_runnable(Some(SIGRTMIN() + 0)) {
+        Ok(v) => Some(v),
+        Err(e) => {
+            error!("Failed to set thread id for vcpu {} : {}", cpu_id, e);
+            None
+        }
+    }
+}
+
 fn run_vcpu(
-    mut vcpu: Vcpu,
+    vcpu: Vcpu,
     cpu_id: u32,
     vcpu_affinity: Vec<usize>,
     start_barrier: Arc<Barrier>,
@@ -1228,34 +1258,11 @@ fn run_vcpu(
                 }
             }
 
-            let mut sig_ok = true;
-            if use_kvm_signals {
-                match get_blocked_signals() {
-                    Ok(mut v) => {
-                        v.retain(|&x| x != SIGRTMIN() + 0);
-                        if let Err(e) = vcpu.set_signal_mask(&v) {
-                            error!(
-                                "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
-                                cpu_id, e
-                            );
-                            sig_ok = false;
-                        }
-                    }
-                    Err(e) => {
-                        error!(
-                            "Failed to retrieve signal mask for vcpu {} : {}",
-                            cpu_id, e
-                        );
-                        sig_ok = false;
-                    }
-                };
-            } else {
-                vcpu.set_thread_id(SIGRTMIN() + 0);
-            }
+            let vcpu = runnable_vcpu(vcpu, use_kvm_signals, cpu_id);
 
             start_barrier.wait();
 
-            if sig_ok {
+            if let Some(vcpu) = vcpu {
                 'vcpu_loop: loop {
                     let mut interrupted_by_signal = false;
                     match vcpu.run() {
diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs
index f0d6932e3f..5f9a5db4fa 100644
--- a/src/plugin/mod.rs
+++ b/src/plugin/mod.rs
@@ -420,7 +420,7 @@ pub fn run_vcpus(
         let vcpu_thread_barrier = vcpu_thread_barrier.clone();
         let vcpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEventFd)?;
         let vcpu_plugin = plugin.create_vcpu(cpu_id)?;
-        let mut vcpu = Vcpu::new(cpu_id as c_ulong, kvm, vm).map_err(Error::CreateVcpu)?;
+        let vcpu = Vcpu::new(cpu_id as c_ulong, kvm, vm).map_err(Error::CreateVcpu)?;
 
         vcpu_handles.push(
             thread::Builder::new()
@@ -431,10 +431,12 @@ pub fn run_vcpus(
                         // because we will be using first RT signal to kick the VCPU.
                         vcpu.set_signal_mask(&[])
                             .expect("failed to set up KVM VCPU signal mask");
-                    } else {
-                        vcpu.set_thread_id(SIGRTMIN() + 0);
                     }
 
+                    let vcpu = vcpu
+                        .to_runnable(Some(SIGRTMIN() + 0))
+                        .expect("Failed to set thread id");
+
                     let res = vcpu_plugin.init(&vcpu);
                     vcpu_thread_barrier.wait();
                     if let Err(e) = res {
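
For context, the calling pattern the new API enforces looks like the following minimal sketch. This is illustrative only and not part of the patch; it assumes a `Vcpu` that has already been set up with guest memory and registers, as in the tests above, and uses only `to_runnable`, `run`, and `VcpuExit` from this change.

use kvm::{Vcpu, VcpuExit};

// Hypothetical helper: runs an already-configured Vcpu on the current thread until it halts.
fn run_to_halt(vcpu: Vcpu) {
    // A Vcpu can no longer be run directly; it must first be bound to the current thread.
    // Passing `None` means no signal is temporarily blocked while the thread-local entry is set.
    let runnable_vcpu = vcpu.to_runnable(None).expect("to_runnable failed");
    loop {
        match runnable_vcpu.run().expect("run failed") {
            VcpuExit::Hlt => break,
            r => panic!("unexpected exit reason: {:?}", r),
        }
    }
    // Dropping `runnable_vcpu` clears the thread-local VcpuThread entry, so another Vcpu
    // may be bound to this thread afterwards.
}

Because `RunnableVcpu` carries a `PhantomData<*mut u8>` it is not `Send`, so once a vcpu has been converted it cannot migrate to another thread; `Deref`/`DerefMut` keep all existing `Vcpu` methods available on it.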