mirror of
https://chromium.googlesource.com/crosvm/crosvm
synced 2025-02-06 10:32:10 +00:00
ce33acefff
New Vcpu methods get the vcpu ID for a vcpu and the current thread. A new VcpuX86_64 method injects NMIs. request_interrupt_window now takes a bool parameter, so the request window flag can be turned off. These methods are to support the userspace irqchip. The thread-local vcpu ID will help route MMIO requests to the appropriate APIC. Clearing the interrupt window request is needed because KVM won't clear the flag if the kernel APIC is disabled. BUG=chromium:1077058 TEST=boots with KVM kernel irqchip, KVM split irqchip Change-Id: I2a08988e71663f0d2a63985f3239568c7a9befea Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2419673 Reviewed-by: Daniel Verkamp <dverkamp@chromium.org> Tested-by: kokoro <noreply+kokoro@google.com> Commit-Queue: Colin Downs-Razouk <colindr@google.com>
418 lines
15 KiB
Rust
418 lines
15 KiB
Rust
// Copyright 2020 The Chromium OS Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
//! A crate for abstracting the underlying kernel hypervisor used in crosvm.
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
pub mod aarch64;
|
|
pub mod caps;
|
|
pub mod kvm;
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
pub mod x86_64;
|
|
|
|
use std::os::raw::c_int;
|
|
|
|
use base::{Event, MappedRegion, RawDescriptor, Result, SafeDescriptor};
|
|
use msg_socket::MsgOnSocket;
|
|
use vm_memory::{GuestAddress, GuestMemory};
|
|
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
pub use crate::aarch64::*;
|
|
pub use crate::caps::*;
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
pub use crate::x86_64::*;
|
|
|
|
/// An index in the list of guest-mapped memory regions.
|
|
pub type MemSlot = u32;
|
|
|
|
/// A trait for checking hypervisor capabilities.
|
|
pub trait Hypervisor: Send {
|
|
/// Makes a shallow clone of this `Hypervisor`.
|
|
fn try_clone(&self) -> Result<Self>
|
|
where
|
|
Self: Sized;
|
|
|
|
/// Checks if a particular `HypervisorCap` is available.
|
|
fn check_capability(&self, cap: &HypervisorCap) -> bool;
|
|
}
|
|
|
|
/// A wrapper for using a VM and getting/setting its state.
|
|
pub trait Vm: Send {
|
|
/// Makes a shallow clone of this `Vm`.
|
|
fn try_clone(&self) -> Result<Self>
|
|
where
|
|
Self: Sized;
|
|
|
|
/// Checks if a particular `VmCap` is available.
|
|
///
|
|
/// This is distinct from the `Hypervisor` version of this method because some extensions depend
|
|
/// on the particular `Vm` existence. This method is encouraged because it more accurately
|
|
/// reflects the usable capabilities.
|
|
fn check_capability(&self, c: VmCap) -> bool;
|
|
|
|
/// Checks if a particular hypervisor-specific capability is available.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `cap` - hypervisor-specific constant defined by the hypervisor API (e.g., kvm.h)
|
|
fn check_raw_capability(&self, cap: u32) -> bool;
|
|
|
|
/// Gets the guest-mapped memory for the Vm.
|
|
fn get_memory(&self) -> &GuestMemory;
|
|
|
|
/// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`.
|
|
///
|
|
/// The slot that was assigned the memory mapping is returned on success. The slot can be given
|
|
/// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back
|
|
/// ownership of `mem_region`.
|
|
///
|
|
/// Note that memory inserted into the VM's address space must not overlap with any other memory
|
|
/// slot's region.
|
|
///
|
|
/// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
|
|
/// write will trigger a mmio VM exit, leaving the memory untouched.
|
|
///
|
|
/// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
|
|
/// by the guest with `get_dirty_log`.
|
|
fn add_memory_region(
|
|
&mut self,
|
|
guest_addr: GuestAddress,
|
|
mem_region: Box<dyn MappedRegion>,
|
|
read_only: bool,
|
|
log_dirty_pages: bool,
|
|
) -> Result<MemSlot>;
|
|
|
|
/// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at
|
|
/// `offset` from the start of the region. `offset` must be page aligned.
|
|
fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>;
|
|
|
|
/// Removes and drops the `UserMemoryRegion` that was previously added at the given slot.
|
|
fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>;
|
|
|
|
/// Creates an emulated device.
|
|
fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>;
|
|
|
|
/// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
|
|
/// `slot`. Only works on VMs that support `VmCap::DirtyLog`.
|
|
///
|
|
/// The size of `dirty_log` must be at least as many bits as there are pages in the memory
|
|
/// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
|
|
/// be 2 bytes or greater.
|
|
fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>;
|
|
|
|
/// Registers an event to be signaled whenever a certain address is written to.
|
|
///
|
|
/// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
|
|
/// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
|
|
/// and must match the expected size of the guest's write.
|
|
///
|
|
/// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
|
|
/// triggered is prevented.
|
|
fn register_ioevent(
|
|
&mut self,
|
|
evt: &Event,
|
|
addr: IoEventAddress,
|
|
datamatch: Datamatch,
|
|
) -> Result<()>;
|
|
|
|
/// Unregisters an event previously registered with `register_ioevent`.
|
|
///
|
|
/// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
|
|
/// `register_ioevent`.
|
|
fn unregister_ioevent(
|
|
&mut self,
|
|
evt: &Event,
|
|
addr: IoEventAddress,
|
|
datamatch: Datamatch,
|
|
) -> Result<()>;
|
|
|
|
/// Trigger any matching registered io events based on an MMIO or PIO write at `addr`. The
|
|
/// `data` slice represents the contents and length of the write, which is used to compare with
|
|
/// the registered io events' Datamatch values. If the hypervisor does in-kernel IO event
|
|
/// delivery, this is a no-op.
|
|
fn handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>;
|
|
|
|
/// Retrieves the current timestamp of the paravirtual clock as seen by the current guest.
|
|
/// Only works on VMs that support `VmCap::PvClock`.
|
|
fn get_pvclock(&self) -> Result<ClockState>;
|
|
|
|
/// Sets the current timestamp of the paravirtual clock as seen by the current guest.
|
|
/// Only works on VMs that support `VmCap::PvClock`.
|
|
fn set_pvclock(&self, state: &ClockState) -> Result<()>;
|
|
}
|
|
|
|
/// A unique fingerprint for a particular `VcpuRunHandle`, used in `Vcpu` impls to ensure the
|
|
/// `VcpuRunHandle ` they receive is the same one that was returned from `take_run_handle`.
|
|
#[derive(Clone, PartialEq, Eq)]
|
|
pub struct VcpuRunHandleFingerprint(u64);
|
|
|
|
impl VcpuRunHandleFingerprint {
|
|
pub fn as_u64(&self) -> u64 {
|
|
self.0
|
|
}
|
|
}
|
|
|
|
/// A handle returned by a `Vcpu` to be used with `Vcpu::run` to execute a virtual machine's VCPU.
|
|
///
|
|
/// This is used to ensure that the caller has bound the `Vcpu` to a thread with
|
|
/// `Vcpu::take_run_handle` and to execute hypervisor specific cleanup routines when dropped.
|
|
pub struct VcpuRunHandle {
|
|
drop_fn: fn(),
|
|
fingerprint: VcpuRunHandleFingerprint,
|
|
// Prevents Send+Sync for this type.
|
|
phantom: std::marker::PhantomData<*mut ()>,
|
|
}
|
|
|
|
impl VcpuRunHandle {
|
|
/// Used by `Vcpu` impls to create a unique run handle, that when dropped, will call the given
|
|
/// `drop_fn`.
|
|
pub fn new(drop_fn: fn()) -> Self {
|
|
// Creates a probably unique number with a hash of the current thread id and epoch time.
|
|
use std::hash::{Hash, Hasher};
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
std::time::Instant::now().hash(&mut hasher);
|
|
std::thread::current().id().hash(&mut hasher);
|
|
Self {
|
|
drop_fn,
|
|
fingerprint: VcpuRunHandleFingerprint(hasher.finish()),
|
|
phantom: std::marker::PhantomData,
|
|
}
|
|
}
|
|
|
|
/// Gets the unique fingerprint which may be copied and compared freely.
|
|
pub fn fingerprint(&self) -> &VcpuRunHandleFingerprint {
|
|
&self.fingerprint
|
|
}
|
|
}
|
|
|
|
impl Drop for VcpuRunHandle {
|
|
fn drop(&mut self) {
|
|
(self.drop_fn)();
|
|
}
|
|
}
|
|
|
|
/// A virtual CPU holding a virtualized hardware thread's state, such as registers and interrupt
|
|
/// state, which may be used to execute virtual machines.
|
|
///
|
|
/// To run, `take_run_handle` must be called to lock the vcpu to a thread. Then the returned
|
|
/// `VcpuRunHandle` can be used for running.
|
|
pub trait Vcpu: downcast_rs::DowncastSync {
|
|
/// Makes a shallow clone of this `Vcpu`.
|
|
fn try_clone(&self) -> Result<Self>
|
|
where
|
|
Self: Sized;
|
|
|
|
/// Casts this architecture specific trait object to the base trait object `Vcpu`.
|
|
fn as_vcpu(&self) -> &dyn Vcpu;
|
|
|
|
/// Returns a unique `VcpuRunHandle`. A `VcpuRunHandle` is required to run the guest.
|
|
///
|
|
/// Assigns a vcpu to the current thread so that signal handlers can call
|
|
/// set_local_immediate_exit(). An optional signal number will be temporarily blocked while
|
|
/// assigning the vcpu to the thread and later blocked when `VcpuRunHandle` is destroyed.
|
|
///
|
|
/// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
|
|
fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>;
|
|
|
|
/// Runs the VCPU until it exits, returning the reason for the exit.
|
|
///
|
|
/// Note that the state of the VCPU and associated VM must be setup first for this to do
|
|
/// anything useful. The given `run_handle` must be the same as the one returned by
|
|
/// `take_run_handle` for this `Vcpu`.
|
|
fn run(&self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>;
|
|
|
|
/// Returns the vcpu id.
|
|
fn id(&self) -> usize;
|
|
|
|
/// Sets the bit that requests an immediate exit.
|
|
fn set_immediate_exit(&self, exit: bool);
|
|
|
|
/// Sets/clears the bit for immediate exit for the vcpu on the current thread.
|
|
fn set_local_immediate_exit(exit: bool)
|
|
where
|
|
Self: Sized;
|
|
|
|
/// Returns a function pointer that invokes `set_local_immediate_exit` in a
|
|
/// signal-safe way when called.
|
|
fn set_local_immediate_exit_fn(&self) -> extern "C" fn();
|
|
|
|
/// Sets the data received by a mmio read, ioport in, or hypercall instruction.
|
|
///
|
|
/// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`,
|
|
/// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`.
|
|
fn set_data(&self, data: &[u8]) -> Result<()>;
|
|
|
|
/// Signals to the hypervisor that this guest is being paused by userspace. Only works on Vms
|
|
/// that support `VmCapability::PvClockSuspend`.
|
|
fn pvclock_ctrl(&self) -> Result<()>;
|
|
|
|
/// Specifies set of signals that are blocked during execution of `RunnableVcpu::run`. Signals
|
|
/// that are not blocked will cause run to return with `VcpuExit::Intr`. Only works on Vms that
|
|
/// support `VmCapability::SignalMask`.
|
|
fn set_signal_mask(&self, signals: &[c_int]) -> Result<()>;
|
|
|
|
/// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the
|
|
/// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any.
|
|
fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>;
|
|
}
|
|
|
|
downcast_rs::impl_downcast!(sync Vcpu);
|
|
|
|
/// An address either in programmable I/O space or in memory mapped I/O space.
|
|
#[derive(Copy, Clone, Debug, MsgOnSocket, PartialEq, Eq, std::hash::Hash)]
|
|
pub enum IoEventAddress {
|
|
Pio(u64),
|
|
Mmio(u64),
|
|
}
|
|
|
|
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
|
|
#[derive(PartialEq, Eq)]
|
|
pub enum Datamatch {
|
|
AnyLength,
|
|
U8(Option<u8>),
|
|
U16(Option<u16>),
|
|
U32(Option<u32>),
|
|
U64(Option<u64>),
|
|
}
|
|
|
|
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
|
|
#[derive(Debug)]
|
|
pub enum VcpuExit {
|
|
/// An out port instruction was run on the given port with the given data.
|
|
IoOut {
|
|
port: u16,
|
|
size: usize,
|
|
data: [u8; 8],
|
|
},
|
|
/// An in port instruction was run on the given port.
|
|
///
|
|
/// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
|
|
/// called again.
|
|
IoIn {
|
|
port: u16,
|
|
size: usize,
|
|
},
|
|
/// A read instruction was run against the given MMIO address.
|
|
///
|
|
/// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
|
|
/// called again.
|
|
MmioRead {
|
|
address: u64,
|
|
size: usize,
|
|
},
|
|
/// A write instruction was run against the given MMIO address with the given data.
|
|
MmioWrite {
|
|
address: u64,
|
|
size: usize,
|
|
data: [u8; 8],
|
|
},
|
|
IoapicEoi {
|
|
vector: u8,
|
|
},
|
|
HypervSynic {
|
|
msr: u32,
|
|
control: u64,
|
|
evt_page: u64,
|
|
msg_page: u64,
|
|
},
|
|
HypervHcall {
|
|
input: u64,
|
|
params: [u64; 2],
|
|
},
|
|
Unknown,
|
|
Exception,
|
|
Hypercall,
|
|
Debug,
|
|
Hlt,
|
|
IrqWindowOpen,
|
|
Shutdown,
|
|
FailEntry {
|
|
hardware_entry_failure_reason: u64,
|
|
},
|
|
Intr,
|
|
SetTpr,
|
|
TprAccess,
|
|
S390Sieic,
|
|
S390Reset,
|
|
Dcr,
|
|
Nmi,
|
|
InternalError,
|
|
Osi,
|
|
PaprHcall,
|
|
S390Ucontrol,
|
|
Watchdog,
|
|
S390Tsch,
|
|
Epr,
|
|
/// The cpu triggered a system level event which is specified by the type field.
|
|
/// The first field is the event type and the second field is flags.
|
|
/// The possible event types are shutdown, reset, or crash. So far there
|
|
/// are not any flags defined.
|
|
SystemEvent(u32 /* event_type */, u64 /* flags */),
|
|
}
|
|
|
|
/// A device type to create with `Vm.create_device`.
|
|
#[derive(Clone, Copy, Debug, PartialEq)]
|
|
pub enum DeviceKind {
|
|
/// VFIO device for direct access to devices from userspace
|
|
Vfio,
|
|
/// ARM virtual general interrupt controller v2
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
ArmVgicV2,
|
|
/// ARM virtual general interrupt controller v3
|
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
ArmVgicV3,
|
|
}
|
|
|
|
/// The source chip of an `IrqSource`
|
|
#[repr(C)]
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
pub enum IrqSourceChip {
|
|
PicPrimary,
|
|
PicSecondary,
|
|
Ioapic,
|
|
Gic,
|
|
}
|
|
|
|
/// A source of IRQs in an `IrqRoute`.
|
|
#[repr(C)]
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
pub enum IrqSource {
|
|
Irqchip { chip: IrqSourceChip, pin: u32 },
|
|
Msi { address: u64, data: u32 },
|
|
}
|
|
|
|
/// A single route for an IRQ.
|
|
#[repr(C)]
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
pub struct IrqRoute {
|
|
pub gsi: u32,
|
|
pub source: IrqSource,
|
|
}
|
|
|
|
/// The state of the paravirtual clock.
|
|
#[derive(Debug, Default, Copy, Clone)]
|
|
pub struct ClockState {
|
|
/// Current pv clock timestamp, as seen by the guest
|
|
pub clock: u64,
|
|
/// Hypervisor-specific feature flags for the pv clock
|
|
pub flags: u32,
|
|
}
|
|
|
|
/// The MPState represents the state of a processor.
|
|
#[repr(C)]
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
pub enum MPState {
|
|
/// the vcpu is currently running (x86/x86_64,arm/arm64)
|
|
Runnable,
|
|
/// the vcpu is an application processor (AP) which has not yet received an INIT signal
|
|
/// (x86/x86_64)
|
|
Uninitialized,
|
|
/// the vcpu has received an INIT signal, and is now ready for a SIPI (x86/x86_64)
|
|
InitReceived,
|
|
/// the vcpu has executed a HLT instruction and is waiting for an interrupt (x86/x86_64)
|
|
Halted,
|
|
/// the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) (x86/x86_64)
|
|
SipiReceived,
|
|
/// the vcpu is stopped (arm/arm64)
|
|
Stopped,
|
|
}
|