devices: vfio: make global container state explicit

The vfio code previously managed singleton instances of VfioContainer
using thread-local variables hidden inside the implementation of
vfio_get_container().

Make the caller hold onto a VfioContainerManager rather than implicitly
mutating global state inside a library function in the vfio crate. This
also removes the possibility of accidentally creating new VfioContainer
instances by calling from a different thread, since the containers were
previously thread-local rather than truly global for some reason.

This requires threading the vfio_container_manager variable through a
few function calls, but this also helps clarify ownership and lifetime
of the objects involved.

BUG=None
TEST=tools/dev_container tools/presubmit

Change-Id: I2ba1be561c48143ff14e391c23bad1c6783b73ec
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5601348
Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
Reviewed-by: Frederick Mayle <fmayle@google.com>
Reviewed-by: Pierre-Clément Tosi <ptosi@google.com>
This commit is contained in:
Daniel Verkamp 2024-06-05 15:35:22 -07:00 committed by crosvm LUCI
parent 66e37a886d
commit 809b1a718c
4 changed files with 85 additions and 105 deletions

View file

@ -337,7 +337,6 @@ pub enum Exit {
ConnectTube = 0xE0000064,
BalloonDeviceNew = 0xE0000065,
BalloonStats = 0xE0000066,
BorrowVfioContainer = 0xE0000067,
OpenCompositeFooterFile = 0xE0000068,
OpenCompositeHeaderFile = 0xE0000069,
OpenCompositeImageFile = 0xE0000070,

View file

@ -2,7 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::cell::RefCell;
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::File;
@ -58,8 +57,6 @@ use crate::IommuDevType;
#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
#[error("failed to borrow global vfio container")]
BorrowVfioContainer,
#[error("failed to duplicate VfioContainer")]
ContainerDupError,
#[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
@ -867,122 +864,91 @@ impl AsRawDescriptor for VfioGroup {
}
}
/// A helper trait for managing VFIO setup
pub trait VfioCommonTrait: Send + Sync {
/// A helper struct for managing VFIO containers
#[derive(Default)]
pub struct VfioContainerManager {
/// One VFIO container shared by all VFIO devices that don't attach to any IOMMU device.
no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
/// For IOMMU enabled devices, all VFIO groups that share the same IOVA space are managed by
/// one VFIO container.
iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,
/// One VFIO container shared by all VFIO devices that attach to the CoIOMMU device.
coiommu_container: Option<Arc<Mutex<VfioContainer>>>,
/// One VFIO container shared by all VFIO devices that attach to pKVM.
pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
impl VfioContainerManager {
pub fn new() -> Self {
Self::default()
}
/// The single place to create a VFIO container for a PCI endpoint.
///
/// The policy to determine whether an individual or a shared VFIO container
/// will be created for this device is governed by the physical PCI topology,
/// and the argument iommu_enabled.
/// and the argument iommu_type.
///
/// # Arguments
///
/// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
/// * `iommu_enabled` - whether virtio IOMMU is enabled on this device
fn vfio_get_container<P: AsRef<Path>>(
iommu_dev: IommuDevType,
sysfspath: Option<P>,
) -> Result<Arc<Mutex<VfioContainer>>>;
}
thread_local! {
// One VFIO container is shared by all VFIO devices that don't
// attach to the virtio IOMMU device
static NO_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
// For IOMMU enabled devices, all VFIO groups that share the same IOVA space
// are managed by one VFIO container
static IOMMU_CONTAINERS: RefCell<Option<Vec<Arc<Mutex<VfioContainer>>>>> = RefCell::new(Some(Default::default()));
// One VFIO container is shared by all VFIO devices that
// attach to the CoIOMMU device
static COIOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
// One VFIO container is shared by all VFIO devices that attach to pKVM
static PKVM_IOMMU_CONTAINER: RefCell<Option<Arc<Mutex<VfioContainer>>>> = RefCell::new(None);
}
pub struct VfioCommonSetup;
impl VfioCommonTrait for VfioCommonSetup {
fn vfio_get_container<P: AsRef<Path>>(
iommu_dev: IommuDevType,
/// * `iommu_type` - which type of IOMMU is enabled on this device
pub fn get_container<P: AsRef<Path>>(
&mut self,
iommu_type: IommuDevType,
sysfspath: Option<P>,
) -> Result<Arc<Mutex<VfioContainer>>> {
match iommu_dev {
match iommu_type {
IommuDevType::NoIommu => {
// One VFIO container is used for all IOMMU disabled groups
NO_IOMMU_CONTAINER.with(|v| {
if v.borrow().is_some() {
if let Some(ref container) = *v.borrow() {
Ok(container.clone())
} else {
Err(VfioError::BorrowVfioContainer)
}
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
*v.borrow_mut() = Some(container.clone());
Ok(container)
}
})
// One VFIO container is used for all IOMMU disabled groups.
if let Some(container) = &self.no_iommu_container {
Ok(container.clone())
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
self.no_iommu_container = Some(container.clone());
Ok(container)
}
}
IommuDevType::VirtioIommu => {
let path = sysfspath.ok_or(VfioError::InvalidPath)?;
let group_id = VfioGroup::get_group_id(path)?;
// One VFIO container is used for all devices belong to one VFIO group
// One VFIO container is used for all devices that belong to one VFIO group.
// NOTE: vfio_wrapper relies on each container containing exactly one group.
IOMMU_CONTAINERS.with(|v| {
if let Some(ref mut containers) = *v.borrow_mut() {
let container = containers
.iter()
.find(|container| container.lock().is_group_set(group_id));
match container {
None => {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
containers.push(container.clone());
Ok(container)
}
Some(container) => Ok(container.clone()),
}
} else {
Err(VfioError::BorrowVfioContainer)
}
})
if let Some(container) = self
.iommu_containers
.iter()
.find(|container| container.lock().is_group_set(group_id))
{
Ok(container.clone())
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
self.iommu_containers.push(container.clone());
Ok(container)
}
}
IommuDevType::CoIommu => {
// One VFIO container is used for devices attached to CoIommu
COIOMMU_CONTAINER.with(|v| {
if v.borrow().is_some() {
if let Some(ref container) = *v.borrow() {
Ok(container.clone())
} else {
Err(VfioError::BorrowVfioContainer)
}
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
*v.borrow_mut() = Some(container.clone());
Ok(container)
}
})
if let Some(container) = &self.coiommu_container {
Ok(container.clone())
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
self.coiommu_container = Some(container.clone());
Ok(container)
}
}
IommuDevType::PkvmPviommu => {
// One VFIO container is used for devices attached to pKVM
PKVM_IOMMU_CONTAINER.with(|v| {
if v.borrow().is_some() {
if let Some(ref container) = *v.borrow() {
Ok(container.clone())
} else {
Err(VfioError::BorrowVfioContainer)
}
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
*v.borrow_mut() = Some(container.clone());
Ok(container)
}
})
if let Some(container) = &self.pkvm_iommu_container {
Ok(container.clone())
} else {
let container = Arc::new(Mutex::new(VfioContainer::new()?));
self.pkvm_iommu_container = Some(container.clone());
Ok(container)
}
}
}
}

View file

@ -79,8 +79,7 @@ use devices::create_devices_worker_thread;
use devices::serial_device::SerialHardware;
#[cfg(all(feature = "pvclock", target_arch = "x86_64"))]
use devices::tsc::get_tsc_sync_mitigations;
use devices::vfio::VfioCommonSetup;
use devices::vfio::VfioCommonTrait;
use devices::vfio::VfioContainerManager;
#[cfg(feature = "gpu")]
use devices::virtio;
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
@ -767,6 +766,7 @@ fn create_devices(
iova_max_addr: &mut Option<u64>,
#[cfg(feature = "registered_events")] registered_evt_q: &SendTube,
#[cfg(feature = "pvclock")] pvclock_device_tube: Option<Tube>,
vfio_container_manager: &mut VfioContainerManager,
) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
#[cfg(feature = "balloon")]
@ -791,6 +791,7 @@ fn create_devices(
Some(&mut coiommu_attached_endpoints),
vfio_dev.iommu,
vfio_dev.dt_symbol.clone(),
vfio_container_manager,
)?;
match dev {
VfioDeviceVariant::Pci(vfio_pci_device) => {
@ -851,9 +852,9 @@ fn create_devices(
#[cfg(not(feature = "balloon"))]
let coiommu_tube: Option<Tube> = None;
if !coiommu_attached_endpoints.is_empty() {
let vfio_container =
VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
.context("failed to get vfio container")?;
let vfio_container = vfio_container_manager
.get_container(IommuDevType::CoIommu, None as Option<&Path>)
.context("failed to get vfio container")?;
let (coiommu_host_tube, coiommu_device_tube) =
Tube::pair().context("failed to create coiommu tube")?;
vm_memory_control_tubes.push(VmMemoryTube {
@ -1901,6 +1902,8 @@ where
BTreeMap::new();
let mut iova_max_addr: Option<u64> = None;
let mut vfio_container_manager = VfioContainerManager::new();
// pvclock gets a tube for handling suspend/resume requests from the main thread.
#[cfg(feature = "pvclock")]
let (pvclock_host_tube, pvclock_device_tube) = if cfg.pvclock {
@ -1941,6 +1944,7 @@ where
&reg_evt_wrtube,
#[cfg(feature = "pvclock")]
pvclock_device_tube,
&mut vfio_container_manager,
)?;
#[cfg(feature = "pci-hotplug")]
@ -2220,6 +2224,7 @@ where
#[cfg(feature = "pvclock")]
pvclock_host_tube,
metrics_recv,
vfio_container_manager,
)
}
@ -2332,6 +2337,7 @@ fn add_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
iommu_host_tube: Option<&Tube>,
device: &HotPlugDeviceInfo,
#[cfg(feature = "swap")] swap_controller: &mut Option<SwapController>,
vfio_container_manager: &mut VfioContainerManager,
) -> Result<()> {
let host_addr = PciAddress::from_path(&device.path)
.context("failed to parse hotplug device's PCI address")?;
@ -2407,6 +2413,7 @@ fn add_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
IommuDevType::NoIommu
},
None,
vfio_container_manager,
)?;
let vfio_pci_device = match vfio_device {
VfioDeviceVariant::Pci(pci) => Box::new(pci),
@ -2811,6 +2818,7 @@ fn handle_hotplug_command<V: VmArch, Vcpu: VcpuArch>(
device: &HotPlugDeviceInfo,
add: bool,
#[cfg(feature = "swap")] swap_controller: &mut Option<SwapController>,
vfio_container_manager: &mut VfioContainerManager,
) -> VmResponse {
let iommu_host_tube = if cfg.vfio_isolate_hotplug {
iommu_host_tube
@ -2831,6 +2839,7 @@ fn handle_hotplug_command<V: VmArch, Vcpu: VcpuArch>(
device,
#[cfg(feature = "swap")]
swap_controller,
vfio_container_manager,
)
} else {
remove_hotplug_device(linux, sys_allocator, iommu_host_tube, device)
@ -2876,6 +2885,7 @@ struct ControlLoopState<'a, V: VmArch, Vcpu: VcpuArch> {
registered_evt_tubes: &'a mut HashMap<RegisteredEvent, HashSet<AddressedProtoTube>>,
#[cfg(feature = "pvclock")]
pvclock_host_tube: Option<Arc<Tube>>,
vfio_container_manager: &'a mut VfioContainerManager,
}
fn process_vm_request<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
@ -2914,6 +2924,7 @@ fn process_vm_request<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
add,
#[cfg(feature = "swap")]
state.swap_controller,
state.vfio_container_manager,
)
}
@ -2921,6 +2932,7 @@ fn process_vm_request<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
{
// Suppress warnings.
let _ = (device, add);
let _ = &state.vfio_container_manager;
VmResponse::Ok
}
}
@ -3309,6 +3321,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
#[cfg(feature = "pvclock")] pvclock_host_tube: Option<Tube>,
metrics_tube: RecvTube,
mut vfio_container_manager: VfioContainerManager,
) -> Result<ExitState> {
#[derive(EventToken)]
enum Token {
@ -3878,6 +3891,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
registered_evt_tubes: &mut registered_evt_tubes,
#[cfg(feature = "pvclock")]
pvclock_host_tube: pvclock_host_tube.clone(),
vfio_container_manager: &mut vfio_container_manager,
};
let (exit_requested, mut ids_to_remove, add_tubes) =
process_vm_control_event(&mut state, id, socket)?;

View file

@ -25,8 +25,7 @@ use base::*;
use devices::serial_device::SerialHardware;
use devices::serial_device::SerialParameters;
use devices::serial_device::SerialType;
use devices::vfio::VfioCommonSetup;
use devices::vfio::VfioCommonTrait;
use devices::vfio::VfioContainerManager;
use devices::virtio;
use devices::virtio::block::DiskOption;
use devices::virtio::console::asynchronous::AsyncConsole;
@ -1331,8 +1330,10 @@ pub fn create_vfio_device(
coiommu_endpoints: Option<&mut Vec<u16>>,
iommu_dev: IommuDevType,
dt_symbol: Option<String>,
vfio_container_manager: &mut VfioContainerManager,
) -> DeviceResult<(VfioDeviceVariant, Option<Minijail>, Option<VfioWrapper>)> {
let vfio_container = VfioCommonSetup::vfio_get_container(iommu_dev, Some(vfio_path))
let vfio_container = vfio_container_manager
.get_container(iommu_dev, Some(vfio_path))
.context("failed to get vfio container")?;
let (vfio_host_tube_mem, vfio_device_tube_mem) =