mirror of
https://chromium.googlesource.com/crosvm/crosvm
synced 2025-02-09 03:57:24 +00:00
crosvm: improve suspend implementation
As part of commit cadc84b32a
the crosvm
suspend command has been extended to generating sleep button injection
in order to put VM in suspend state. Additionally the crosvm suspend
command completed after the guest entered proper power state.
Nevertheless in some circumstances above implementation caused issues.
E.g. when the guest with pass-through devices entered run-time device
suspension, triggering a subsequent system suspend request fails and
makes VM inaccessible. It is because before entering system suspension,
devices which entered device run-time suspension needs to be resumed.
Therefore in problematic scenario:
- VmRequest::Suspend was triggered and it resulted with blocking till
guest entered proper suspend state
- during VM suspension process some pci config write accesses are made,
which in some cases triggers write_msi_reg-> write_msi_capability->
add_msi_route, which underneath sends VmIrqRequest::AddMsiRoute
request
- above VmIrqRequest::AddMsiRoute request can't be satisfied since
VmRequest::Suspend made the run_control wait loop busy waiting and
preventing further requests to be processed
In order to fix the above issue, instead of busy waiting directly in the
run_control wait loop for suspend notification, spawn a new thread, which
will generate the acpi power button and start busy waiting for a
notification that the VM actually entered suspend state. This allows to
postpone sending response over the control tube indicating that the
'crosvm suspend ...' command completed and in the same time process
other Vm*Requests.
Mentioned postponement of 'crosvm suspend ..' completion is crucial
since the full system suspension needs to be blocked till VM actually
enters proper suspend state.
BUG=b:248462336
TEST=With a setup where some device is pass-through to the VM and
--force-s2idle flag is used: wait till pass-through device enters
run-time suspend state and trigger "crosvm suspend
/run/vm/vm.<hash>/crosvm.sock" followed by "crosvm resume ..". Also make
sure that full system suspension/resumption doesn't make VM inaccessible
anymore.
Change-Id: Ic23461a78a62d2116cf5674c71d89f4f86ad96c3
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/3944915
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Commit-Queue: Grzegorz Jaszczyk <jaszczyk@google.com>
Reviewed-by: Noah Gold <nkgold@google.com>
This commit is contained in:
parent
14c9efb2b9
commit
8eae8e0d2d
5 changed files with 119 additions and 70 deletions
|
@ -198,7 +198,7 @@ pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
|
|||
pub pid_debug_label_map: BTreeMap<u32, String>,
|
||||
#[cfg(unix)]
|
||||
pub platform_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
|
||||
pub pm: Option<Arc<Mutex<dyn PmResource>>>,
|
||||
pub pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
|
||||
/// Devices to be notified before the system resumes from the S3 suspended state.
|
||||
pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
|
||||
pub root_config: Arc<Mutex<PciRoot>>,
|
||||
|
|
|
@ -29,7 +29,7 @@ pub enum BalloonTubeCommand {
|
|||
}
|
||||
|
||||
// BalloonStats holds stats returned from the stats_queue.
|
||||
#[derive(Default, Serialize, Deserialize, Debug)]
|
||||
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct BalloonStats {
|
||||
pub swap_in: Option<u64>,
|
||||
pub swap_out: Option<u64>,
|
||||
|
|
|
@ -2214,6 +2214,46 @@ fn remove_hotplug_device<V: VmArch, Vcpu: VcpuArch>(
|
|||
))
|
||||
}
|
||||
|
||||
pub fn trigger_vm_suspend_and_wait_for_entry(
|
||||
guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
|
||||
tube: &SendTube,
|
||||
response: vm_control::VmResponse,
|
||||
suspend_evt: Event,
|
||||
pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
|
||||
) {
|
||||
let (lock, cvar) = &*guest_suspended_cvar;
|
||||
let mut guest_suspended = lock.lock();
|
||||
|
||||
*guest_suspended = false;
|
||||
|
||||
// During suspend also emulate sleepbtn, which allows to suspend VM (if running e.g. acpid and
|
||||
// reacts on sleep button events)
|
||||
if let Some(pm) = pm {
|
||||
pm.lock().slpbtn_evt();
|
||||
} else {
|
||||
error!("generating sleepbtn during suspend not supported");
|
||||
}
|
||||
|
||||
// Wait for notification about guest suspension, if not received after 15sec,
|
||||
// proceed anyway.
|
||||
let result = cvar.wait_timeout(guest_suspended, std::time::Duration::from_secs(15));
|
||||
guest_suspended = result.0;
|
||||
|
||||
if result.1.timed_out() {
|
||||
warn!("Guest suspension timeout - proceeding anyway");
|
||||
} else if *guest_suspended {
|
||||
info!("Guest suspended");
|
||||
}
|
||||
|
||||
if let Err(e) = suspend_evt.signal() {
|
||||
error!("failed to trigger suspend event: {}", e);
|
||||
}
|
||||
// Now we ready to send response over the tube and communicate that VM suspend has finished
|
||||
if let Err(e) = tube.send(&response) {
|
||||
error!("failed to send VmResponse: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn handle_hotplug_command<V: VmArch, Vcpu: VcpuArch>(
|
||||
linux: &mut RunnableLinuxVm<V, Vcpu>,
|
||||
|
@ -2610,6 +2650,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
|
|||
match socket {
|
||||
TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
|
||||
Ok(request) => {
|
||||
let mut suspend_requested = false;
|
||||
let mut run_mode_opt = None;
|
||||
let response = match request {
|
||||
VmRequest::HotPlugCommand { device, add } => {
|
||||
|
@ -2640,30 +2681,70 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
|
|||
VmResponse::Ok
|
||||
}
|
||||
}
|
||||
_ => request.execute(
|
||||
&mut run_mode_opt,
|
||||
#[cfg(feature = "balloon")]
|
||||
balloon_host_tube.as_ref(),
|
||||
#[cfg(feature = "balloon")]
|
||||
&mut balloon_stats_id,
|
||||
disk_host_tubes,
|
||||
&mut linux.pm,
|
||||
#[cfg(feature = "gpu")]
|
||||
&gpu_control_tube,
|
||||
#[cfg(feature = "usb")]
|
||||
Some(&usb_control_tube),
|
||||
#[cfg(not(feature = "usb"))]
|
||||
None,
|
||||
&mut linux.bat_control,
|
||||
&vcpu_handles,
|
||||
cfg.force_s2idle,
|
||||
guest_suspended_cvar.clone(),
|
||||
),
|
||||
_ => {
|
||||
let response = request.execute(
|
||||
&mut run_mode_opt,
|
||||
#[cfg(feature = "balloon")]
|
||||
balloon_host_tube.as_ref(),
|
||||
#[cfg(feature = "balloon")]
|
||||
&mut balloon_stats_id,
|
||||
disk_host_tubes,
|
||||
&mut linux.pm,
|
||||
#[cfg(feature = "gpu")]
|
||||
&gpu_control_tube,
|
||||
#[cfg(feature = "usb")]
|
||||
Some(&usb_control_tube),
|
||||
#[cfg(not(feature = "usb"))]
|
||||
None,
|
||||
&mut linux.bat_control,
|
||||
&vcpu_handles,
|
||||
cfg.force_s2idle,
|
||||
);
|
||||
|
||||
// For non s2idle guest suspension we are done
|
||||
if let VmRequest::Suspend = request {
|
||||
if cfg.force_s2idle {
|
||||
suspend_requested = true;
|
||||
|
||||
// Spawn s2idle wait thread.
|
||||
let send_tube =
|
||||
tube.try_clone_send_tube().unwrap();
|
||||
let suspend_evt =
|
||||
linux.suspend_evt.try_clone().unwrap();
|
||||
let guest_suspended_cvar =
|
||||
guest_suspended_cvar.clone();
|
||||
let delayed_response = response.clone();
|
||||
let pm = linux.pm.clone();
|
||||
|
||||
std::thread::Builder::new()
|
||||
.name("s2idle_wait".to_owned())
|
||||
.spawn(move || {
|
||||
trigger_vm_suspend_and_wait_for_entry(
|
||||
guest_suspended_cvar,
|
||||
&send_tube,
|
||||
delayed_response,
|
||||
suspend_evt,
|
||||
pm,
|
||||
)
|
||||
})
|
||||
.context(
|
||||
"failed to spawn s2idle_wait thread",
|
||||
)?;
|
||||
}
|
||||
}
|
||||
response
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = tube.send(&response) {
|
||||
error!("failed to send VmResponse: {}", e);
|
||||
// If suspend requested skip that step since it will be
|
||||
// performed by s2idle_wait thread when suspension actually
|
||||
// happens.
|
||||
if !suspend_requested {
|
||||
if let Err(e) = tube.send(&response) {
|
||||
error!("failed to send VmResponse: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(run_mode) = run_mode_opt {
|
||||
info!("control socket changed run mode to {}", run_mode);
|
||||
match run_mode {
|
||||
|
@ -2676,11 +2757,16 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
|
|||
dev.lock().resume_imminent();
|
||||
}
|
||||
}
|
||||
vcpu::kick_all_vcpus(
|
||||
&vcpu_handles,
|
||||
linux.irq_chip.as_irq_chip(),
|
||||
VcpuControl::RunState(other),
|
||||
);
|
||||
// If suspend requested skip that step since it
|
||||
// will be performed by s2idle_wait thread when
|
||||
// needed.
|
||||
if !suspend_requested {
|
||||
vcpu::kick_all_vcpus(
|
||||
&vcpu_handles,
|
||||
linux.irq_chip.as_irq_chip(),
|
||||
VcpuControl::RunState(other),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ pub enum GpuControlCommand {
|
|||
RemoveDisplays { display_ids: Vec<u32> },
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub enum GpuControlResult {
|
||||
DisplaysUpdated,
|
||||
DisplayList {
|
||||
|
|
|
@ -42,8 +42,6 @@ use balloon_control::BalloonTubeCommand;
|
|||
#[cfg(feature = "balloon")]
|
||||
use balloon_control::BalloonTubeResult;
|
||||
use base::error;
|
||||
use base::info;
|
||||
use base::warn;
|
||||
use base::with_as_descriptor;
|
||||
use base::AsRawDescriptor;
|
||||
use base::Error as SysError;
|
||||
|
@ -77,7 +75,6 @@ use rutabaga_gfx::RutabagaHandle;
|
|||
use rutabaga_gfx::VulkanInfo;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use sync::Condvar;
|
||||
use sync::Mutex;
|
||||
use sys::kill_handle;
|
||||
#[cfg(unix)]
|
||||
|
@ -235,7 +232,7 @@ impl UsbControlAttachedDevice {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub enum UsbControlResult {
|
||||
Ok { port: u8 },
|
||||
NoAvailablePort,
|
||||
|
@ -682,7 +679,7 @@ pub enum VmIrqResponse {
|
|||
Err(SysError),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub enum BatControlResult {
|
||||
Ok,
|
||||
NoBatDevice,
|
||||
|
@ -989,35 +986,6 @@ fn map_descriptor(
|
|||
}
|
||||
}
|
||||
|
||||
fn generate_sleep_button_event(
|
||||
pm: &mut Option<Arc<Mutex<dyn PmResource>>>,
|
||||
guest_suspended_cvar: &Arc<(Mutex<bool>, Condvar)>,
|
||||
) {
|
||||
// During suspend also emulate sleepbtn, which allows to suspend VM (if running e.g. acpid and
|
||||
// reacts on sleep button events)
|
||||
if let Some(pm) = pm {
|
||||
pm.lock().slpbtn_evt();
|
||||
} else {
|
||||
error!("generating sleepbtn during suspend not supported");
|
||||
}
|
||||
|
||||
let (lock, cvar) = &**guest_suspended_cvar;
|
||||
let mut guest_suspended = lock.lock();
|
||||
|
||||
*guest_suspended = false;
|
||||
|
||||
// Wait for notification about guest suspension, if not received after 15sec,
|
||||
// proceed anyway.
|
||||
let result = cvar.wait_timeout(guest_suspended, std::time::Duration::from_secs(15));
|
||||
guest_suspended = result.0;
|
||||
|
||||
if result.1.timed_out() {
|
||||
warn!("Guest suspension timeout - proceeding anyway");
|
||||
} else if *guest_suspended {
|
||||
info!("Guest suspended");
|
||||
}
|
||||
}
|
||||
|
||||
impl VmRequest {
|
||||
/// Executes this request on the given Vm and other mutable state.
|
||||
///
|
||||
|
@ -1030,13 +998,12 @@ impl VmRequest {
|
|||
#[cfg(feature = "balloon")] balloon_host_tube: Option<&Tube>,
|
||||
#[cfg(feature = "balloon")] balloon_stats_id: &mut u64,
|
||||
disk_host_tubes: &[Tube],
|
||||
pm: &mut Option<Arc<Mutex<dyn PmResource>>>,
|
||||
pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
|
||||
#[cfg(feature = "gpu")] gpu_control_tube: &Tube,
|
||||
usb_control_tube: Option<&Tube>,
|
||||
bat_control: &mut Option<BatControl>,
|
||||
vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<VcpuControl>)],
|
||||
force_s2idle: bool,
|
||||
guest_suspended_cvar: Arc<(Mutex<bool>, Condvar)>,
|
||||
) -> VmResponse {
|
||||
match *self {
|
||||
VmRequest::Exit => {
|
||||
|
@ -1062,10 +1029,6 @@ impl VmRequest {
|
|||
}
|
||||
}
|
||||
VmRequest::Suspend => {
|
||||
if force_s2idle {
|
||||
generate_sleep_button_event(pm, &guest_suspended_cvar);
|
||||
}
|
||||
|
||||
*run_mode = Some(VmRunMode::Suspending);
|
||||
VmResponse::Ok
|
||||
}
|
||||
|
@ -1246,7 +1209,7 @@ impl VmRequest {
|
|||
/// Indication of success or failure of a `VmRequest`.
|
||||
///
|
||||
/// Success is usually indicated `VmResponse::Ok` unless there is data associated with the response.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub enum VmResponse {
|
||||
/// Indicates the request was executed successfully.
|
||||
Ok,
|
||||
|
|
Loading…
Reference in a new issue