From 4f5ba4bd281716dd94b4d59a02499535fc6ccb1c Mon Sep 17 00:00:00 2001 From: Hikaru Nishida Date: Thu, 2 Jun 2022 10:07:57 +0000 Subject: [PATCH] Revert "crosvm: remove balloon stats request timeout" This reverts commit 56c0d027603218dd1e3d4859cc4c2024ba891e5e. Reason for revert: Break ARCVM after suspend/resume BUG=b:234067421 TEST=Checked that ARCVM kept working after suspend/resume Original change's description: > crosvm: remove balloon stats request timeout > > It was done to avoid deadlock when stats are requested before guest is > up. Implement a stub BalloonStats::NotReady replier until host is up so > that timeout is no longer necessary. > > BUG=b:232289535 > TEST=few tast crostini/arc tests > > Change-Id: I6731b4ee9eaecdd65aebdd3f530f0932b0660c85 > Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3652887 > Reviewed-by: Daniel Verkamp > Tested-by: kokoro > Commit-Queue: Anton Romanov > Auto-Submit: Anton Romanov Bug: b:232289535 Change-Id: Iffafaef1e1136ebc050d69d001d384eedd0a2319 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3686789 Commit-Queue: Keiichi Watanabe Auto-Submit: Hikaru Nishida Tested-by: kokoro Reviewed-by: Keiichi Watanabe Reviewed-by: Hikaru Nishida --- common/balloon_control/src/lib.rs | 3 - devices/src/virtio/balloon.rs | 100 +++++------------------------- src/linux/mod.rs | 5 ++ vm_control/src/lib.rs | 11 +--- 4 files changed, 22 insertions(+), 97 deletions(-) diff --git a/common/balloon_control/src/lib.rs b/common/balloon_control/src/lib.rs index 9372329e7e..0ea428302d 100644 --- a/common/balloon_control/src/lib.rs +++ b/common/balloon_control/src/lib.rs @@ -55,7 +55,4 @@ pub enum BalloonTubeResult { Adjusted { num_bytes: u64, }, - NotReady { - id: u64, - }, } diff --git a/devices/src/virtio/balloon.rs b/devices/src/virtio/balloon.rs index f0c1fdeb47..bbc4461e2c 100644 --- a/devices/src/virtio/balloon.rs +++ b/devices/src/virtio/balloon.rs @@ -9,17 +9,14 @@ use std::rc::Rc; use std::sync::Arc; use std::thread; -use futures::{ - channel::{mpsc, oneshot}, - pin_mut, StreamExt, -}; +use futures::{channel::mpsc, pin_mut, StreamExt}; use remain::sorted; use thiserror::Error as ThisError; use balloon_control::{BalloonStats, BalloonTubeCommand, BalloonTubeResult}; -use base::{self, error, trace, warn, AsRawDescriptor, Event, RawDescriptor, Tube}; +use base::{self, error, warn, AsRawDescriptor, Event, RawDescriptor, Tube}; use cros_async::{ - block_on, select2, select6, select7, sync::Mutex as AsyncMutex, AsyncTube, EventAsync, Executor, + block_on, select6, select7, sync::Mutex as AsyncMutex, AsyncTube, EventAsync, Executor, }; use data_model::{DataInit, Le16, Le32, Le64}; use vm_memory::{GuestAddress, GuestMemory}; @@ -39,9 +36,6 @@ pub enum BalloonError { /// Failed to create async message receiver. #[error("failed to create async message receiver: {0}")] CreatingMessageReceiver(base::TubeError), - /// IO Error. - #[error("IO error: {0}")] - IoError(#[from] std::io::Error), /// Failed to receive command message. #[error("failed to receive command message: {0}")] ReceivingCommand(base::TubeError), @@ -52,7 +46,6 @@ pub enum BalloonError { #[error("failed to write config event: {0}")] WritingConfigEvent(base::Error), } - pub type Result = std::result::Result; // Balloon implements four virt IO queues: Inflate, Deflate, Stats, Event. @@ -309,7 +302,6 @@ async fn handle_stats_queue( } Ok(d) => d.index, }; - loop { // Wait for a request to read the stats. let id = match stats_rx.next().await { @@ -454,30 +446,6 @@ async fn handle_command_tube( } } -// Stub worker thread to reply with `NotReady` -fn run_stub_worker(signal: oneshot::Receiver<()>, command_tube: Tube) -> Tube { - trace!("spawning ballon_stats stub worker"); - - let ex = Executor::new().unwrap(); - let command_tube = AsyncTube::new(&ex, command_tube).unwrap(); - { - let stub_replier = async { - loop { - if let Ok(BalloonTubeCommand::Stats { id }) = command_tube.next().await { - trace!("Sending not ready response to balloon stats request"); - let res = BalloonTubeResult::NotReady { id }; - if let Err(e) = command_tube.send(res).await { - error!("failed to send stats result: {}", e); - } - } - } - }; - pin_mut!(stub_replier); - let _ = ex.run_until(select2(signal, stub_replier)); - } - command_tube.into() -} - // The main worker thread. Initialized the asynchronous worker tasks and passes them to the executor // to be processed. fn run_worker( @@ -590,15 +558,13 @@ fn run_worker( /// Virtio device for memory balloon inflation/deflation. pub struct Balloon { - command_tube: Option, + command_tube: Tube, inflate_tube: Option, state: Arc>, features: u64, acked_features: u64, kill_evt: Option, worker_thread: Option>>, - stub_worker_thread: Option>, - stub_signal: Option>, } /// Operation mode of the balloon. @@ -634,7 +600,7 @@ impl Balloon { }; Ok(Balloon { - command_tube: Some(command_tube), + command_tube, inflate_tube, state: Arc::new(AsyncMutex::new(BalloonState { num_pages: (init_balloon_size >> VIRTIO_BALLOON_PFN_SHIFT) as u32, @@ -643,10 +609,8 @@ impl Balloon { })), kill_evt: None, worker_thread: None, - stub_worker_thread: None, features, acked_features: 0, - stub_signal: None, }) } @@ -678,11 +642,11 @@ impl Drop for Balloon { impl VirtioDevice for Balloon { fn keep_rds(&self) -> Vec { - self.command_tube - .iter() - .chain(self.inflate_tube.iter()) - .map(AsRawDescriptor::as_raw_descriptor) - .collect() + let mut rds = vec![self.command_tube.as_raw_descriptor()]; + if let Some(inflate_tube) = &self.inflate_tube { + rds.push(inflate_tube.as_raw_descriptor()); + } + rds } fn device_type(&self) -> DeviceType { @@ -705,12 +669,8 @@ impl VirtioDevice for Balloon { if state.failable_update && state.actual_pages == state.num_pages { state.failable_update = false; - if let Some(ref command_tube) = self.command_tube { - if let Err(e) = send_adjusted_response(command_tube, state.num_pages) { - error!("Failed to send response {:?}", e); - } - } else { - panic!("Command tube missing!"); + if let Err(e) = send_adjusted_response(&self.command_tube, state.num_pages) { + error!("Failed to send response {:?}", e); } } } @@ -727,21 +687,6 @@ impl VirtioDevice for Balloon { self.acked_features |= value; } - fn on_device_sandboxed(&mut self) { - if let Some(command_tube) = self.command_tube.take() { - let (tx, rx) = oneshot::channel(); - - let worker_stub_thread = thread::Builder::new() - .name("virtio_balloon_stub".to_string()) - .spawn(move || run_stub_worker(rx, command_tube)) - .expect("Failed to spawn balloon stub worker thread"); - let _ = self.stub_worker_thread.insert(worker_stub_thread); - let _ = self.stub_signal.insert(tx); - } else { - panic!("Command tube missing!"); - } - } - fn activate( &mut self, mem: GuestMemory, @@ -749,13 +694,6 @@ impl VirtioDevice for Balloon { queues: Vec, queue_evts: Vec, ) { - // Kill stub thread - std::mem::drop(self.stub_signal.take()); - - if let Some(Ok(handle)) = self.stub_worker_thread.take().map(thread::JoinHandle::join) { - let _ = self.command_tube.insert(handle); - } - let expected_queues = if self.event_queue_enabled() { 4 } else { 3 }; if queues.len() != expected_queues || queue_evts.len() != expected_queues { return; @@ -771,18 +709,12 @@ impl VirtioDevice for Balloon { self.kill_evt = Some(self_kill_evt); let state = self.state.clone(); - let command_tube = match self.command_tube { - Some(ref tube) => tube, - None => { - panic!("Command tube missing!"); - } - }; - #[allow(deprecated)] - let command_tube = match command_tube.try_clone() { + let command_tube = match self.command_tube.try_clone() { Ok(tube) => tube, Err(e) => { - panic!("failed to clone command tube {:?}", e); + error!("failed to clone command tube {:?}", e); + return; } }; let inflate_tube = self.inflate_tube.take(); @@ -803,7 +735,7 @@ impl VirtioDevice for Balloon { match worker_result { Err(e) => { - panic!("failed to spawn virtio_balloon worker: {}", e); + error!("failed to spawn virtio_balloon worker: {}", e); } Ok(join_handle) => { self.worker_thread = Some(join_handle); diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 2d9199c689..1561e6da1d 100644 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -17,6 +17,7 @@ use std::os::unix::prelude::OpenOptionsExt; use std::path::Path; use std::str::FromStr; use std::sync::{mpsc, Arc, Barrier}; +use std::time::Duration; use std::process; #[cfg(all(target_arch = "x86_64", feature = "gdb"))] @@ -1154,6 +1155,10 @@ where // Balloon gets a special socket so balloon requests can be forwarded // from the main process. let (host, device) = Tube::pair().context("failed to create tube")?; + // Set recv timeout to avoid deadlock on sending BalloonControlCommand + // before the guest is ready. + host.set_recv_timeout(Some(Duration::from_millis(100))) + .context("failed to set timeout")?; (Some(host), Some(device)) } } else { diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs index 3d99b4f300..550c1ad5bb 100644 --- a/vm_control/src/lib.rs +++ b/vm_control/src/lib.rs @@ -36,7 +36,7 @@ pub use balloon_control::BalloonStats; use balloon_control::{BalloonTubeCommand, BalloonTubeResult}; use base::{ - error, info, trace, warn, with_as_descriptor, AsRawDescriptor, Error as SysError, Event, + error, info, warn, with_as_descriptor, AsRawDescriptor, Error as SysError, Event, ExternalMapping, FromRawDescriptor, IntoRawDescriptor, Killable, MappedRegion, MemoryMappingArena, MemoryMappingBuilder, MemoryMappingBuilderUnix, MmapError, Protection, Result, SafeDescriptor, SharedMemory, Tube, SIGRTMIN, @@ -1181,15 +1181,6 @@ impl VmRequest { balloon_actual, }; } - Ok(BalloonTubeResult::NotReady { id }) => { - if sent_id != id { - trace!("Wrong id for balloon stats"); - // Keep trying to get the fresh stats. - continue; - } - warn!("balloon device not ready"); - break VmResponse::Err(SysError::new(libc::EAGAIN)); - } Err(e) => { error!("balloon socket recv failed: {}", e); break VmResponse::Err(SysError::last());