Revert "crosvm: remove balloon stats request timeout"

This reverts commit 56c0d02760.

Reason for revert: Break ARCVM after suspend/resume

BUG=b:234067421
TEST=Checked that ARCVM kept working after suspend/resume

Original change's description:
> crosvm: remove balloon stats request timeout
>
> It was done to avoid deadlock when stats are requested before guest is
> up. Implement a stub BalloonStats::NotReady replier until host is up so
> that timeout is no longer necessary.
>
> BUG=b:232289535
> TEST=few tast crostini/arc tests
>
> Change-Id: I6731b4ee9eaecdd65aebdd3f530f0932b0660c85
> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3652887
> Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
> Tested-by: kokoro <noreply+kokoro@google.com>
> Commit-Queue: Anton Romanov <romanton@google.com>
> Auto-Submit: Anton Romanov <romanton@google.com>

Bug: b:232289535
Change-Id: Iffafaef1e1136ebc050d69d001d384eedd0a2319
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3686789
Commit-Queue: Keiichi Watanabe <keiichiw@chromium.org>
Auto-Submit: Hikaru Nishida <hikalium@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org>
Reviewed-by: Hikaru Nishida <hikalium@chromium.org>
This commit is contained in:
Hikaru Nishida 2022-06-02 10:07:57 +00:00 committed by Chromeos LUCI
parent d160cd22cf
commit 4f5ba4bd28
4 changed files with 22 additions and 97 deletions

View file

@ -55,7 +55,4 @@ pub enum BalloonTubeResult {
Adjusted {
num_bytes: u64,
},
NotReady {
id: u64,
},
}

View file

@ -9,17 +9,14 @@ use std::rc::Rc;
use std::sync::Arc;
use std::thread;
use futures::{
channel::{mpsc, oneshot},
pin_mut, StreamExt,
};
use futures::{channel::mpsc, pin_mut, StreamExt};
use remain::sorted;
use thiserror::Error as ThisError;
use balloon_control::{BalloonStats, BalloonTubeCommand, BalloonTubeResult};
use base::{self, error, trace, warn, AsRawDescriptor, Event, RawDescriptor, Tube};
use base::{self, error, warn, AsRawDescriptor, Event, RawDescriptor, Tube};
use cros_async::{
block_on, select2, select6, select7, sync::Mutex as AsyncMutex, AsyncTube, EventAsync, Executor,
block_on, select6, select7, sync::Mutex as AsyncMutex, AsyncTube, EventAsync, Executor,
};
use data_model::{DataInit, Le16, Le32, Le64};
use vm_memory::{GuestAddress, GuestMemory};
@ -39,9 +36,6 @@ pub enum BalloonError {
/// Failed to create async message receiver.
#[error("failed to create async message receiver: {0}")]
CreatingMessageReceiver(base::TubeError),
/// IO Error.
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// Failed to receive command message.
#[error("failed to receive command message: {0}")]
ReceivingCommand(base::TubeError),
@ -52,7 +46,6 @@ pub enum BalloonError {
#[error("failed to write config event: {0}")]
WritingConfigEvent(base::Error),
}
pub type Result<T> = std::result::Result<T, BalloonError>;
// Balloon implements four virt IO queues: Inflate, Deflate, Stats, Event.
@ -309,7 +302,6 @@ async fn handle_stats_queue(
}
Ok(d) => d.index,
};
loop {
// Wait for a request to read the stats.
let id = match stats_rx.next().await {
@ -454,30 +446,6 @@ async fn handle_command_tube(
}
}
// Stub worker thread to reply with `NotReady`
fn run_stub_worker(signal: oneshot::Receiver<()>, command_tube: Tube) -> Tube {
trace!("spawning ballon_stats stub worker");
let ex = Executor::new().unwrap();
let command_tube = AsyncTube::new(&ex, command_tube).unwrap();
{
let stub_replier = async {
loop {
if let Ok(BalloonTubeCommand::Stats { id }) = command_tube.next().await {
trace!("Sending not ready response to balloon stats request");
let res = BalloonTubeResult::NotReady { id };
if let Err(e) = command_tube.send(res).await {
error!("failed to send stats result: {}", e);
}
}
}
};
pin_mut!(stub_replier);
let _ = ex.run_until(select2(signal, stub_replier));
}
command_tube.into()
}
// The main worker thread. Initialized the asynchronous worker tasks and passes them to the executor
// to be processed.
fn run_worker(
@ -590,15 +558,13 @@ fn run_worker(
/// Virtio device for memory balloon inflation/deflation.
pub struct Balloon {
command_tube: Option<Tube>,
command_tube: Tube,
inflate_tube: Option<Tube>,
state: Arc<AsyncMutex<BalloonState>>,
features: u64,
acked_features: u64,
kill_evt: Option<Event>,
worker_thread: Option<thread::JoinHandle<Option<Tube>>>,
stub_worker_thread: Option<thread::JoinHandle<Tube>>,
stub_signal: Option<oneshot::Sender<()>>,
}
/// Operation mode of the balloon.
@ -634,7 +600,7 @@ impl Balloon {
};
Ok(Balloon {
command_tube: Some(command_tube),
command_tube,
inflate_tube,
state: Arc::new(AsyncMutex::new(BalloonState {
num_pages: (init_balloon_size >> VIRTIO_BALLOON_PFN_SHIFT) as u32,
@ -643,10 +609,8 @@ impl Balloon {
})),
kill_evt: None,
worker_thread: None,
stub_worker_thread: None,
features,
acked_features: 0,
stub_signal: None,
})
}
@ -678,11 +642,11 @@ impl Drop for Balloon {
impl VirtioDevice for Balloon {
fn keep_rds(&self) -> Vec<RawDescriptor> {
self.command_tube
.iter()
.chain(self.inflate_tube.iter())
.map(AsRawDescriptor::as_raw_descriptor)
.collect()
let mut rds = vec![self.command_tube.as_raw_descriptor()];
if let Some(inflate_tube) = &self.inflate_tube {
rds.push(inflate_tube.as_raw_descriptor());
}
rds
}
fn device_type(&self) -> DeviceType {
@ -705,12 +669,8 @@ impl VirtioDevice for Balloon {
if state.failable_update && state.actual_pages == state.num_pages {
state.failable_update = false;
if let Some(ref command_tube) = self.command_tube {
if let Err(e) = send_adjusted_response(command_tube, state.num_pages) {
error!("Failed to send response {:?}", e);
}
} else {
panic!("Command tube missing!");
if let Err(e) = send_adjusted_response(&self.command_tube, state.num_pages) {
error!("Failed to send response {:?}", e);
}
}
}
@ -727,21 +687,6 @@ impl VirtioDevice for Balloon {
self.acked_features |= value;
}
fn on_device_sandboxed(&mut self) {
if let Some(command_tube) = self.command_tube.take() {
let (tx, rx) = oneshot::channel();
let worker_stub_thread = thread::Builder::new()
.name("virtio_balloon_stub".to_string())
.spawn(move || run_stub_worker(rx, command_tube))
.expect("Failed to spawn balloon stub worker thread");
let _ = self.stub_worker_thread.insert(worker_stub_thread);
let _ = self.stub_signal.insert(tx);
} else {
panic!("Command tube missing!");
}
}
fn activate(
&mut self,
mem: GuestMemory,
@ -749,13 +694,6 @@ impl VirtioDevice for Balloon {
queues: Vec<Queue>,
queue_evts: Vec<Event>,
) {
// Kill stub thread
std::mem::drop(self.stub_signal.take());
if let Some(Ok(handle)) = self.stub_worker_thread.take().map(thread::JoinHandle::join) {
let _ = self.command_tube.insert(handle);
}
let expected_queues = if self.event_queue_enabled() { 4 } else { 3 };
if queues.len() != expected_queues || queue_evts.len() != expected_queues {
return;
@ -771,18 +709,12 @@ impl VirtioDevice for Balloon {
self.kill_evt = Some(self_kill_evt);
let state = self.state.clone();
let command_tube = match self.command_tube {
Some(ref tube) => tube,
None => {
panic!("Command tube missing!");
}
};
#[allow(deprecated)]
let command_tube = match command_tube.try_clone() {
let command_tube = match self.command_tube.try_clone() {
Ok(tube) => tube,
Err(e) => {
panic!("failed to clone command tube {:?}", e);
error!("failed to clone command tube {:?}", e);
return;
}
};
let inflate_tube = self.inflate_tube.take();
@ -803,7 +735,7 @@ impl VirtioDevice for Balloon {
match worker_result {
Err(e) => {
panic!("failed to spawn virtio_balloon worker: {}", e);
error!("failed to spawn virtio_balloon worker: {}", e);
}
Ok(join_handle) => {
self.worker_thread = Some(join_handle);

View file

@ -17,6 +17,7 @@ use std::os::unix::prelude::OpenOptionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::{mpsc, Arc, Barrier};
use std::time::Duration;
use std::process;
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
@ -1154,6 +1155,10 @@ where
// Balloon gets a special socket so balloon requests can be forwarded
// from the main process.
let (host, device) = Tube::pair().context("failed to create tube")?;
// Set recv timeout to avoid deadlock on sending BalloonControlCommand
// before the guest is ready.
host.set_recv_timeout(Some(Duration::from_millis(100)))
.context("failed to set timeout")?;
(Some(host), Some(device))
}
} else {

View file

@ -36,7 +36,7 @@ pub use balloon_control::BalloonStats;
use balloon_control::{BalloonTubeCommand, BalloonTubeResult};
use base::{
error, info, trace, warn, with_as_descriptor, AsRawDescriptor, Error as SysError, Event,
error, info, warn, with_as_descriptor, AsRawDescriptor, Error as SysError, Event,
ExternalMapping, FromRawDescriptor, IntoRawDescriptor, Killable, MappedRegion,
MemoryMappingArena, MemoryMappingBuilder, MemoryMappingBuilderUnix, MmapError, Protection,
Result, SafeDescriptor, SharedMemory, Tube, SIGRTMIN,
@ -1181,15 +1181,6 @@ impl VmRequest {
balloon_actual,
};
}
Ok(BalloonTubeResult::NotReady { id }) => {
if sent_id != id {
trace!("Wrong id for balloon stats");
// Keep trying to get the fresh stats.
continue;
}
warn!("balloon device not ready");
break VmResponse::Err(SysError::new(libc::EAGAIN));
}
Err(e) => {
error!("balloon socket recv failed: {}", e);
break VmResponse::Err(SysError::last());