mirror of
https://chromium.googlesource.com/crosvm/crosvm
synced 2025-02-09 20:04:20 +00:00
gpu: remove virtio-gpu event trigger upon descriptor hang-up
When an event source unexpectedly hangs up, remove its descriptor from the WaitContext to prevent runaway CPU usage and rapid power consumption. BUG=b:250923109 TEST=CQ TEST=glxgears in crosvm linux guest Change-Id: Idbf4becd7c3195cb57c0e780c0957201c3d9ba9f Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/3928618 Reviewed-by: Yiwei Zhang <zzyiwei@chromium.org> Reviewed-by: Daniel Verkamp <dverkamp@chromium.org> Commit-Queue: Ryan Neph <ryanneph@google.com> Reviewed-by: Gurchetan Singh <gurchetansingh@chromium.org>
This commit is contained in:
parent
e91b0fd604
commit
21c739216e
1 changed files with 79 additions and 22 deletions
|
@ -26,7 +26,7 @@ use base::AsRawDescriptor;
|
|||
use base::Event;
|
||||
use base::EventToken;
|
||||
use base::RawDescriptor;
|
||||
#[cfg(feature = "virgl_renderer_next")]
|
||||
use base::Result;
|
||||
use base::SafeDescriptor;
|
||||
use base::SendTube;
|
||||
use base::Tube;
|
||||
|
@ -742,7 +742,7 @@ impl Frontend {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(EventToken)]
|
||||
#[derive(EventToken, PartialEq, Clone, Copy, Debug)]
|
||||
enum WorkerToken {
|
||||
CtrlQueue,
|
||||
CursorQueue,
|
||||
|
@ -753,6 +753,48 @@ enum WorkerToken {
|
|||
VirtioGpuPoll,
|
||||
}
|
||||
|
||||
struct EventManager<'a> {
|
||||
pub wait_ctx: WaitContext<WorkerToken>,
|
||||
events: Vec<(&'a dyn AsRawDescriptor, WorkerToken)>,
|
||||
}
|
||||
|
||||
impl<'a> EventManager<'a> {
|
||||
pub fn new() -> Result<EventManager<'a>> {
|
||||
Ok(EventManager {
|
||||
wait_ctx: WaitContext::new()?,
|
||||
events: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_with(
|
||||
triggers: &[(&'a dyn AsRawDescriptor, WorkerToken)],
|
||||
) -> Result<EventManager<'a>> {
|
||||
let mut manager = EventManager::new()?;
|
||||
manager.wait_ctx.add_many(triggers)?;
|
||||
|
||||
for (descriptor, token) in triggers {
|
||||
manager.events.push((*descriptor, *token));
|
||||
}
|
||||
Ok(manager)
|
||||
}
|
||||
|
||||
pub fn add(&mut self, descriptor: &'a dyn AsRawDescriptor, token: WorkerToken) -> Result<()> {
|
||||
self.wait_ctx.add(descriptor, token)?;
|
||||
self.events.push((descriptor, token));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn delete(&mut self, token: WorkerToken) {
|
||||
self.events.retain(|event| {
|
||||
if event.1 == token {
|
||||
self.wait_ctx.delete(event.0).ok();
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
struct Worker {
|
||||
interrupt: Arc<Interrupt>,
|
||||
exit_evt_wrtube: SendTube,
|
||||
|
@ -768,36 +810,50 @@ struct Worker {
|
|||
|
||||
impl Worker {
|
||||
fn run(&mut self) {
|
||||
let mut wait_ctx: WaitContext<WorkerToken> = match WaitContext::build_with(&[
|
||||
let display_desc =
|
||||
match SafeDescriptor::try_from(&*self.state.display().borrow() as &dyn AsRawDescriptor)
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
error!("failed getting event descriptor for display: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut event_manager = match EventManager::build_with(&[
|
||||
(&self.ctrl_evt, WorkerToken::CtrlQueue),
|
||||
(&self.cursor_evt, WorkerToken::CursorQueue),
|
||||
(&*self.state.display().borrow(), WorkerToken::Display),
|
||||
(&display_desc, WorkerToken::Display),
|
||||
(&self.kill_evt, WorkerToken::Kill),
|
||||
]) {
|
||||
Ok(pc) => pc,
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
error!("failed creating WaitContext: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(resample_evt) = self.interrupt.get_resample_evt() {
|
||||
if wait_ctx
|
||||
.add(resample_evt, WorkerToken::InterruptResample)
|
||||
.is_err()
|
||||
{
|
||||
error!("failed creating WaitContext");
|
||||
if let Err(e) = event_manager.add(resample_evt, WorkerToken::InterruptResample) {
|
||||
error!(
|
||||
"failed adding interrupt resample event to WaitContext: {}",
|
||||
e
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(poll_desc) = self.state.virtio_gpu.poll_descriptor() {
|
||||
if let Err(e) = wait_ctx.add(&poll_desc, WorkerToken::VirtioGpuPoll) {
|
||||
error!("failed adding poll eventfd to WaitContext: {}", e);
|
||||
let poll_desc: SafeDescriptor;
|
||||
if let Some(desc) = self.state.virtio_gpu.poll_descriptor() {
|
||||
poll_desc = desc;
|
||||
if let Err(e) = event_manager.add(&poll_desc, WorkerToken::VirtioGpuPoll) {
|
||||
error!("failed adding poll event to WaitContext: {}", e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.resource_bridges.add_to_wait_context(&mut wait_ctx);
|
||||
self.resource_bridges
|
||||
.add_to_wait_context(&mut event_manager.wait_ctx);
|
||||
|
||||
// TODO(davidriley): The entire main loop processing is somewhat racey and incorrect with
|
||||
// respect to cursor vs control queue processing. As both currently and originally
|
||||
|
@ -808,7 +864,7 @@ impl Worker {
|
|||
// isn't used so this isn't a huge issue.
|
||||
|
||||
'wait: loop {
|
||||
let events = match wait_ctx.wait() {
|
||||
let events = match event_manager.wait_ctx.wait() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
error!("failed polling for events: {}", e);
|
||||
|
@ -819,13 +875,14 @@ impl Worker {
|
|||
let mut signal_used_ctrl = false;
|
||||
let mut ctrl_available = false;
|
||||
|
||||
// This display isn't typically used when the virt-wl device is available and it can
|
||||
// lead to hung fds (crbug.com/1027379). Disable if it's hung.
|
||||
// Remove event triggers that have been hung-up to prevent unnecessary worker wake-ups
|
||||
// (see b/244486346#comment62 for context).
|
||||
for event in events.iter().filter(|e| e.is_hungup) {
|
||||
if let WorkerToken::Display = event.token {
|
||||
error!("default display hang-up detected");
|
||||
let _ = wait_ctx.delete(&*self.state.display().borrow());
|
||||
}
|
||||
error!(
|
||||
"unhandled virtio-gpu worker event hang-up detected: {:?}",
|
||||
event.token
|
||||
);
|
||||
event_manager.delete(event.token);
|
||||
}
|
||||
|
||||
for event in events.iter().filter(|e| e.is_readable) {
|
||||
|
@ -880,7 +937,7 @@ impl Worker {
|
|||
// and the resource bridge request come in at the same time after the control queue is
|
||||
// processed above and before the corresponding bridge is processed below.
|
||||
self.resource_bridges
|
||||
.process_resource_bridges(&mut self.state, &mut wait_ctx);
|
||||
.process_resource_bridges(&mut self.state, &mut event_manager.wait_ctx);
|
||||
|
||||
if signal_used_ctrl {
|
||||
self.ctrl_queue.signal_used(&self.mem);
|
||||
|
|
Loading…
Reference in a new issue