From 0e3d4b647a9ffc751b3f109fe4d4eec3e7fe51c5 Mon Sep 17 00:00:00 2001 From: Charles William Dick Date: Mon, 14 Dec 2020 12:16:46 +0900 Subject: [PATCH] crosvm make balloon logic more fair Introduces new struct/impl BalloonPolicy that manages the size of the virtio balloon. Instead of the balloon balancing available memory between the host and guest, available memory above the critical margin is balanced. Where critical margin is the level that the host/guest starts reclaiming memory or killing processes. See comments above impl BalloonPolicy for explanation. In addition, balloon size dependent bias is introduced to encourage large balloons to deflate, and small balloons to inflate. This bias is needed for multivm.LifecycleShifting.arc_host to pass, as sometimes without the bias the balloon is inflated so much that the first App in a streak times out when launching. The bias also reduces variance in multivm.Lifecycle.arc_host. BUG=b:172870597 TEST=tast run dut mutivm.Lifecycle.* multivm.LifecycleShifting.* x10 Change-Id: I241e470aece56e3dc53fe59f833a1f7f4f684299 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2596293 Tested-by: Charles Dueck Tested-by: kokoro Reviewed-by: Chirantan Ekbote Commit-Queue: Charles Dueck --- src/crosvm.rs | 2 + src/linux.rs | 301 +++++++++++++++++++++++++++++++++++++------------- src/main.rs | 13 +++ 3 files changed, 240 insertions(+), 76 deletions(-) diff --git a/src/crosvm.rs b/src/crosvm.rs index 1074a6e253..04e267d6cc 100644 --- a/src/crosvm.rs +++ b/src/crosvm.rs @@ -226,6 +226,7 @@ pub struct Config { pub battery_type: Option, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] pub gdb: Option, + pub balloon_bias: i64, } impl Default for Config { @@ -284,6 +285,7 @@ impl Default for Config { battery_type: None, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] gdb: None, + balloon_bias: 0, } } } diff --git a/src/linux.rs b/src/linux.rs index b7fecb5409..f452fef3f2 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -use std::cmp::{max, Reverse}; +use std::cmp::{max, min, Reverse}; use std::convert::TryFrom; #[cfg(feature = "gpu")] use std::env; @@ -61,12 +61,13 @@ use base::{ }; use vm_control::{ BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket, - BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, - DiskControlResult, FsMappingRequest, FsMappingRequestSocket, FsMappingResponseSocket, IrqSetup, - UsbControlSocket, VcpuControl, VmControlResponseSocket, VmIrqRequest, VmIrqRequestSocket, - VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket, - VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest, - VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmResponse, VmRunMode, + BalloonControlResult, BalloonStats, DiskControlCommand, DiskControlRequestSocket, + DiskControlResponseSocket, DiskControlResult, FsMappingRequest, FsMappingRequestSocket, + FsMappingResponseSocket, IrqSetup, UsbControlSocket, VcpuControl, VmControlResponseSocket, + VmIrqRequest, VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, + VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, + VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmResponse, + VmRunMode, }; #[cfg(all(target_arch = "x86_64", feature = "gdb"))] use vm_control::{VcpuDebug, VcpuDebugStatus, VcpuDebugStatusMessage, VmRequest}; @@ -101,6 +102,7 @@ pub enum Error { AddPmemDeviceMemory(base::Error), AllocateGpuDeviceAddress, AllocatePmemDeviceAddress(resources::Error), + BalloonActualTooLarge, BalloonDeviceNew(virtio::BalloonError), BlockDeviceNew(base::Error), BlockSignal(base::signal::Error), @@ -131,6 +133,10 @@ pub enum Error { FsDeviceNew(virtio::fs::Error), GetMaxOpenFiles(io::Error), GetSignalMask(signal::Error), + GuestCachedMissing(), + GuestCachedTooLarge(std::num::TryFromIntError), + GuestFreeMissing(), + GuestFreeTooLarge(std::num::TryFromIntError), #[cfg(all(target_arch = "x86_64", feature = "gdb"))] HandleDebugCommand(::Error), InputDeviceNew(virtio::InputError), @@ -153,6 +159,7 @@ pub enum Error { PmemDeviceImageTooBig, PmemDeviceNew(base::Error), ReadMemAvailable(io::Error), + ReadStatm(io::Error), RegisterBalloon(arch::DeviceRegistrationError), RegisterBlock(arch::DeviceRegistrationError), RegisterGpu(arch::DeviceRegistrationError), @@ -201,6 +208,7 @@ impl Display for Error { AllocatePmemDeviceAddress(e) => { write!(f, "failed to allocate memory for pmem device: {}", e) } + BalloonActualTooLarge => write!(f, "balloon actual size is too large"), BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e), BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e), BlockSignal(e) => write!(f, "failed to block signal: {}", e), @@ -233,6 +241,10 @@ impl Display for Error { FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e), GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e), GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e), + GuestCachedMissing() => write!(f, "guest cached is missing from balloon stats"), + GuestCachedTooLarge(e) => write!(f, "guest cached is too large: {}", e), + GuestFreeMissing() => write!(f, "guest free is missing from balloon stats"), + GuestFreeTooLarge(e) => write!(f, "guest free is too large: {}", e), #[cfg(all(target_arch = "x86_64", feature = "gdb"))] HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e), InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e), @@ -261,7 +273,12 @@ impl Display for Error { write!(f, "failed to create pmem device: pmem device image too big") } PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e), - ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e), + ReadMemAvailable(e) => write!( + f, + "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}", + e + ), + ReadStatm(e) => write!(f, "failed to read /proc/self/statm: {}", e), RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e), RegisterBlock(e) => write!(f, "error registering block device: {}", e), RegisterGpu(e) => write!(f, "error registering gpu device: {}", e), @@ -2075,10 +2092,10 @@ fn file_fields_to_i64>(path: P) -> io::Result> { // Reads the contents of a file and converts them into a u64, and if there // are multiple fields it only returns the first one. -fn file_to_i64>(path: P) -> io::Result { +fn file_to_i64>(path: P, nth: usize) -> io::Result { file_fields_to_i64(path)? .into_iter() - .next() + .nth(nth) .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file")) } @@ -2334,6 +2351,7 @@ where sigchld_fd, cfg.sandbox, Arc::clone(&map_request), + cfg.balloon_bias, ) } @@ -2355,6 +2373,170 @@ fn kick_all_vcpus( irq_chip.kick_halted_vcpus(); } +// BalloonPolicy determines the size to set the balloon. +struct BalloonPolicy { + // Estimate for when the guest starts aggressivly freeing memory. + critical_guest_available: i64, + critical_host_available: i64, // ChromeOS critical margin. + guest_available_bias: i64, + max_balloon_actual: i64, // The largest the balloon has ever been observed. + prev_balloon_full_percent: i64, // How full was the balloon at the previous timestep. + prev_guest_available: i64, // Available memory in the guest at the previous timestep. +} + +const ONE_KB: i64 = 1024; +const ONE_MB: i64 = 1024 * ONE_KB; + +const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available"; +const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin"; + +// BalloonPolicy implements the virtio balloon sizing logic. +// The balloon is sized with the following heuristics: +// Balance Available +// The balloon is sized to balance the amount of available memory above a +// critical margin. The critical margin is the level at which memory is +// freed. In the host, this is the ChromeOS available critical margin, which +// is the trigger to kill tabs. In the guest, we estimate this level by +// tracking the minimum amount of available memory, discounting sharp +// 'valleys'. If the guest manages to keep available memory above a given +// level even with some pressure, then we determine that this is the +// 'critical' level for the guest. We don't update this critical value if +// the balloon is fully inflated because in that case, the guest may be out +// of memory to free. +// guest_available_bias +// Even if available memory is perfectly balanced between host and guest, +// The size of the balloon will still drift randomly depending on whether +// those host or guest reclaims memory first/faster every time memory is +// low. To encourage large balloons to shrink and small balloons to grow, +// the following bias is added to the guest critical margin: +// (guest_available_bias * balloon_full_percent) / 100 +// This give the guest more memory when the balloon is full. +impl BalloonPolicy { + fn new( + memory_size: i64, + critical_host_available: i64, + guest_available_bias: i64, + ) -> BalloonPolicy { + // Estimate some reasonable initial maximum for balloon size. + let max_balloon_actual = (memory_size * 3) / 4; + // 400MB is above the zone min margin even for Crostini VMs on 16GB + // devices (~85MB), and is above when Android Low Memory Killer kills + // apps (~250MB). + let critical_guest_available = 400 * ONE_MB; + + BalloonPolicy { + critical_guest_available, + critical_host_available, + guest_available_bias, + max_balloon_actual, + prev_balloon_full_percent: 0, + prev_guest_available: 0, + } + } + fn delta(&mut self, stats: BalloonStats, balloon_actual_u: u64) -> Result { + let guest_free = stats + .free_memory + .map(i64::try_from) + .ok_or(Error::GuestFreeMissing())? + .map_err(Error::GuestFreeTooLarge)?; + let guest_cached = stats + .disk_caches + .map(i64::try_from) + .ok_or(Error::GuestFreeMissing())? + .map_err(Error::GuestFreeTooLarge)?; + let balloon_actual = match balloon_actual_u { + size if size < i64::max_value() as u64 => size as i64, + _ => return Err(Error::BalloonActualTooLarge), + }; + let guest_available = guest_free + guest_cached; + // Available memory is reported in MB, and we need bytes. + let host_available = + file_to_i64(LOWMEM_AVAILABLE, 0).map_err(Error::ReadMemAvailable)? * ONE_MB; + if self.max_balloon_actual < balloon_actual { + self.max_balloon_actual = balloon_actual; + info!( + "balloon updated max_balloon_actual to {} MiB", + self.max_balloon_actual / ONE_MB, + ); + } + let balloon_full_percent = balloon_actual * 100 / self.max_balloon_actual; + // Update critical_guest_available if we see a lower available with the + // balloon not fully inflated. If the balloon is completely inflated + // there is a risk that the low available level we see comes at the cost + // of stability. The Linux OOM Killer might have been forced to kill + // something important, or page reclaim was so aggressive that there are + // long UI hangs. + if guest_available < self.critical_guest_available && balloon_full_percent < 95 { + // To ignore temporary low memory states, we require that two guest + // available measurements in a row are low. + if self.prev_guest_available < self.critical_guest_available + && self.prev_balloon_full_percent < 95 + { + self.critical_guest_available = self.prev_guest_available; + info!( + "balloon updated critical_guest_available to {} MiB", + self.critical_guest_available / ONE_MB, + ); + } + } + + // Compute the difference in available memory above the host and guest + // critical thresholds. + let bias = (self.guest_available_bias * balloon_full_percent) / 100; + let guest_above_critical = guest_available - self.critical_guest_available - bias; + let host_above_critical = host_available - self.critical_host_available; + let balloon_delta = guest_above_critical - host_above_critical; + // Only let the balloon take up MAX_CRITICAL_DELTA of available memory + // below the critical level in host or guest. + const MAX_CRITICAL_DELTA: i64 = 10 * ONE_MB; + let balloon_delta_capped = if balloon_delta < 0 { + // The balloon is deflating, taking memory from the host. Don't let + // it take more than the amount of available memory above the + // critical margin, plus MAX_CRITICAL_DELTA. + max( + balloon_delta, + -(host_available - self.critical_host_available + MAX_CRITICAL_DELTA), + ) + } else { + // The balloon is inflating, taking memory from the guest. Don't let + // it take more than the amount of available memory above the + // critical margin, plus MAX_CRITICAL_DELTA. + min( + balloon_delta, + guest_available - self.critical_guest_available + MAX_CRITICAL_DELTA, + ) + }; + + self.prev_balloon_full_percent = balloon_full_percent; + self.prev_guest_available = guest_available; + + // Only return a value if target would change available above critical + // by more than 1%, or we are within 1 MB of critical in host or guest. + if guest_above_critical < ONE_MB + || host_above_critical < ONE_MB + || (balloon_delta.abs() * 100) / guest_above_critical > 1 + || (balloon_delta.abs() * 100) / host_above_critical > 1 + { + // Finally, make sure the balloon delta won't cause a negative size. + let result = max(balloon_delta_capped, -balloon_actual); + if result != 0 { + info!( + "balloon delta={:<6} ha={:<6} hc={:<6} ga={:<6} gc={:<6} bias={:<6} full={:>3}%", + result / ONE_MB, + host_available / ONE_MB, + self.critical_host_available / ONE_MB, + guest_available / ONE_MB, + self.critical_guest_available / ONE_MB, + bias / ONE_MB, + balloon_full_percent, + ); + } + return Ok(result); + } + return Ok(0); + } +} + fn run_control( mut linux: RunnableLinuxVm, control_server_socket: Option, @@ -2365,9 +2547,8 @@ fn run_control>>, + balloon_bias: i64, ) -> Result<()> { - const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available"; - #[derive(PollToken)] enum Token { Exit, @@ -2415,7 +2596,7 @@ fn run_control { - // Available memory is reported in MB, and we need bytes. - let host_available = file_to_i64(LOWMEM_AVAILABLE) - .map_err(Error::ReadMemAvailable)? - << 20; - let guest_free_u = if let Some(free) = stats.free_memory { - free - } else { - warn!("guest free_memory stat is missing"); - continue; - }; - let guest_cached_u = if let Some(cached) = stats.disk_caches { - cached - } else { - warn!("guest disk_caches stat is missing"); - continue; - }; - if guest_free_u > i64::max_value() as u64 { - warn!("guest free memory is too large"); - continue; - } - if guest_cached_u > i64::max_value() as u64 { - warn!("guest cached memory is too large"); - continue; - } - if balloon_actual_u > i64::max_value() as u64 { - warn!("actual balloon size is too large"); - continue; - } - // Tell the guest to change the balloon size if the target balloon size - // is more than 5% different from the current balloon size. - const RESIZE_PERCENT: i64 = 5; - let balloon_actual = balloon_actual_u as i64; - let guest_free = guest_free_u as i64; - let guest_cached = guest_cached_u as i64; - // Compute how much memory the guest should have available after we - // rebalance. - let guest_available_target = host_available; - let guest_available_delta = - guest_available_target - guest_free - guest_cached; - // How much do we have to change the balloon to balance. - let balloon_target = max(balloon_actual - guest_available_delta, 0); - // Compute the change in balloon size in percent. If the balloon size - // is 0, use 1 so we don't overflow from the infinity % increase. - let balloon_change_percent = (balloon_actual - balloon_target).abs() - * 100 - / max(balloon_actual, 1); - - if balloon_change_percent >= RESIZE_PERCENT { - info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})", - host_available, - guest_free, - guest_cached, - guest_available_target, - balloon_actual, - balloon_target, - ); - let command = BalloonControlCommand::Adjust { - num_bytes: balloon_target as u64, - }; - if let Err(e) = balloon_host_socket.send(&command) { - warn!("failed to send memory value to balloon device: {}", e); + match balloon_policy + .as_mut() + .map(|p| p.delta(stats, balloon_actual_u)) + { + None => { + error!( + "got result from balloon stats, but no policy is running" + ); } + Some(Err(e)) => { + warn!("failed to run balloon policy {}", e); + } + Some(Ok(delta)) if delta != 0 => { + let target = max((balloon_actual_u as i64) + delta, 0) as u64; + let command = + BalloonControlCommand::Adjust { num_bytes: target }; + if let Err(e) = balloon_host_socket.send(&command) { + warn!( + "failed to send memory value to balloon device: {}", + e + ); + } + } + Some(Ok(_)) => {} } } Err(e) => { diff --git a/src/main.rs b/src/main.rs index a85ab3d2eb..5d02af02f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1475,6 +1475,18 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: })?; cfg.gdb = Some(port); } + "balloon_bias_mib" => { + cfg.balloon_bias = + value + .unwrap() + .parse::() + .map_err(|_| argument::Error::InvalidValue { + value: value.unwrap().to_owned(), + expected: String::from("expected a valid ballon bias in MiB"), + })? + * 1024 + * 1024; // cfg.balloon_bias is in bytes. + } "help" => return Err(argument::Error::PrintHelp), _ => unreachable!(), } @@ -1673,6 +1685,7 @@ writeback=BOOL - Indicates whether the VM can use writeback caching (default: fa type=goldfish - type of battery emulation, defaults to goldfish "), Argument::value("gdb", "PORT", "(EXPERIMENTAL) gdb on the given port"), + Argument::value("balloon_bias_mib", "N", "Amount to bias balance of memory between host and guest as the balloon inflates, in MiB."), Argument::short_flag('h', "help", "Print help message.")]; let mut cfg = Config::default();