diff --git a/src/crosvm.rs b/src/crosvm.rs index 1074a6e253..04e267d6cc 100644 --- a/src/crosvm.rs +++ b/src/crosvm.rs @@ -226,6 +226,7 @@ pub struct Config { pub battery_type: Option, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] pub gdb: Option, + pub balloon_bias: i64, } impl Default for Config { @@ -284,6 +285,7 @@ impl Default for Config { battery_type: None, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] gdb: None, + balloon_bias: 0, } } } diff --git a/src/linux.rs b/src/linux.rs index b7fecb5409..f452fef3f2 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -use std::cmp::{max, Reverse}; +use std::cmp::{max, min, Reverse}; use std::convert::TryFrom; #[cfg(feature = "gpu")] use std::env; @@ -61,12 +61,13 @@ use base::{ }; use vm_control::{ BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket, - BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, - DiskControlResult, FsMappingRequest, FsMappingRequestSocket, FsMappingResponseSocket, IrqSetup, - UsbControlSocket, VcpuControl, VmControlResponseSocket, VmIrqRequest, VmIrqRequestSocket, - VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket, - VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest, - VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmResponse, VmRunMode, + BalloonControlResult, BalloonStats, DiskControlCommand, DiskControlRequestSocket, + DiskControlResponseSocket, DiskControlResult, FsMappingRequest, FsMappingRequestSocket, + FsMappingResponseSocket, IrqSetup, UsbControlSocket, VcpuControl, VmControlResponseSocket, + VmIrqRequest, VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, + VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, + VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmResponse, + VmRunMode, }; #[cfg(all(target_arch = "x86_64", feature = "gdb"))] use vm_control::{VcpuDebug, VcpuDebugStatus, VcpuDebugStatusMessage, VmRequest}; @@ -101,6 +102,7 @@ pub enum Error { AddPmemDeviceMemory(base::Error), AllocateGpuDeviceAddress, AllocatePmemDeviceAddress(resources::Error), + BalloonActualTooLarge, BalloonDeviceNew(virtio::BalloonError), BlockDeviceNew(base::Error), BlockSignal(base::signal::Error), @@ -131,6 +133,10 @@ pub enum Error { FsDeviceNew(virtio::fs::Error), GetMaxOpenFiles(io::Error), GetSignalMask(signal::Error), + GuestCachedMissing(), + GuestCachedTooLarge(std::num::TryFromIntError), + GuestFreeMissing(), + GuestFreeTooLarge(std::num::TryFromIntError), #[cfg(all(target_arch = "x86_64", feature = "gdb"))] HandleDebugCommand(::Error), InputDeviceNew(virtio::InputError), @@ -153,6 +159,7 @@ pub enum Error { PmemDeviceImageTooBig, PmemDeviceNew(base::Error), ReadMemAvailable(io::Error), + ReadStatm(io::Error), RegisterBalloon(arch::DeviceRegistrationError), RegisterBlock(arch::DeviceRegistrationError), RegisterGpu(arch::DeviceRegistrationError), @@ -201,6 +208,7 @@ impl Display for Error { AllocatePmemDeviceAddress(e) => { write!(f, "failed to allocate memory for pmem device: {}", e) } + BalloonActualTooLarge => write!(f, "balloon actual size is too large"), BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e), BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e), BlockSignal(e) => write!(f, "failed to block signal: {}", e), @@ -233,6 +241,10 @@ impl Display for Error { FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e), GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e), GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e), + GuestCachedMissing() => write!(f, "guest cached is missing from balloon stats"), + GuestCachedTooLarge(e) => write!(f, "guest cached is too large: {}", e), + GuestFreeMissing() => write!(f, "guest free is missing from balloon stats"), + GuestFreeTooLarge(e) => write!(f, "guest free is too large: {}", e), #[cfg(all(target_arch = "x86_64", feature = "gdb"))] HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e), InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e), @@ -261,7 +273,12 @@ impl Display for Error { write!(f, "failed to create pmem device: pmem device image too big") } PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e), - ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e), + ReadMemAvailable(e) => write!( + f, + "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}", + e + ), + ReadStatm(e) => write!(f, "failed to read /proc/self/statm: {}", e), RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e), RegisterBlock(e) => write!(f, "error registering block device: {}", e), RegisterGpu(e) => write!(f, "error registering gpu device: {}", e), @@ -2075,10 +2092,10 @@ fn file_fields_to_i64>(path: P) -> io::Result> { // Reads the contents of a file and converts them into a u64, and if there // are multiple fields it only returns the first one. -fn file_to_i64>(path: P) -> io::Result { +fn file_to_i64>(path: P, nth: usize) -> io::Result { file_fields_to_i64(path)? .into_iter() - .next() + .nth(nth) .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file")) } @@ -2334,6 +2351,7 @@ where sigchld_fd, cfg.sandbox, Arc::clone(&map_request), + cfg.balloon_bias, ) } @@ -2355,6 +2373,170 @@ fn kick_all_vcpus( irq_chip.kick_halted_vcpus(); } +// BalloonPolicy determines the size to set the balloon. +struct BalloonPolicy { + // Estimate for when the guest starts aggressivly freeing memory. + critical_guest_available: i64, + critical_host_available: i64, // ChromeOS critical margin. + guest_available_bias: i64, + max_balloon_actual: i64, // The largest the balloon has ever been observed. + prev_balloon_full_percent: i64, // How full was the balloon at the previous timestep. + prev_guest_available: i64, // Available memory in the guest at the previous timestep. +} + +const ONE_KB: i64 = 1024; +const ONE_MB: i64 = 1024 * ONE_KB; + +const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available"; +const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin"; + +// BalloonPolicy implements the virtio balloon sizing logic. +// The balloon is sized with the following heuristics: +// Balance Available +// The balloon is sized to balance the amount of available memory above a +// critical margin. The critical margin is the level at which memory is +// freed. In the host, this is the ChromeOS available critical margin, which +// is the trigger to kill tabs. In the guest, we estimate this level by +// tracking the minimum amount of available memory, discounting sharp +// 'valleys'. If the guest manages to keep available memory above a given +// level even with some pressure, then we determine that this is the +// 'critical' level for the guest. We don't update this critical value if +// the balloon is fully inflated because in that case, the guest may be out +// of memory to free. +// guest_available_bias +// Even if available memory is perfectly balanced between host and guest, +// The size of the balloon will still drift randomly depending on whether +// those host or guest reclaims memory first/faster every time memory is +// low. To encourage large balloons to shrink and small balloons to grow, +// the following bias is added to the guest critical margin: +// (guest_available_bias * balloon_full_percent) / 100 +// This give the guest more memory when the balloon is full. +impl BalloonPolicy { + fn new( + memory_size: i64, + critical_host_available: i64, + guest_available_bias: i64, + ) -> BalloonPolicy { + // Estimate some reasonable initial maximum for balloon size. + let max_balloon_actual = (memory_size * 3) / 4; + // 400MB is above the zone min margin even for Crostini VMs on 16GB + // devices (~85MB), and is above when Android Low Memory Killer kills + // apps (~250MB). + let critical_guest_available = 400 * ONE_MB; + + BalloonPolicy { + critical_guest_available, + critical_host_available, + guest_available_bias, + max_balloon_actual, + prev_balloon_full_percent: 0, + prev_guest_available: 0, + } + } + fn delta(&mut self, stats: BalloonStats, balloon_actual_u: u64) -> Result { + let guest_free = stats + .free_memory + .map(i64::try_from) + .ok_or(Error::GuestFreeMissing())? + .map_err(Error::GuestFreeTooLarge)?; + let guest_cached = stats + .disk_caches + .map(i64::try_from) + .ok_or(Error::GuestFreeMissing())? + .map_err(Error::GuestFreeTooLarge)?; + let balloon_actual = match balloon_actual_u { + size if size < i64::max_value() as u64 => size as i64, + _ => return Err(Error::BalloonActualTooLarge), + }; + let guest_available = guest_free + guest_cached; + // Available memory is reported in MB, and we need bytes. + let host_available = + file_to_i64(LOWMEM_AVAILABLE, 0).map_err(Error::ReadMemAvailable)? * ONE_MB; + if self.max_balloon_actual < balloon_actual { + self.max_balloon_actual = balloon_actual; + info!( + "balloon updated max_balloon_actual to {} MiB", + self.max_balloon_actual / ONE_MB, + ); + } + let balloon_full_percent = balloon_actual * 100 / self.max_balloon_actual; + // Update critical_guest_available if we see a lower available with the + // balloon not fully inflated. If the balloon is completely inflated + // there is a risk that the low available level we see comes at the cost + // of stability. The Linux OOM Killer might have been forced to kill + // something important, or page reclaim was so aggressive that there are + // long UI hangs. + if guest_available < self.critical_guest_available && balloon_full_percent < 95 { + // To ignore temporary low memory states, we require that two guest + // available measurements in a row are low. + if self.prev_guest_available < self.critical_guest_available + && self.prev_balloon_full_percent < 95 + { + self.critical_guest_available = self.prev_guest_available; + info!( + "balloon updated critical_guest_available to {} MiB", + self.critical_guest_available / ONE_MB, + ); + } + } + + // Compute the difference in available memory above the host and guest + // critical thresholds. + let bias = (self.guest_available_bias * balloon_full_percent) / 100; + let guest_above_critical = guest_available - self.critical_guest_available - bias; + let host_above_critical = host_available - self.critical_host_available; + let balloon_delta = guest_above_critical - host_above_critical; + // Only let the balloon take up MAX_CRITICAL_DELTA of available memory + // below the critical level in host or guest. + const MAX_CRITICAL_DELTA: i64 = 10 * ONE_MB; + let balloon_delta_capped = if balloon_delta < 0 { + // The balloon is deflating, taking memory from the host. Don't let + // it take more than the amount of available memory above the + // critical margin, plus MAX_CRITICAL_DELTA. + max( + balloon_delta, + -(host_available - self.critical_host_available + MAX_CRITICAL_DELTA), + ) + } else { + // The balloon is inflating, taking memory from the guest. Don't let + // it take more than the amount of available memory above the + // critical margin, plus MAX_CRITICAL_DELTA. + min( + balloon_delta, + guest_available - self.critical_guest_available + MAX_CRITICAL_DELTA, + ) + }; + + self.prev_balloon_full_percent = balloon_full_percent; + self.prev_guest_available = guest_available; + + // Only return a value if target would change available above critical + // by more than 1%, or we are within 1 MB of critical in host or guest. + if guest_above_critical < ONE_MB + || host_above_critical < ONE_MB + || (balloon_delta.abs() * 100) / guest_above_critical > 1 + || (balloon_delta.abs() * 100) / host_above_critical > 1 + { + // Finally, make sure the balloon delta won't cause a negative size. + let result = max(balloon_delta_capped, -balloon_actual); + if result != 0 { + info!( + "balloon delta={:<6} ha={:<6} hc={:<6} ga={:<6} gc={:<6} bias={:<6} full={:>3}%", + result / ONE_MB, + host_available / ONE_MB, + self.critical_host_available / ONE_MB, + guest_available / ONE_MB, + self.critical_guest_available / ONE_MB, + bias / ONE_MB, + balloon_full_percent, + ); + } + return Ok(result); + } + return Ok(0); + } +} + fn run_control( mut linux: RunnableLinuxVm, control_server_socket: Option, @@ -2365,9 +2547,8 @@ fn run_control>>, + balloon_bias: i64, ) -> Result<()> { - const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available"; - #[derive(PollToken)] enum Token { Exit, @@ -2415,7 +2596,7 @@ fn run_control { - // Available memory is reported in MB, and we need bytes. - let host_available = file_to_i64(LOWMEM_AVAILABLE) - .map_err(Error::ReadMemAvailable)? - << 20; - let guest_free_u = if let Some(free) = stats.free_memory { - free - } else { - warn!("guest free_memory stat is missing"); - continue; - }; - let guest_cached_u = if let Some(cached) = stats.disk_caches { - cached - } else { - warn!("guest disk_caches stat is missing"); - continue; - }; - if guest_free_u > i64::max_value() as u64 { - warn!("guest free memory is too large"); - continue; - } - if guest_cached_u > i64::max_value() as u64 { - warn!("guest cached memory is too large"); - continue; - } - if balloon_actual_u > i64::max_value() as u64 { - warn!("actual balloon size is too large"); - continue; - } - // Tell the guest to change the balloon size if the target balloon size - // is more than 5% different from the current balloon size. - const RESIZE_PERCENT: i64 = 5; - let balloon_actual = balloon_actual_u as i64; - let guest_free = guest_free_u as i64; - let guest_cached = guest_cached_u as i64; - // Compute how much memory the guest should have available after we - // rebalance. - let guest_available_target = host_available; - let guest_available_delta = - guest_available_target - guest_free - guest_cached; - // How much do we have to change the balloon to balance. - let balloon_target = max(balloon_actual - guest_available_delta, 0); - // Compute the change in balloon size in percent. If the balloon size - // is 0, use 1 so we don't overflow from the infinity % increase. - let balloon_change_percent = (balloon_actual - balloon_target).abs() - * 100 - / max(balloon_actual, 1); - - if balloon_change_percent >= RESIZE_PERCENT { - info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})", - host_available, - guest_free, - guest_cached, - guest_available_target, - balloon_actual, - balloon_target, - ); - let command = BalloonControlCommand::Adjust { - num_bytes: balloon_target as u64, - }; - if let Err(e) = balloon_host_socket.send(&command) { - warn!("failed to send memory value to balloon device: {}", e); + match balloon_policy + .as_mut() + .map(|p| p.delta(stats, balloon_actual_u)) + { + None => { + error!( + "got result from balloon stats, but no policy is running" + ); } + Some(Err(e)) => { + warn!("failed to run balloon policy {}", e); + } + Some(Ok(delta)) if delta != 0 => { + let target = max((balloon_actual_u as i64) + delta, 0) as u64; + let command = + BalloonControlCommand::Adjust { num_bytes: target }; + if let Err(e) = balloon_host_socket.send(&command) { + warn!( + "failed to send memory value to balloon device: {}", + e + ); + } + } + Some(Ok(_)) => {} } } Err(e) => { diff --git a/src/main.rs b/src/main.rs index a85ab3d2eb..5d02af02f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1475,6 +1475,18 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: })?; cfg.gdb = Some(port); } + "balloon_bias_mib" => { + cfg.balloon_bias = + value + .unwrap() + .parse::() + .map_err(|_| argument::Error::InvalidValue { + value: value.unwrap().to_owned(), + expected: String::from("expected a valid ballon bias in MiB"), + })? + * 1024 + * 1024; // cfg.balloon_bias is in bytes. + } "help" => return Err(argument::Error::PrintHelp), _ => unreachable!(), } @@ -1673,6 +1685,7 @@ writeback=BOOL - Indicates whether the VM can use writeback caching (default: fa type=goldfish - type of battery emulation, defaults to goldfish "), Argument::value("gdb", "PORT", "(EXPERIMENTAL) gdb on the given port"), + Argument::value("balloon_bias_mib", "N", "Amount to bias balance of memory between host and guest as the balloon inflates, in MiB."), Argument::short_flag('h', "help", "Print help message.")]; let mut cfg = Config::default();