diff --git a/swap/src/lib.rs b/swap/src/lib.rs index 4ead12f436..b592129002 100644 --- a/swap/src/lib.rs +++ b/swap/src/lib.rs @@ -59,9 +59,13 @@ pub enum Status { /// single thread. InProgress, /// swap out succeeded. - Done { + Active { /// time taken for swap-out. - time_took_ms: u128, + swap_time_ms: u128, + /// count of pages on RAM. + resident_pages: usize, + /// count of pages in swap files. + swap_pages: usize, }, /// swap out failed. Failed, @@ -382,12 +386,13 @@ fn disable_monitoring( page_handler: PageHandler, uffd_list: &UffdList, guest_memory: &GuestMemory, -) -> anyhow::Result<()> { - page_handler +) -> anyhow::Result<usize> { + let num_pages = page_handler .swap_in(uffd_list.main_uffd()) .context("unregister all regions")?; let regions = regions_from_guest_memory(guest_memory); - unregister_regions(&regions, uffd_list.get_list()).context("unregister regions") + unregister_regions(&regions, uffd_list.get_list()).context("unregister regions")?; + Ok(num_pages) } /// the main thread of the monitor process. @@ -409,7 +414,7 @@ fn monitor_process( .context("create wait context")?; let mut uffd_list = UffdList::new(uffd, &wait_ctx); - let mut status = Status::Ready; + let mut lastest_swap_out_time_ms = None; let mut page_handler_opt: Option<PageHandler> = None; let mut page_fault_logger: Option = None; @@ -491,40 +496,68 @@ fn monitor_process( info!("start swapping out"); let t0 = std::time::Instant::now(); + let mut num_pages = 0; let result = guest_memory.with_regions::<_, anyhow::Error>( |_, _, _, host_addr, shm, shm_offset| { // safe because all the regions are registered to all userfaultfd // and page fault events are handled by PageHandler. 
- unsafe { page_handler.swap_out(host_addr, shm, shm_offset) } - .context("swap out") + num_pages += + unsafe { page_handler.swap_out(host_addr, shm, shm_offset) } + .context("swap out")?; + Ok(()) }, ); match result { Ok(()) => { - let time_took_ms = t0.elapsed().as_millis(); - info!("swapping out finish in {} ms", time_took_ms); - status = Status::Done { time_took_ms }; + let swap_time_ms = t0.elapsed().as_millis(); + info!("swap out {} pages in {} ms", num_pages, swap_time_ms); + if page_handler.compute_resident_pages() > 0 { + error!( + "active page is not zero just after swap out but {} pages", + page_handler.compute_resident_pages() + ); + } + lastest_swap_out_time_ms = Some(swap_time_ms); } Err(e) => { error!("failed to swapping out the state: {}", e); - status = Status::Failed; + lastest_swap_out_time_ms = None; } } } Command::Disable => { - status = Status::Ready; if let Some(page_handler) = page_handler_opt.take() { - disable_monitoring(page_handler, &uffd_list, &guest_memory)?; - info!("swap in all pages. swap disabled."); + let t0 = std::time::Instant::now(); + let num_pages = + disable_monitoring(page_handler, &uffd_list, &guest_memory)?; + let time_took_ms = t0.elapsed().as_millis(); + info!( + "swap in all {} pages in {} ms. 
swap disabled.", + num_pages, time_took_ms + ); } else { - warn!("swap is disabled."); + warn!("swap is already disabled."); } } Command::Exit => { break 'wait; } Command::Status => { + let status = if let Some(ref page_handler) = page_handler_opt { + if let Some(swap_time_ms) = lastest_swap_out_time_ms { + Status::Active { + swap_time_ms, + resident_pages: page_handler.compute_resident_pages(), + swap_pages: page_handler.compute_swap_pages(), + } + } else { + Status::Failed + } + } else { + Status::Ready + }; tube.send(&status).context("send status response")?; + info!("swap status: {:?}.", status); } Command::StartPageFaultLogging => { if page_fault_logger.is_none() { diff --git a/swap/src/page_handler.rs b/swap/src/page_handler.rs index 4d29396300..77338b2e7d 100644 --- a/swap/src/page_handler.rs +++ b/swap/src/page_handler.rs @@ -58,6 +58,8 @@ struct Region { /// the head page index of the region. head_page_idx: usize, file: SwapFile, + resident_pages: usize, + swap_active: bool, } /// PageHandler manages the page states of multiple regions. @@ -171,6 +173,8 @@ impl PageHandler { self.regions.push(Region { head_page_idx, file, + resident_pages: 0, + swap_active: false, }); Ok(()) } @@ -211,9 +215,12 @@ impl PageHandler { let page_idx = self.addr_to_page_idx(address); // the head address of the page. let page_addr = self.page_base_addr(address); + let page_size = 1 << self.pagesize_shift; let Region { head_page_idx, file, + resident_pages, + .. } = self .find_region(page_idx) .ok_or(Error::InvalidAddress(address))?; @@ -223,14 +230,18 @@ impl PageHandler { Some(page_slice) => { Self::copy_all(uffd, page_addr, page_slice, true)?; file.clear(idx_in_region)?; + *resident_pages += 1; Ok(()) } None => { // Map a zero page since no swap file has been created yet but the fault happened. // safe because the fault page is notified by uffd. 
- let result = uffd.zero(page_addr, 1 << self.pagesize_shift, true); + let result = uffd.zero(page_addr, page_size, true); match result { - Ok(_) => Ok(()), + Ok(_) => { + *resident_pages += 1; + Ok(()) + } Err(UffdError::ZeropageFailed(errno)) if errno as i32 == libc::EEXIST => { // zeroing fails with EEXIST if the page is already filled. This case can // happen if page faults on the same page happen on different processes. @@ -282,6 +293,8 @@ impl PageHandler { /// /// The memory must be protected not to be updated during swapped out. /// + /// Returns the count of swapped out pages. + /// /// # Arguments /// /// * `base_addr` - the head address of the memory region to swap out. @@ -301,7 +314,7 @@ impl PageHandler { base_addr: usize, memfd: &T, base_offset: u64, - ) -> Result<()> + ) -> Result<usize> where T: AsRawDescriptor, { @@ -316,6 +329,7 @@ impl PageHandler { let region_size = self.regions[region_position].file.num_pages() << self.pagesize_shift; let file_data = FileDataIterator::new(memfd, base_offset, region_size as u64); + let mut swapped_size = 0; for data_range in file_data { // assert offset is page aligned let offset = (data_range.start - base_offset) as usize; @@ -328,7 +342,8 @@ impl PageHandler { let mem_slice = unsafe { std::slice::from_raw_parts(addr as *const u8, size) }; self.regions[region_position] .file - .write_to_file(page_idx - head_page_idx, mem_slice)? + .write_to_file(page_idx - head_page_idx, mem_slice)?; + swapped_size += size; // TODO(kawasin): periodically MADV_REMOVE the guest memory. if the pages are in zram, // it increases the RAM usage during swap_out. // TODO(kawasin): free the page cache of the swap file. or direct I/O. @@ -343,23 +358,56 @@ impl PageHandler { libc::MADV_REMOVE, ); } + let swapped_pages = swapped_size >> self.pagesize_shift; + let mut region = &mut self.regions[region_position]; + // Suppress error log on the first swap_out, since resident_pages is not initialized but + // zero. 
+ if region.swap_active && swapped_pages != region.resident_pages { + error!( + "swapped pages ({}) does not match with resident pages ({}).", + swapped_pages, region.resident_pages + ); + } + region.resident_pages = 0; + region.swap_active = true; - Ok(()) + Ok(swapped_pages) } /// Swap in all the content. /// + /// Returns the count of swapped in pages. + /// /// # Arguments /// /// * `uffd` - the main [Userfaultfd]. - pub fn swap_in(self, uffd: &Userfaultfd) -> Result<()> { + pub fn swap_in(self, uffd: &Userfaultfd) -> Result<usize> { + let mut swapped_size = 0; for region in self.regions.iter() { for pages in region.file.all_present_pages() { let page_idx = region.head_page_idx + pages.base_idx; let page_addr = self.page_idx_to_addr(page_idx); + let size = pages.content.size(); Self::copy_all(uffd, page_addr, pages.content, false)?; + swapped_size += size; } } - Ok(()) + Ok(swapped_size >> self.pagesize_shift) + } + + /// Returns count of pages active on the memory. + pub fn compute_resident_pages(&self) -> usize { + self.regions.iter().map(|r| r.resident_pages).sum() + } + + /// Returns count of pages present in the swap files. + pub fn compute_swap_pages(&self) -> usize { + let mut swapped_size = 0; + for r in self.regions.iter() { + for pages in r.file.all_present_pages() { + swapped_size += pages.content.size(); + } + } + swapped_size >> self.pagesize_shift } }