From 2b6855e7252000639ea410d16b319812ba1d97d0 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai
Date: Wed, 12 Jan 2022 16:57:22 +0000
Subject: [PATCH] crosvm: Add support to pass vcpu-cgroup-path to crosvm

The latency reduces from over 1 second to 50-100ms max when we
overload the system.

BUG=b:208138181
TEST=run stress-ng inside and outside VM.

Change-Id: I10a22807b47a1ed2dd181c2ddcc995eebef7f652
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3383936
Reviewed-by: Daniel Verkamp
Tested-by: kokoro
Commit-Queue: Vineeth Pillai
---
 src/crosvm.rs |  2 ++
 src/linux.rs  | 28 +++++++++++++++++++++++++++-
 src/main.rs   | 12 ++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/crosvm.rs b/src/crosvm.rs
index 6409329a6c..76a2edd3e6 100644
--- a/src/crosvm.rs
+++ b/src/crosvm.rs
@@ -332,6 +332,7 @@ pub struct Config {
     pub vhost_vsock_device: Option<PathBuf>,
     pub vhost_net_device_path: PathBuf,
     pub vcpu_count: Option<usize>,
+    pub vcpu_cgroup_path: Option<PathBuf>,
     pub rt_cpus: Vec<usize>,
     pub vcpu_affinity: Option<VcpuAffinity>,
     pub cpu_clusters: Vec<Vec<usize>>,
@@ -436,6 +437,7 @@ impl Default for Config {
             vhost_vsock_device: None,
             vhost_net_device_path: PathBuf::from(VHOST_NET_PATH),
             vcpu_count: None,
+            vcpu_cgroup_path: None,
             rt_cpus: Vec::new(),
             vcpu_affinity: None,
             cpu_clusters: Vec::new(),
diff --git a/src/linux.rs b/src/linux.rs
index 1fadbfcc58..2906ec7853 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -8,6 +8,7 @@ use std::convert::{TryFrom, TryInto};
 #[cfg(feature = "gpu")]
 use std::env;
 use std::fs::{File, OpenOptions};
+use std::io::prelude::*;
 use std::io::stdin;
 use std::iter;
 use std::mem;
@@ -19,6 +20,7 @@ use std::str;
 use std::sync::{mpsc, Arc, Barrier};
 use std::time::Duration;
 
+use std::process;
 use std::thread;
 use std::thread::JoinHandle;
 
@@ -404,7 +406,6 @@ fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceRe
 fn create_tpm_device(cfg: &Config) -> DeviceResult {
     use std::ffi::CString;
     use std::fs;
-    use std::process;
 
     let tpm_storage: PathBuf;
     let mut tpm_jail = simple_jail(cfg, "tpm_device")?;
@@ -2300,6 +2301,7 @@ fn runnable_vcpu(
     use_hypervisor_signals: bool,
     enable_per_vm_core_scheduling: bool,
     host_cpu_topology: bool,
+    vcpu_cgroup_tasks_file: Option<File>,
 ) -> Result<(V, VcpuRunHandle)>
 where
     V: VcpuArch,
@@ -2350,6 +2352,12 @@ where
         }
     }
 
+    // Move vcpu thread to cgroup
+    if let Some(mut f) = vcpu_cgroup_tasks_file {
+        f.write_all(base::gettid().to_string().as_bytes())
+            .context("failed to write vcpu tid to cgroup tasks")?;
+    }
+
     if run_rt {
         const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
         if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
@@ -2476,6 +2484,7 @@ fn run_vcpu(
     >,
     enable_per_vm_core_scheduling: bool,
     host_cpu_topology: bool,
+    vcpu_cgroup_tasks_file: Option<File>,
 ) -> Result<JoinHandle<()>>
 where
     V: VcpuArch + 'static,
@@ -2503,6 +2512,7 @@ where
                 use_hypervisor_signals,
                 enable_per_vm_core_scheduling,
                 host_cpu_topology,
+                vcpu_cgroup_tasks_file,
             );
 
             start_barrier.wait();
@@ -3469,6 +3479,15 @@ fn run_control(
             error!("Failed to enable core scheduling: {}", e);
         }
     }
+    let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
+        None => None,
+        Some(cgroup_path) => {
+            // Move main process to cgroup_path
+            let mut f = File::create(&cgroup_path.join("tasks"))?;
+            f.write_all(process::id().to_string().as_bytes())?;
+            Some(f)
+        }
+    };
     for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
         let (to_vcpu_channel, from_main_channel) = mpsc::channel();
         let vcpu_affinity = match linux.vcpu_affinity.clone() {
@@ -3502,6 +3521,13 @@
             to_gdb_channel.clone(),
             cfg.per_vm_core_scheduling,
             cfg.host_cpu_topology,
+            match vcpu_cgroup_tasks_file {
+                None => None,
+                Some(ref f) => Some(
+                    f.try_clone()
+                        .context("failed to clone vcpu cgroup tasks file")?,
+                ),
+            },
         )?;
         vcpu_handles.push((handle, to_vcpu_channel));
     }
diff --git a/src/main.rs b/src/main.rs
index 9241c789c3..d6c1f5ee14 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1210,6 +1210,17 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument::
         "per-vm-core-scheduling" => {
             cfg.per_vm_core_scheduling = true;
         }
+        "vcpu-cgroup-path" => {
+            let vcpu_cgroup_path = PathBuf::from(value.unwrap());
+            if !vcpu_cgroup_path.exists() {
+                return Err(argument::Error::InvalidValue {
+                    value: value.unwrap().to_owned(),
+                    expected: String::from("This vcpu_cgroup_path path does not exist"),
+                });
+            }
+
+            cfg.vcpu_cgroup_path = Some(vcpu_cgroup_path);
+        }
         #[cfg(feature = "audio_cras")]
         "cras-snd" => {
             cfg.cras_snds.push(
@@ -2442,6 +2453,7 @@ fn run_vm(args: std::env::Args) -> std::result::Result {
           Argument::flag("per-vm-core-scheduling", "Enable per-VM core scheduling intead of the default one (per-vCPU core scheduing) by making all vCPU threads share same cookie for core scheduling. This option is no-op on devices that have neither MDS nor L1TF vulnerability."),
+          Argument::value("vcpu-cgroup-path", "PATH", "Move all vCPU threads to this CGroup (default: nothing moves)."),
           #[cfg(feature = "audio_cras")]
           Argument::value("cras-snd", "[capture=true,client=crosvm,socket=unified,num_output_streams=1,num_input_streams=1]",
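
For context, the patch relies on the cgroup v1 "tasks" interface: writing a thread id into a cgroup's tasks file moves that thread into the cgroup (run_control writes the main process id, runnable_vcpu writes each vCPU tid). A minimal standalone sketch of that mechanism, assuming an already-created cgroup directory and the libc crate; the helper name and error handling are illustrative, not part of the patch:

    // Illustrative sketch (not crosvm code): move the calling thread into an
    // existing cgroup v1 cgroup by appending its tid to the "tasks" file.
    use std::fs::OpenOptions;
    use std::io::Write;
    use std::path::Path;

    fn move_current_thread_to_cgroup(cgroup_dir: &Path) -> std::io::Result<()> {
        let mut tasks = OpenOptions::new()
            .write(true)
            .open(cgroup_dir.join("tasks"))?;
        // SAFETY: gettid() takes no arguments and only returns the caller's tid.
        let tid = unsafe { libc::gettid() };
        tasks.write_all(tid.to_string().as_bytes())
    }

With a cgroup prepared ahead of time (for example, a cpu controller cgroup whose shares have been lowered by the administrator), the new flag is passed as --vcpu-cgroup-path PATH, and crosvm then performs this write for the main process and for every vCPU thread it spawns.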