From 5b158073d5ebe27418569f4354d72aedfacebf75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:48:56 +0200
Subject: [PATCH 1/4] Rename tid variable to cpu_id

---
 src/lib.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 6f0344b2..57842e58 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -51,32 +51,32 @@ impl Uhyve {
 
         let this = Arc::new(self);
 
-        (0..this.num_cpus()).for_each(|tid| {
+        (0..this.num_cpus()).for_each(|cpu_id| {
             let vm = this.clone();
             let exit_tx = exit_tx.clone();
 
             let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(tid as usize).cloned(),
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
                 None => None,
             };
 
             // create thread for each CPU
             thread::spawn(move || {
-                debug!("Create thread for CPU {}", tid);
+                debug!("Create thread for CPU {}", cpu_id);
                 match local_cpu_affinity {
                     Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", tid, core_id.id);
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
                         core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
                     }
                     None => debug!("No affinity specified, not binding thread"),
                 }
 
-                let mut cpu = vm.create_cpu(tid).unwrap();
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
                 cpu.init(vm.get_entry_point()).unwrap();
 
                 // only one core is able to enter startup code
                 // => the wait for the predecessor core
-                while tid != vm.cpu_online() {
+                while cpu_id != vm.cpu_online() {
                     hint::spin_loop();
                 }
 
@@ -84,7 +84,7 @@ impl Uhyve {
             let result = cpu.run();
             match result {
                 Err(x) => {
-                    error!("CPU {} crashes! {:?}", tid, x);
+                    error!("CPU {} crashes! {:?}", cpu_id, x);
                 }
                 Ok(exit_code) => {
                     exit_tx.send(exit_code).unwrap();

From 7dd707894d0aa5edc85a57aec2f0a7e6938f5ee5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:50:36 +0200
Subject: [PATCH 2/4] Linux: Make vCPUs kickable

This allows vCPUs to be kicked out of execution externally. Kicked
vCPUs exit without code.
---
 src/lib.rs        |   9 +-
 src/linux/vcpu.rs | 285 ++++++++++++++++++++++++----------------------
 src/macos/vcpu.rs |   5 +-
 src/vm.rs         |   6 +-
 4 files changed, 157 insertions(+), 148 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 57842e58..918f3018 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -83,12 +83,9 @@ impl Uhyve {
             // jump into the VM and execute code of the guest
             let result = cpu.run();
             match result {
-                Err(x) => {
-                    error!("CPU {} crashes! {:?}", cpu_id, x);
-                }
-                Ok(exit_code) => {
-                    exit_tx.send(exit_code).unwrap();
-                }
+                Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                Ok(None) => {}
+                Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
             }
         });
     });
{:?}", cpu_id, x); - } - Ok(exit_code) => { - exit_tx.send(exit_code).unwrap(); - } + Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(), + Ok(None) => {} + Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err), } }); }); diff --git a/src/linux/vcpu.rs b/src/linux/vcpu.rs index 0e15f182..0c58e66b 100755 --- a/src/linux/vcpu.rs +++ b/src/linux/vcpu.rs @@ -278,168 +278,174 @@ impl VirtualCPU for UhyveCPU { fn r#continue(&mut self) -> HypervisorResult { loop { - let exitreason = self.vcpu.run()?; - match exitreason { - VcpuExit::Hlt => { - // Ignore `VcpuExit::Hlt` - debug!("{:?}", VcpuExit::Hlt); - } - VcpuExit::Shutdown => { - return Ok(VcpuStopReason::Exit(0)); - } - VcpuExit::IoIn(port, addr) => match port { - PCI_CONFIG_DATA_PORT => { - if let Some(pci_addr) = self.pci_addr { - if pci_addr & 0x1ff800 == 0 { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.handle_read(pci_addr & 0x3ff, addr); - } else { - unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; - } - } else { - unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; - } - } - PCI_CONFIG_ADDRESS_PORT => {} - VIRTIO_PCI_STATUS => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_status(addr); - } - VIRTIO_PCI_HOST_FEATURES => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_host_features(addr); - } - VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_requested_features(addr); - } - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF..=VIRTIO_PCI_CONFIG_OFF_MSIX_OFF_MAX => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_mac_byte(addr, port - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF); + match self.vcpu.run() { + Ok(vcpu_stop_reason) => match vcpu_stop_reason { + VcpuExit::Hlt => { + // Ignore `VcpuExit::Hlt` + debug!("{:?}", VcpuExit::Hlt); } - VIRTIO_PCI_ISR => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.reset_interrupt() + VcpuExit::Shutdown => { + return Ok(VcpuStopReason::Exit(0)); } - VIRTIO_PCI_LINK_STATUS_MSIX_OFF => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_link_status(addr); - } - _ => { - info!("Unhanded IO Exit"); - } - }, - VcpuExit::IoOut(port, addr) => { - match port { - UHYVE_UART_PORT => { - self.uart(addr)?; - } - UHYVE_PORT_CMDSIZE => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.cmdsize(self.host_address(data_addr)); - } - UHYVE_PORT_CMDVAL => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.cmdval(self.host_address(data_addr)); - } - UHYVE_PORT_NETWRITE => { - match &self.tx { - Some(tx_channel) => tx_channel.send(1).unwrap(), - - None => {} - }; - } - UHYVE_PORT_EXIT => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - return Ok(VcpuStopReason::Exit( - self.exit(self.host_address(data_addr)), - )); - } - UHYVE_PORT_OPEN => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.open(self.host_address(data_addr)); - } - UHYVE_PORT_WRITE => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.write(self.host_address(data_addr))?; - } - UHYVE_PORT_READ => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.read(self.host_address(data_addr)); - } - UHYVE_PORT_UNLINK => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - 
+                    VcpuExit::IoOut(port, addr) => {
+                        match port {
+                            UHYVE_UART_PORT => {
+                                self.uart(addr)?;
+                            }
+                            UHYVE_PORT_CMDSIZE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.cmdsize(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_CMDVAL => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.cmdval(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_NETWRITE => {
+                                match &self.tx {
+                                    Some(tx_channel) => tx_channel.send(1).unwrap(),
+
+                                    None => {}
+                                };
+                            }
+                            UHYVE_PORT_EXIT => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                return Ok(VcpuStopReason::Exit(
+                                    self.exit(self.host_address(data_addr)),
+                                ));
+                            }
+                            UHYVE_PORT_OPEN => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.open(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_WRITE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.write(self.host_address(data_addr))?;
+                            }
+                            UHYVE_PORT_READ => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.read(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_UNLINK => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.unlink(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_LSEEK => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.lseek(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_CLOSE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.close(self.host_address(data_addr));
+                            }
+                            //TODO:
+                            PCI_CONFIG_DATA_PORT => {
+                                if let Some(pci_addr) = self.pci_addr {
+                                    if pci_addr & 0x1ff800 == 0 {
+                                        let mut virtio_device = self.virtio_device.lock().unwrap();
+                                        virtio_device.handle_write(pci_addr & 0x3ff, addr);
+                                    }
+                                }
+                            }
+                            PCI_CONFIG_ADDRESS_PORT => {
+                                self.pci_addr = Some(unsafe { *(addr.as_ptr() as *const u32) });
+                            }
+                            VIRTIO_PCI_STATUS => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_status(addr);
+                            }
+                            VIRTIO_PCI_GUEST_FEATURES => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_requested_features(addr);
+                            }
+                            VIRTIO_PCI_QUEUE_NOTIFY => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.handle_notify_output(addr, self);
+                            }
+                            VIRTIO_PCI_QUEUE_SEL => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_selected_queue(addr);
+                            }
+                            VIRTIO_PCI_QUEUE_PFN => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_pfn(addr, self);
+                            }
+                            _ => {
+                                panic!("Unhandled IO exit: 0x{:x}", port);
+                            }
+                        }
+                    }
+                    VcpuExit::Debug => {
+                        info!("Caught Debug Interrupt!");
+                        return Ok(VcpuStopReason::Debug);
+                    }
+                    VcpuExit::InternalError => {
+                        panic!("{:?}", VcpuExit::InternalError)
+                    }
+                    vcpu_exit => {
+                        unimplemented!("{:?}", vcpu_exit)
+                    }
+                },
+                Err(err) => match err.errno() {
+                    libc::EINTR => return Ok(VcpuStopReason::Kick),
+                    _ => return Err(err),
+                },
             }
         }
     }
 
-    fn run(&mut self) -> HypervisorResult<i32> {
+    fn run(&mut self) -> HypervisorResult<Option<i32>> {
         // Pause first CPU before first execution, so we have time to attach debugger
         if self.id == 0 {
             self.gdb_handle_exception(None);
@@ -448,7 +454,8 @@ impl VirtualCPU for UhyveCPU {
         loop {
             match self.r#continue()? {
                 VcpuStopReason::Debug => self.gdb_handle_exception(Some(VcpuExit::Debug)),
-                VcpuStopReason::Exit(code) => break Ok(code),
+                VcpuStopReason::Exit(code) => break Ok(Some(code)),
+                VcpuStopReason::Kick => break Ok(None),
             }
         }
     }
diff --git a/src/macos/vcpu.rs b/src/macos/vcpu.rs
index c298c092..dcc230ba 100644
--- a/src/macos/vcpu.rs
+++ b/src/macos/vcpu.rs
@@ -730,7 +730,7 @@ impl VirtualCPU for UhyveCPU {
         }
     }
 
-    fn run(&mut self) -> HypervisorResult<i32> {
+    fn run(&mut self) -> HypervisorResult<Option<i32>> {
         // Pause first CPU before first execution, so we have time to attach debugger
         if self.id == 0 {
             self.gdb_handle_exception(false);
@@ -739,7 +739,8 @@ impl VirtualCPU for UhyveCPU {
         loop {
             match self.r#continue()? {
                 VcpuStopReason::Debug => self.gdb_handle_exception(true),
-                VcpuStopReason::Exit(code) => break Ok(code),
+                VcpuStopReason::Exit(code) => break Ok(Some(code)),
+                VcpuStopReason::Kick => break Ok(None),
             }
         }
     }
diff --git a/src/vm.rs b/src/vm.rs
index 518db5c7..4c987b61 100644
--- a/src/vm.rs
+++ b/src/vm.rs
@@ -228,8 +228,12 @@ pub type LoadKernelResult<T> = Result<T, LoadKernelError>;
 pub enum VcpuStopReason {
     /// The vCPU stopped for debugging.
     Debug,
+
     /// The vCPU exited with the specified exit code.
     Exit(i32),
+
+    /// The vCPU got kicked.
+    Kick,
 }
 
 pub trait VirtualCPU {
@@ -240,7 +244,7 @@ pub trait VirtualCPU {
     fn r#continue(&mut self) -> HypervisorResult<VcpuStopReason>;
 
     /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`).
-    fn run(&mut self) -> HypervisorResult<i32>;
+    fn run(&mut self) -> HypervisorResult<Option<i32>>;
 
     /// Prints the VCPU's registers to stdout.
     fn print_registers(&self);
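The mechanism behind patch 2 deserves spelling out: a signal delivered to a thread that is blocked in the KVM_RUN ioctl makes the ioctl fail with EINTR, and the new `Err(err)` arm in `r#continue` maps exactly that errno to `VcpuStopReason::Kick`, which `run` translates into `Ok(None)`. The following standalone sketch (not part of the patch series; it assumes only the `libc` crate as a dependency and uses SIGUSR1 in place of the real-time signal that patch 4 introduces) demonstrates the same effect against an ordinary blocking read(2):

    use std::{io, mem, os::unix::thread::JoinHandleExt, ptr, thread, time::Duration};

    extern "C" fn noop(_signum: libc::c_int) {}

    fn main() {
        // Install a no-op handler for SIGUSR1 *without* SA_RESTART, so that a
        // signal delivered to a blocked thread makes the syscall fail with EINTR.
        unsafe {
            let mut sa: libc::sigaction = mem::zeroed(); // zeroed mask, no flags
            sa.sa_sigaction = noop as extern "C" fn(libc::c_int) as libc::sighandler_t;
            libc::sigaction(libc::SIGUSR1, &sa, ptr::null_mut());
        }

        let worker = thread::spawn(|| {
            // Block in read(2) on a pipe that never receives data, standing in
            // for a vCPU thread sitting in the KVM_RUN ioctl.
            let mut fds: [libc::c_int; 2] = [0; 2];
            unsafe { libc::pipe(fds.as_mut_ptr()) };
            let mut byte = 0u8;
            let ret =
                unsafe { libc::read(fds[0], &mut byte as *mut u8 as *mut libc::c_void, 1) };
            // The kick arrives as EINTR, which uhyve maps to VcpuStopReason::Kick.
            assert_eq!(ret, -1);
            assert_eq!(io::Error::last_os_error().raw_os_error(), Some(libc::EINTR));
            println!("worker kicked out of a blocking syscall");
        });

        thread::sleep(Duration::from_millis(100)); // give the worker time to block
        // Kick the worker, as KickSignal::pthread_kill will do in patch 4.
        unsafe { libc::pthread_kill(worker.as_pthread_t(), libc::SIGUSR1) };
        worker.join().unwrap();
    }

Note that the handler is installed through sigaction rather than glibc's signal(), which enables BSD restart semantics (SA_RESTART); with SA_RESTART the kernel would transparently restart the read instead of reporting EINTR.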
From f01cd3a277effbc606005e9b4d1a97a8da46f31b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:59:32 +0200
Subject: [PATCH 3/4] Make `Uhyve::run` OS-specific

This copies the code without changes.
---
 src/lib.rs       | 70 ----------------------------------------------
 src/linux/mod.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 src/macos/mod.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+), 70 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 918f3018..261f7ce0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,73 +28,3 @@ pub mod vm;
 
 pub use arch::*;
 pub use os::uhyve::Uhyve;
-
-use core_affinity::CoreId;
-use std::hint;
-use std::sync::mpsc;
-use std::sync::Arc;
-use std::thread;
-use vm::Vm;
-
-impl Uhyve {
-    /// Runs the VM.
-    ///
-    /// Blocks until the VM has finished execution.
-    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
-        unsafe {
-            self.load_kernel().expect("Unabled to load the kernel");
-        }
-
-        // For communication of the exit code from one vcpu to this thread as return
-        // value.
-        let (exit_tx, exit_rx) = mpsc::channel();
-
-        let this = Arc::new(self);
-
-        (0..this.num_cpus()).for_each(|cpu_id| {
-            let vm = this.clone();
-            let exit_tx = exit_tx.clone();
-
-            let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(cpu_id as usize).cloned(),
-                None => None,
-            };
-
-            // create thread for each CPU
-            thread::spawn(move || {
-                debug!("Create thread for CPU {}", cpu_id);
-                match local_cpu_affinity {
-                    Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
-                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
-                    }
-                    None => debug!("No affinity specified, not binding thread"),
-                }
-
-                let mut cpu = vm.create_cpu(cpu_id).unwrap();
-                cpu.init(vm.get_entry_point()).unwrap();
-
-                // only one core is able to enter startup code
-                // => the wait for the predecessor core
-                while cpu_id != vm.cpu_online() {
-                    hint::spin_loop();
-                }
-
-                // jump into the VM and execute code of the guest
-                let result = cpu.run();
-                match result {
-                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
-                    Ok(None) => {}
-                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
-                }
-            });
-        });
-
-        // This is a semi-bad design. We don't wait for the other cpu's threads to
-        // finish, but as soon as one cpu sends an exit code, we return it and
-        // ignore the remaining running threads. A better design would be to force
-        // the VCPUs externally to stop, so that the other threads don't block and
-        // can be terminated correctly.
-        exit_rx.recv().unwrap()
-    }
-}
diff --git a/src/linux/mod.rs b/src/linux/mod.rs
index 7b8ecadf..182e0a1e 100755
--- a/src/linux/mod.rs
+++ b/src/linux/mod.rs
@@ -6,9 +6,18 @@ pub mod virtqueue;
 
 pub type HypervisorError = kvm_ioctls::Error;
 
+use std::{
+    hint,
+    sync::{mpsc, Arc},
+    thread,
+};
+
+use core_affinity::CoreId;
 use kvm_ioctls::Kvm;
 use lazy_static::lazy_static;
 
+use crate::{vm::Vm, Uhyve};
+
 lazy_static! {
     static ref KVM: Kvm = Kvm::new().unwrap();
 }
@@ -19,3 +28,66 @@ trait MemoryRegion {
     fn guest_address(&self) -> usize;
     fn host_address(&self) -> usize;
 }
+
+impl Uhyve {
+    /// Runs the VM.
+    ///
+    /// Blocks until the VM has finished execution.
+    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        unsafe {
+            self.load_kernel().expect("Unabled to load the kernel");
+        }
+
+        // For communication of the exit code from one vcpu to this thread as return
+        // value.
+        let (exit_tx, exit_rx) = mpsc::channel();
+
+        let this = Arc::new(self);
+
+        (0..this.num_cpus()).for_each(|cpu_id| {
+            let vm = this.clone();
+            let exit_tx = exit_tx.clone();
+
+            let local_cpu_affinity = match &cpu_affinity {
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
+                None => None,
+            };
+
+            // create thread for each CPU
+            thread::spawn(move || {
+                debug!("Create thread for CPU {}", cpu_id);
+                match local_cpu_affinity {
+                    Some(core_id) => {
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                    }
+                    None => debug!("No affinity specified, not binding thread"),
+                }
+
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                cpu.init(vm.get_entry_point()).unwrap();
+
+                // only one core is able to enter startup code
+                // => the wait for the predecessor core
+                while cpu_id != vm.cpu_online() {
+                    hint::spin_loop();
+                }
+
+                // jump into the VM and execute code of the guest
+                let result = cpu.run();
+                match result {
+                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                    Ok(None) => {}
+                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
+                }
+            });
+        });
+
+        // This is a semi-bad design. We don't wait for the other cpu's threads to
+        // finish, but as soon as one cpu sends an exit code, we return it and
+        // ignore the remaining running threads. A better design would be to force
+        // the VCPUs externally to stop, so that the other threads don't block and
+        // can be terminated correctly.
+        exit_rx.recv().unwrap()
+    }
+}
diff --git a/src/macos/mod.rs b/src/macos/mod.rs
index 364a19c8..e66c3ee1 100644
--- a/src/macos/mod.rs
+++ b/src/macos/mod.rs
@@ -1,6 +1,79 @@
+use std::{
+    hint,
+    sync::{mpsc, Arc},
+    thread,
+};
+
+use core_affinity::CoreId;
+
+use crate::{vm::Vm, Uhyve};
+
 pub mod gdb;
 mod ioapic;
 pub mod uhyve;
 pub mod vcpu;
 
 pub type HypervisorError = xhypervisor::Error;
+
+impl Uhyve {
+    /// Runs the VM.
+    ///
+    /// Blocks until the VM has finished execution.
+    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        unsafe {
+            self.load_kernel().expect("Unabled to load the kernel");
+        }
+
+        // For communication of the exit code from one vcpu to this thread as return
+        // value.
+        let (exit_tx, exit_rx) = mpsc::channel();
+
+        let this = Arc::new(self);
+
+        (0..this.num_cpus()).for_each(|cpu_id| {
+            let vm = this.clone();
+            let exit_tx = exit_tx.clone();
+
+            let local_cpu_affinity = match &cpu_affinity {
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
+                None => None,
+            };
+
+            // create thread for each CPU
+            thread::spawn(move || {
+                debug!("Create thread for CPU {}", cpu_id);
+                match local_cpu_affinity {
+                    Some(core_id) => {
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                    }
+                    None => debug!("No affinity specified, not binding thread"),
+                }
+
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                cpu.init(vm.get_entry_point()).unwrap();
+
+                // only one core is able to enter startup code
+                // => the wait for the predecessor core
+                while cpu_id != vm.cpu_online() {
+                    hint::spin_loop();
+                }
+
+                // jump into the VM and execute code of the guest
+                let result = cpu.run();
+                match result {
+                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                    Ok(None) => {}
+                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
+                }
+            });
+        });
+
+        // This is a semi-bad design. We don't wait for the other cpu's threads to
+        // finish, but as soon as one cpu sends an exit code, we return it and
+        // ignore the remaining running threads. A better design would be to force
+        // the VCPUs externally to stop, so that the other threads don't block and
+        // can be terminated correctly.
+        exit_rx.recv().unwrap()
+    }
+}

From a357cd1bfc8be045bacf508dab750e3e82878334 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 19:00:37 +0200
Subject: [PATCH 4/4] Linux: Add kick signal, shut down vCPU threads
 gracefully

---
 Cargo.lock       |  30 +++++----
 Cargo.toml       |   3 +
 src/linux/mod.rs | 154 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 125 insertions(+), 62 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 165e0be5..333e8223 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -27,7 +27,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
 dependencies = [
  "hermit-abi",
- "libc",
+ "libc 0.2.100",
  "winapi 0.3.9",
 ]
@@ -131,7 +131,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f"
 dependencies = [
  "kernel32-sys",
- "libc",
+ "libc 0.2.100",
  "num_cpus",
  "winapi 0.2.8",
 ]
@@ -320,7 +320,7 @@ version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
 dependencies = [
- "libc",
+ "libc 0.2.100",
 ]
@@ -389,7 +389,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f2924454e22895c738e43331ae310459c74a11ded9c97dc250129ee10d2f9ca2"
 dependencies = [
  "kvm-bindings",
- "libc",
+ "libc 0.2.100",
  "vmm-sys-util",
 ]
@@ -399,6 +399,11 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
+[[package]]
+name = "libc"
+version = "0.2.98"
+source = "git+https://github.com/rust-lang/libc?rev=f5e31f208#f5e31f208ee51d60e4848e09c16ee86e3968b6f8"
+
 [[package]]
 name = "libc"
 version = "0.2.100"
@@ -430,7 +435,7 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum =
"148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" dependencies = [ - "libc", + "libc 0.2.100", ] [[package]] @@ -457,19 +462,18 @@ dependencies = [ "bitflags", "cc", "cfg-if", - "libc", + "libc 0.2.100", ] [[package]] name = "nix" version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1e25ee6b412c2a1e3fcb6a4499a5c1bfe7f43e014bdce9a6b6666e5aa2d187" +source = "git+https://github.com/nix-rust/nix#dab7332eabed8646f6d01a0d0688b4d1438accb4" dependencies = [ "bitflags", "cc", "cfg-if", - "libc", + "libc 0.2.98", "memoffset", ] @@ -498,7 +502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ "hermit-abi", - "libc", + "libc 0.2.100", ] [[package]] @@ -851,7 +855,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "lazy_static", - "libc", + "libc 0.2.100", "log", "mac_address", "nix 0.22.0", @@ -913,7 +917,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cf11afbc4ebc0d5c7a7748a77d19e2042677fc15faa2f4ccccb27c18a60605" dependencies = [ "bitflags", - "libc", + "libc 0.2.100", ] [[package]] @@ -1051,5 +1055,5 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "643a7272630495dac4765f4f6b020b54af2c4d71fd29ef6b6141ade280355f8a" dependencies = [ - "libc", + "libc 0.2.100", ] diff --git a/Cargo.toml b/Cargo.toml index 588a7d7f..d5a7d834 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,9 @@ harness = false default = [] instrument = ["rftrace", "rftrace-frontend"] +[patch.crates-io] +nix = { git = "https://github.com/nix-rust/nix" } + [dependencies] bitflags = "1.3" byteorder = "1.4" diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 182e0a1e..1072a574 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -7,14 +7,20 @@ pub mod virtqueue; pub type HypervisorError = kvm_ioctls::Error; use std::{ - hint, - sync::{mpsc, Arc}, + hint, mem, + os::unix::prelude::JoinHandleExt, + sync::{Arc, Barrier}, thread, }; use core_affinity::CoreId; use kvm_ioctls::Kvm; use lazy_static::lazy_static; +use libc::{SIGRTMAX, SIGRTMIN}; +use nix::sys::{ + pthread::{pthread_kill, Pthread}, + signal::{signal, SigHandler, Signal}, +}; use crate::{vm::Vm, Uhyve}; @@ -29,65 +35,115 @@ trait MemoryRegion { fn host_address(&self) -> usize; } +/// The signal for kicking vCPUs out of KVM_RUN. +/// +/// It is used to stop a vCPU from another thread. +struct KickSignal; + +impl KickSignal { + const RTSIG_OFFSET: libc::c_int = 0; + + fn get() -> Signal { + let kick_signal = SIGRTMIN() + Self::RTSIG_OFFSET; + assert!(kick_signal <= SIGRTMAX()); + // TODO: Remove the transmute once realtime signals are properly supported by nix + // https://github.com/nix-rust/nix/issues/495 + unsafe { mem::transmute(kick_signal) } + } + + fn register_handler() -> nix::Result<()> { + extern "C" fn handle_signal(_signal: libc::c_int) {} + // SAFETY: We don't use the `signal`'s return value. + unsafe { + signal(Self::get(), SigHandler::Handler(handle_signal))?; + } + Ok(()) + } + + /// Sends the kick signal to a thread. + /// + /// [`KickSignal::register_handler`] should be called prior to this to avoid crashing the program with the default handler. + fn pthread_kill(pthread: Pthread) -> nix::Result<()> { + pthread_kill(pthread, Self::get()) + } +} + impl Uhyve { /// Runs the VM. /// /// Blocks until the VM has finished execution. 
     pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        KickSignal::register_handler().unwrap();
+
         unsafe {
             self.load_kernel().expect("Unabled to load the kernel");
         }
 
-        // For communication of the exit code from one vcpu to this thread as return
-        // value.
-        let (exit_tx, exit_rx) = mpsc::channel();
+        // After spinning up all vCPU threads, the main thread waits for any vCPU to end execution.
+        let barrier = Arc::new(Barrier::new(2));
 
         let this = Arc::new(self);
-
-        (0..this.num_cpus()).for_each(|cpu_id| {
-            let vm = this.clone();
-            let exit_tx = exit_tx.clone();
-
-            let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(cpu_id as usize).cloned(),
-                None => None,
-            };
-
-            // create thread for each CPU
-            thread::spawn(move || {
-                debug!("Create thread for CPU {}", cpu_id);
-                match local_cpu_affinity {
-                    Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
-                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
-                    }
-                    None => debug!("No affinity specified, not binding thread"),
-                }
-
-                let mut cpu = vm.create_cpu(cpu_id).unwrap();
-                cpu.init(vm.get_entry_point()).unwrap();
-
-                // only one core is able to enter startup code
-                // => the wait for the predecessor core
-                while cpu_id != vm.cpu_online() {
-                    hint::spin_loop();
-                }
-
-                // jump into the VM and execute code of the guest
-                let result = cpu.run();
-                match result {
-                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
-                    Ok(None) => {}
-                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
-                }
-            });
-        });
-
-        // This is a semi-bad design. We don't wait for the other cpu's threads to
-        // finish, but as soon as one cpu sends an exit code, we return it and
-        // ignore the remaining running threads. A better design would be to force
-        // the VCPUs externally to stop, so that the other threads don't block and
-        // can be terminated correctly.
-        exit_rx.recv().unwrap()
+        let threads = (0..this.num_cpus())
+            .map(|cpu_id| {
+                let vm = this.clone();
+                let barrier = barrier.clone();
+                let local_cpu_affinity = cpu_affinity
+                    .as_ref()
+                    .map(|core_ids| core_ids.get(cpu_id as usize).copied())
+                    .flatten();
+
+                thread::spawn(move || {
+                    debug!("Create thread for CPU {}", cpu_id);
+                    match local_cpu_affinity {
+                        Some(core_id) => {
+                            debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                            core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                        }
+                        None => debug!("No affinity specified, not binding thread"),
+                    }
+
+                    let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                    cpu.init(vm.get_entry_point()).unwrap();
+
+                    // only one core is able to enter startup code
+                    // => the wait for the predecessor core
+                    while cpu_id != vm.cpu_online() {
+                        hint::spin_loop();
+                    }
+
+                    // jump into the VM and execute code of the guest
+                    match cpu.run() {
+                        Ok(code) => {
+                            if code.is_some() {
+                                // Let the main thread continue with kicking the other vCPUs
+                                barrier.wait();
+                            }
+                            code
+                        }
+                        Err(err) => {
+                            error!("CPU {} crashed with {:?}", cpu_id, err);
+                            None
+                        }
+                    }
+                })
+            })
+            .collect::<Vec<_>>();
+
+        // Wait for one vCPU to return with an exit code.
+        barrier.wait();
+        for thread in &threads {
+            KickSignal::pthread_kill(thread.as_pthread_t()).unwrap();
+        }
+
+        let code = threads
+            .into_iter()
+            .filter_map(|thread| thread.join().unwrap())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            1,
+            code.len(),
+            "more than one thread finished with an exit code"
+        );
+        code[0]
     }
 }
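The shutdown choreography of patch 4 can be exercised in isolation: a `Barrier::new(2)` pairs the first exiting vCPU with the main thread, which then kicks the remaining threads, joins them all, and `filter_map`s out the single exit code. The following std-only sketch mirrors that structure; all names are illustrative, and an `AtomicBool` stands in for the kick signal, since plain Rust cannot interrupt a thread blocked in an ioctl:

    use std::{
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Barrier,
        },
        thread,
        time::Duration,
    };

    fn main() {
        let barrier = Arc::new(Barrier::new(2)); // one finisher + the main thread
        let stop = Arc::new(AtomicBool::new(false)); // stands in for KickSignal

        let threads: Vec<_> = (0..4)
            .map(|id| {
                let barrier = barrier.clone();
                let stop = stop.clone();
                thread::spawn(move || loop {
                    if id == 0 {
                        // This worker "exits" like a vCPU hitting UHYVE_PORT_EXIT:
                        // wake the main thread so it can stop the others.
                        barrier.wait();
                        return Some(0i32);
                    }
                    if stop.load(Ordering::Relaxed) {
                        return None; // kicked: finish without an exit code
                    }
                    thread::sleep(Duration::from_millis(1)); // stand-in for KVM_RUN
                })
            })
            .collect();

        // Wait for one worker to produce an exit code, then stop the rest.
        barrier.wait();
        stop.store(true, Ordering::Relaxed);

        let codes: Vec<i32> = threads
            .into_iter()
            .filter_map(|t| t.join().unwrap())
            .collect();
        assert_eq!(codes.len(), 1, "more than one thread finished with an exit code");
        println!("exit code: {}", codes[0]);
    }

As in the patch, exactly one worker is expected to deliver an exit code; the final assert mirrors the patch's `assert_eq!` on `code.len()`.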