From 5b158073d5ebe27418569f4354d72aedfacebf75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:48:56 +0200
Subject: [PATCH 1/4] Rename tid variable to cpu_id

---
 src/lib.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 6f0344b2..57842e58 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -51,32 +51,32 @@ impl Uhyve {
 
         let this = Arc::new(self);
 
-        (0..this.num_cpus()).for_each(|tid| {
+        (0..this.num_cpus()).for_each(|cpu_id| {
             let vm = this.clone();
             let exit_tx = exit_tx.clone();
 
             let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(tid as usize).cloned(),
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
                 None => None,
             };
 
             // create thread for each CPU
             thread::spawn(move || {
-                debug!("Create thread for CPU {}", tid);
+                debug!("Create thread for CPU {}", cpu_id);
                 match local_cpu_affinity {
                     Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", tid, core_id.id);
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
                         core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
                     }
                     None => debug!("No affinity specified, not binding thread"),
                 }
 
-                let mut cpu = vm.create_cpu(tid).unwrap();
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
                 cpu.init(vm.get_entry_point()).unwrap();
 
                 // only one core is able to enter startup code
                 // => the wait for the predecessor core
-                while tid != vm.cpu_online() {
+                while cpu_id != vm.cpu_online() {
                     hint::spin_loop();
                 }
 
@@ -84,7 +84,7 @@ impl Uhyve {
             let result = cpu.run();
             match result {
                 Err(x) => {
-                    error!("CPU {} crashes! {:?}", tid, x);
+                    error!("CPU {} crashes! {:?}", cpu_id, x);
                 }
                 Ok(exit_code) => {
                     exit_tx.send(exit_code).unwrap();

From 7dd707894d0aa5edc85a57aec2f0a7e6938f5ee5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:50:36 +0200
Subject: [PATCH 2/4] Linux: Make vCPUs kickable

This allows vCPUs to be kicked out of execution externally. Kicked
vCPUs exit without code.
---
 src/lib.rs        |   9 +-
 src/linux/vcpu.rs | 285 ++++++++++++++++++++++++----------------------
 src/macos/vcpu.rs |   5 +-
 src/vm.rs         |   6 +-
 4 files changed, 157 insertions(+), 148 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 57842e58..918f3018 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -83,12 +83,9 @@ impl Uhyve {
             // jump into the VM and execute code of the guest
             let result = cpu.run();
             match result {
-                Err(x) => {
-                    error!("CPU {} crashes! {:?}", cpu_id, x);
-                }
-                Ok(exit_code) => {
-                    exit_tx.send(exit_code).unwrap();
-                }
+                Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                Ok(None) => {}
+                Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
             }
         });
     });
{:?}", cpu_id, x); - } - Ok(exit_code) => { - exit_tx.send(exit_code).unwrap(); - } + Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(), + Ok(None) => {} + Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err), } }); }); diff --git a/src/linux/vcpu.rs b/src/linux/vcpu.rs index 0e15f182..0c58e66b 100755 --- a/src/linux/vcpu.rs +++ b/src/linux/vcpu.rs @@ -278,168 +278,174 @@ impl VirtualCPU for UhyveCPU { fn r#continue(&mut self) -> HypervisorResult { loop { - let exitreason = self.vcpu.run()?; - match exitreason { - VcpuExit::Hlt => { - // Ignore `VcpuExit::Hlt` - debug!("{:?}", VcpuExit::Hlt); - } - VcpuExit::Shutdown => { - return Ok(VcpuStopReason::Exit(0)); - } - VcpuExit::IoIn(port, addr) => match port { - PCI_CONFIG_DATA_PORT => { - if let Some(pci_addr) = self.pci_addr { - if pci_addr & 0x1ff800 == 0 { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.handle_read(pci_addr & 0x3ff, addr); - } else { - unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; - } - } else { - unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; - } - } - PCI_CONFIG_ADDRESS_PORT => {} - VIRTIO_PCI_STATUS => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_status(addr); - } - VIRTIO_PCI_HOST_FEATURES => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_host_features(addr); - } - VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_requested_features(addr); - } - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF..=VIRTIO_PCI_CONFIG_OFF_MSIX_OFF_MAX => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_mac_byte(addr, port - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF); + match self.vcpu.run() { + Ok(vcpu_stop_reason) => match vcpu_stop_reason { + VcpuExit::Hlt => { + // Ignore `VcpuExit::Hlt` + debug!("{:?}", VcpuExit::Hlt); } - VIRTIO_PCI_ISR => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.reset_interrupt() + VcpuExit::Shutdown => { + return Ok(VcpuStopReason::Exit(0)); } - VIRTIO_PCI_LINK_STATUS_MSIX_OFF => { - let virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.read_link_status(addr); - } - _ => { - info!("Unhanded IO Exit"); - } - }, - VcpuExit::IoOut(port, addr) => { - match port { - UHYVE_UART_PORT => { - self.uart(addr)?; - } - UHYVE_PORT_CMDSIZE => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.cmdsize(self.host_address(data_addr)); - } - UHYVE_PORT_CMDVAL => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.cmdval(self.host_address(data_addr)); - } - UHYVE_PORT_NETWRITE => { - match &self.tx { - Some(tx_channel) => tx_channel.send(1).unwrap(), - - None => {} - }; - } - UHYVE_PORT_EXIT => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - return Ok(VcpuStopReason::Exit( - self.exit(self.host_address(data_addr)), - )); - } - UHYVE_PORT_OPEN => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.open(self.host_address(data_addr)); - } - UHYVE_PORT_WRITE => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.write(self.host_address(data_addr))?; - } - UHYVE_PORT_READ => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - self.read(self.host_address(data_addr)); - } - UHYVE_PORT_UNLINK => { - let data_addr: usize = - unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - 
+                    VcpuExit::IoOut(port, addr) => {
+                        match port {
+                            UHYVE_UART_PORT => {
+                                self.uart(addr)?;
+                            }
+                            UHYVE_PORT_CMDSIZE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.cmdsize(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_CMDVAL => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.cmdval(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_NETWRITE => {
+                                match &self.tx {
+                                    Some(tx_channel) => tx_channel.send(1).unwrap(),
+
+                                    None => {}
+                                };
+                            }
+                            UHYVE_PORT_EXIT => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                return Ok(VcpuStopReason::Exit(
+                                    self.exit(self.host_address(data_addr)),
+                                ));
+                            }
+                            UHYVE_PORT_OPEN => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.open(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_WRITE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.write(self.host_address(data_addr))?;
+                            }
+                            UHYVE_PORT_READ => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.read(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_UNLINK => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.unlink(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_LSEEK => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.lseek(self.host_address(data_addr));
+                            }
+                            UHYVE_PORT_CLOSE => {
+                                let data_addr: usize =
+                                    unsafe { (*(addr.as_ptr() as *const u32)) as usize };
+                                self.close(self.host_address(data_addr));
+                            }
+                            //TODO:
+                            PCI_CONFIG_DATA_PORT => {
+                                if let Some(pci_addr) = self.pci_addr {
+                                    if pci_addr & 0x1ff800 == 0 {
+                                        let mut virtio_device = self.virtio_device.lock().unwrap();
+                                        virtio_device.handle_write(pci_addr & 0x3ff, addr);
+                                    }
+                                }
+                            }
+                            PCI_CONFIG_ADDRESS_PORT => {
+                                self.pci_addr = Some(unsafe { *(addr.as_ptr() as *const u32) });
+                            }
+                            VIRTIO_PCI_STATUS => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_status(addr);
+                            }
+                            VIRTIO_PCI_GUEST_FEATURES => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_requested_features(addr);
+                            }
+                            VIRTIO_PCI_QUEUE_NOTIFY => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.handle_notify_output(addr, self);
+                            }
+                            VIRTIO_PCI_QUEUE_SEL => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_selected_queue(addr);
+                            }
+                            VIRTIO_PCI_QUEUE_PFN => {
+                                let mut virtio_device = self.virtio_device.lock().unwrap();
+                                virtio_device.write_pfn(addr, self);
+                            }
+                            _ => {
+                                panic!("Unhandled IO exit: 0x{:x}", port);
+                            }
+                        }
+                    }
+                    VcpuExit::Debug => {
+                        info!("Caught Debug Interrupt!");
+                        return Ok(VcpuStopReason::Debug);
+                    }
+                    VcpuExit::InternalError => {
+                        panic!("{:?}", VcpuExit::InternalError)
+                    }
+                    vcpu_exit => {
+                        unimplemented!("{:?}", vcpu_exit)
+                    }
+                },
+                Err(err) => match err.errno() {
+                    libc::EINTR => return Ok(VcpuStopReason::Kick),
+                    _ => return Err(err),
+                },
             }
         }
     }
 
-    fn run(&mut self) -> HypervisorResult<i32> {
+    fn run(&mut self) -> HypervisorResult<Option<i32>> {
         // Pause first CPU before first execution, so we have time to attach debugger
         if self.id == 0 {
             self.gdb_handle_exception(None);
@@ -448,7 +454,8 @@ impl VirtualCPU for UhyveCPU {
         loop {
             match self.r#continue()? {
                 VcpuStopReason::Debug => self.gdb_handle_exception(Some(VcpuExit::Debug)),
-                VcpuStopReason::Exit(code) => break Ok(code),
+                VcpuStopReason::Exit(code) => break Ok(Some(code)),
+                VcpuStopReason::Kick => break Ok(None),
             }
         }
     }
diff --git a/src/macos/vcpu.rs b/src/macos/vcpu.rs
index c298c092..dcc230ba 100644
--- a/src/macos/vcpu.rs
+++ b/src/macos/vcpu.rs
@@ -730,7 +730,7 @@ impl VirtualCPU for UhyveCPU {
         }
     }
 
-    fn run(&mut self) -> HypervisorResult<i32> {
+    fn run(&mut self) -> HypervisorResult<Option<i32>> {
         // Pause first CPU before first execution, so we have time to attach debugger
         if self.id == 0 {
             self.gdb_handle_exception(false);
@@ -739,7 +739,8 @@ impl VirtualCPU for UhyveCPU {
         loop {
             match self.r#continue()? {
                 VcpuStopReason::Debug => self.gdb_handle_exception(true),
-                VcpuStopReason::Exit(code) => break Ok(code),
+                VcpuStopReason::Exit(code) => break Ok(Some(code)),
+                VcpuStopReason::Kick => break Ok(None),
             }
         }
     }
diff --git a/src/vm.rs b/src/vm.rs
index 518db5c7..4c987b61 100644
--- a/src/vm.rs
+++ b/src/vm.rs
@@ -228,8 +228,12 @@ pub type LoadKernelResult<T> = Result<T, LoadKernelError>;
 pub enum VcpuStopReason {
     /// The vCPU stopped for debugging.
     Debug,
+
     /// The vCPU exited with the specified exit code.
     Exit(i32),
+
+    /// The vCPU got kicked.
+    Kick,
 }
 
 pub trait VirtualCPU {
@@ -240,7 +244,7 @@ pub trait VirtualCPU {
     fn r#continue(&mut self) -> HypervisorResult<VcpuStopReason>;
 
     /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`).
-    fn run(&mut self) -> HypervisorResult<i32>;
+    fn run(&mut self) -> HypervisorResult<Option<i32>>;
 
     /// Prints the VCPU's registers to stdout.
     fn print_registers(&self);
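The mechanism behind patch 2 deserves spelling out: a signal delivered to a thread that is blocked in the KVM_RUN ioctl makes the ioctl fail with EINTR, and the new `Err(err)` arm in `r#continue` maps exactly that errno to `VcpuStopReason::Kick`, which `run` translates into `Ok(None)`. The following standalone sketch (not part of the patch series; it assumes only the `libc` crate as a dependency and uses SIGUSR1 in place of the real-time signal that patch 4 introduces) demonstrates the same effect against an ordinary blocking read(2):

    use std::{io, mem, os::unix::thread::JoinHandleExt, ptr, thread, time::Duration};

    extern "C" fn noop(_signum: libc::c_int) {}

    fn main() {
        // Install a no-op handler for SIGUSR1 *without* SA_RESTART, so that a
        // signal delivered to a blocked thread makes the syscall fail with EINTR.
        unsafe {
            let mut sa: libc::sigaction = mem::zeroed(); // zeroed mask, no flags
            sa.sa_sigaction = noop as extern "C" fn(libc::c_int) as libc::sighandler_t;
            libc::sigaction(libc::SIGUSR1, &sa, ptr::null_mut());
        }

        let worker = thread::spawn(|| {
            // Block in read(2) on a pipe that never receives data, standing in
            // for a vCPU thread sitting in the KVM_RUN ioctl.
            let mut fds: [libc::c_int; 2] = [0; 2];
            unsafe { libc::pipe(fds.as_mut_ptr()) };
            let mut byte = 0u8;
            let ret =
                unsafe { libc::read(fds[0], &mut byte as *mut u8 as *mut libc::c_void, 1) };
            // The kick arrives as EINTR, which uhyve maps to VcpuStopReason::Kick.
            assert_eq!(ret, -1);
            assert_eq!(io::Error::last_os_error().raw_os_error(), Some(libc::EINTR));
            println!("worker kicked out of a blocking syscall");
        });

        thread::sleep(Duration::from_millis(100)); // give the worker time to block
        // Kick the worker, as KickSignal::pthread_kill will do in patch 4.
        unsafe { libc::pthread_kill(worker.as_pthread_t(), libc::SIGUSR1) };
        worker.join().unwrap();
    }

Note that the handler is installed through sigaction rather than glibc's signal(), which enables BSD restart semantics (SA_RESTART); with SA_RESTART the kernel would transparently restart the read instead of reporting EINTR.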
From f01cd3a277effbc606005e9b4d1a97a8da46f31b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 18:59:32 +0200
Subject: [PATCH 3/4] Make `Uhyve::run` OS-specific

This copies the code without changes.
---
 src/lib.rs       | 70 ----------------------------------------------
 src/linux/mod.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 src/macos/mod.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+), 70 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 918f3018..261f7ce0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,73 +28,3 @@ pub mod vm;
 
 pub use arch::*;
 pub use os::uhyve::Uhyve;
-
-use core_affinity::CoreId;
-use std::hint;
-use std::sync::mpsc;
-use std::sync::Arc;
-use std::thread;
-use vm::Vm;
-
-impl Uhyve {
-    /// Runs the VM.
-    ///
-    /// Blocks until the VM has finished execution.
-    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
-        unsafe {
-            self.load_kernel().expect("Unabled to load the kernel");
-        }
-
-        // For communication of the exit code from one vcpu to this thread as return
-        // value.
-        let (exit_tx, exit_rx) = mpsc::channel();
-
-        let this = Arc::new(self);
-
-        (0..this.num_cpus()).for_each(|cpu_id| {
-            let vm = this.clone();
-            let exit_tx = exit_tx.clone();
-
-            let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(cpu_id as usize).cloned(),
-                None => None,
-            };
-
-            // create thread for each CPU
-            thread::spawn(move || {
-                debug!("Create thread for CPU {}", cpu_id);
-                match local_cpu_affinity {
-                    Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
-                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
-                    }
-                    None => debug!("No affinity specified, not binding thread"),
-                }
-
-                let mut cpu = vm.create_cpu(cpu_id).unwrap();
-                cpu.init(vm.get_entry_point()).unwrap();
-
-                // only one core is able to enter startup code
-                // => the wait for the predecessor core
-                while cpu_id != vm.cpu_online() {
-                    hint::spin_loop();
-                }
-
-                // jump into the VM and execute code of the guest
-                let result = cpu.run();
-                match result {
-                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
-                    Ok(None) => {}
-                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
-                }
-            });
-        });
-
-        // This is a semi-bad design. We don't wait for the other cpu's threads to
-        // finish, but as soon as one cpu sends an exit code, we return it and
-        // ignore the remaining running threads. A better design would be to force
-        // the VCPUs externally to stop, so that the other threads don't block and
-        // can be terminated correctly.
-        exit_rx.recv().unwrap()
-    }
-}
diff --git a/src/linux/mod.rs b/src/linux/mod.rs
index 7b8ecadf..182e0a1e 100755
--- a/src/linux/mod.rs
+++ b/src/linux/mod.rs
@@ -6,9 +6,18 @@ pub mod virtqueue;
 
 pub type HypervisorError = kvm_ioctls::Error;
 
+use std::{
+    hint,
+    sync::{mpsc, Arc},
+    thread,
+};
+
+use core_affinity::CoreId;
 use kvm_ioctls::Kvm;
 use lazy_static::lazy_static;
 
+use crate::{vm::Vm, Uhyve};
+
 lazy_static! {
     static ref KVM: Kvm = Kvm::new().unwrap();
 }
@@ -19,3 +28,66 @@ trait MemoryRegion {
     fn guest_address(&self) -> usize;
     fn host_address(&self) -> usize;
 }
+
+impl Uhyve {
+    /// Runs the VM.
+    ///
+    /// Blocks until the VM has finished execution.
+    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        unsafe {
+            self.load_kernel().expect("Unabled to load the kernel");
+        }
+
+        // For communication of the exit code from one vcpu to this thread as return
+        // value.
+        let (exit_tx, exit_rx) = mpsc::channel();
+
+        let this = Arc::new(self);
+
+        (0..this.num_cpus()).for_each(|cpu_id| {
+            let vm = this.clone();
+            let exit_tx = exit_tx.clone();
+
+            let local_cpu_affinity = match &cpu_affinity {
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
+                None => None,
+            };
+
+            // create thread for each CPU
+            thread::spawn(move || {
+                debug!("Create thread for CPU {}", cpu_id);
+                match local_cpu_affinity {
+                    Some(core_id) => {
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                    }
+                    None => debug!("No affinity specified, not binding thread"),
+                }
+
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                cpu.init(vm.get_entry_point()).unwrap();
+
+                // only one core is able to enter startup code
+                // => the wait for the predecessor core
+                while cpu_id != vm.cpu_online() {
+                    hint::spin_loop();
+                }
+
+                // jump into the VM and execute code of the guest
+                let result = cpu.run();
+                match result {
+                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                    Ok(None) => {}
+                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
+                }
+            });
+        });
+
+        // This is a semi-bad design. We don't wait for the other cpu's threads to
+        // finish, but as soon as one cpu sends an exit code, we return it and
+        // ignore the remaining running threads. A better design would be to force
+        // the VCPUs externally to stop, so that the other threads don't block and
+        // can be terminated correctly.
+        exit_rx.recv().unwrap()
+    }
+}
diff --git a/src/macos/mod.rs b/src/macos/mod.rs
index 364a19c8..e66c3ee1 100644
--- a/src/macos/mod.rs
+++ b/src/macos/mod.rs
@@ -1,6 +1,79 @@
+use std::{
+    hint,
+    sync::{mpsc, Arc},
+    thread,
+};
+
+use core_affinity::CoreId;
+
+use crate::{vm::Vm, Uhyve};
+
 pub mod gdb;
 mod ioapic;
 pub mod uhyve;
 pub mod vcpu;
 
 pub type HypervisorError = xhypervisor::Error;
+
+impl Uhyve {
+    /// Runs the VM.
+    ///
+    /// Blocks until the VM has finished execution.
+    pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        unsafe {
+            self.load_kernel().expect("Unabled to load the kernel");
+        }
+
+        // For communication of the exit code from one vcpu to this thread as return
+        // value.
+        let (exit_tx, exit_rx) = mpsc::channel();
+
+        let this = Arc::new(self);
+
+        (0..this.num_cpus()).for_each(|cpu_id| {
+            let vm = this.clone();
+            let exit_tx = exit_tx.clone();
+
+            let local_cpu_affinity = match &cpu_affinity {
+                Some(vec) => vec.get(cpu_id as usize).cloned(),
+                None => None,
+            };
+
+            // create thread for each CPU
+            thread::spawn(move || {
+                debug!("Create thread for CPU {}", cpu_id);
+                match local_cpu_affinity {
+                    Some(core_id) => {
+                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                    }
+                    None => debug!("No affinity specified, not binding thread"),
+                }
+
+                let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                cpu.init(vm.get_entry_point()).unwrap();
+
+                // only one core is able to enter startup code
+                // => the wait for the predecessor core
+                while cpu_id != vm.cpu_online() {
+                    hint::spin_loop();
+                }
+
+                // jump into the VM and execute code of the guest
+                let result = cpu.run();
+                match result {
+                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
+                    Ok(None) => {}
+                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
+                }
+            });
+        });
+
+        // This is a semi-bad design. We don't wait for the other cpu's threads to
+        // finish, but as soon as one cpu sends an exit code, we return it and
+        // ignore the remaining running threads. A better design would be to force
+        // the VCPUs externally to stop, so that the other threads don't block and
+        // can be terminated correctly.
+        exit_rx.recv().unwrap()
+    }
+}

From a357cd1bfc8be045bacf508dab750e3e82878334 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kr=C3=B6ning?=
Date: Thu, 26 Aug 2021 19:00:37 +0200
Subject: [PATCH 4/4] Linux: Add kick signal, shut down vCPU threads
 gracefully

---
 Cargo.lock       |  30 +++++----
 Cargo.toml       |   3 +
 src/linux/mod.rs | 154 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 125 insertions(+), 62 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 165e0be5..333e8223 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -27,7 +27,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
 dependencies = [
  "hermit-abi",
- "libc",
+ "libc 0.2.100",
  "winapi 0.3.9",
 ]
@@ -131,7 +131,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f"
 dependencies = [
  "kernel32-sys",
- "libc",
+ "libc 0.2.100",
  "num_cpus",
  "winapi 0.2.8",
 ]
@@ -320,7 +320,7 @@ version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
 dependencies = [
- "libc",
+ "libc 0.2.100",
 ]
@@ -389,7 +389,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f2924454e22895c738e43331ae310459c74a11ded9c97dc250129ee10d2f9ca2"
 dependencies = [
  "kvm-bindings",
- "libc",
+ "libc 0.2.100",
  "vmm-sys-util",
 ]
@@ -399,6 +399,11 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
+[[package]]
+name = "libc"
+version = "0.2.98"
+source = "git+https://github.com/rust-lang/libc?rev=f5e31f208#f5e31f208ee51d60e4848e09c16ee86e3968b6f8"
+
 [[package]]
 name = "libc"
 version = "0.2.100"
@@ -430,7 +435,7 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum =
"148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" dependencies = [ - "libc", + "libc 0.2.100", ] [[package]] @@ -457,19 +462,18 @@ dependencies = [ "bitflags", "cc", "cfg-if", - "libc", + "libc 0.2.100", ] [[package]] name = "nix" version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1e25ee6b412c2a1e3fcb6a4499a5c1bfe7f43e014bdce9a6b6666e5aa2d187" +source = "git+https://github.com/nix-rust/nix#dab7332eabed8646f6d01a0d0688b4d1438accb4" dependencies = [ "bitflags", "cc", "cfg-if", - "libc", + "libc 0.2.98", "memoffset", ] @@ -498,7 +502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ "hermit-abi", - "libc", + "libc 0.2.100", ] [[package]] @@ -851,7 +855,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "lazy_static", - "libc", + "libc 0.2.100", "log", "mac_address", "nix 0.22.0", @@ -913,7 +917,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cf11afbc4ebc0d5c7a7748a77d19e2042677fc15faa2f4ccccb27c18a60605" dependencies = [ "bitflags", - "libc", + "libc 0.2.100", ] [[package]] @@ -1051,5 +1055,5 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "643a7272630495dac4765f4f6b020b54af2c4d71fd29ef6b6141ade280355f8a" dependencies = [ - "libc", + "libc 0.2.100", ] diff --git a/Cargo.toml b/Cargo.toml index 588a7d7f..d5a7d834 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,9 @@ harness = false default = [] instrument = ["rftrace", "rftrace-frontend"] +[patch.crates-io] +nix = { git = "https://github.com/nix-rust/nix" } + [dependencies] bitflags = "1.3" byteorder = "1.4" diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 182e0a1e..1072a574 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -7,14 +7,20 @@ pub mod virtqueue; pub type HypervisorError = kvm_ioctls::Error; use std::{ - hint, - sync::{mpsc, Arc}, + hint, mem, + os::unix::prelude::JoinHandleExt, + sync::{Arc, Barrier}, thread, }; use core_affinity::CoreId; use kvm_ioctls::Kvm; use lazy_static::lazy_static; +use libc::{SIGRTMAX, SIGRTMIN}; +use nix::sys::{ + pthread::{pthread_kill, Pthread}, + signal::{signal, SigHandler, Signal}, +}; use crate::{vm::Vm, Uhyve}; @@ -29,65 +35,115 @@ trait MemoryRegion { fn host_address(&self) -> usize; } +/// The signal for kicking vCPUs out of KVM_RUN. +/// +/// It is used to stop a vCPU from another thread. +struct KickSignal; + +impl KickSignal { + const RTSIG_OFFSET: libc::c_int = 0; + + fn get() -> Signal { + let kick_signal = SIGRTMIN() + Self::RTSIG_OFFSET; + assert!(kick_signal <= SIGRTMAX()); + // TODO: Remove the transmute once realtime signals are properly supported by nix + // https://github.com/nix-rust/nix/issues/495 + unsafe { mem::transmute(kick_signal) } + } + + fn register_handler() -> nix::Result<()> { + extern "C" fn handle_signal(_signal: libc::c_int) {} + // SAFETY: We don't use the `signal`'s return value. + unsafe { + signal(Self::get(), SigHandler::Handler(handle_signal))?; + } + Ok(()) + } + + /// Sends the kick signal to a thread. + /// + /// [`KickSignal::register_handler`] should be called prior to this to avoid crashing the program with the default handler. + fn pthread_kill(pthread: Pthread) -> nix::Result<()> { + pthread_kill(pthread, Self::get()) + } +} + impl Uhyve { /// Runs the VM. /// /// Blocks until the VM has finished execution. 
     pub fn run(mut self, cpu_affinity: Option<Vec<CoreId>>) -> i32 {
+        KickSignal::register_handler().unwrap();
+
         unsafe {
             self.load_kernel().expect("Unabled to load the kernel");
         }
 
-        // For communication of the exit code from one vcpu to this thread as return
-        // value.
-        let (exit_tx, exit_rx) = mpsc::channel();
+        // After spinning up all vCPU threads, the main thread waits for any vCPU to end execution.
+        let barrier = Arc::new(Barrier::new(2));
 
         let this = Arc::new(self);
-
-        (0..this.num_cpus()).for_each(|cpu_id| {
-            let vm = this.clone();
-            let exit_tx = exit_tx.clone();
-
-            let local_cpu_affinity = match &cpu_affinity {
-                Some(vec) => vec.get(cpu_id as usize).cloned(),
-                None => None,
-            };
-
-            // create thread for each CPU
-            thread::spawn(move || {
-                debug!("Create thread for CPU {}", cpu_id);
-                match local_cpu_affinity {
-                    Some(core_id) => {
-                        debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
-                        core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
-                    }
-                    None => debug!("No affinity specified, not binding thread"),
-                }
-
-                let mut cpu = vm.create_cpu(cpu_id).unwrap();
-                cpu.init(vm.get_entry_point()).unwrap();
-
-                // only one core is able to enter startup code
-                // => the wait for the predecessor core
-                while cpu_id != vm.cpu_online() {
-                    hint::spin_loop();
-                }
-
-                // jump into the VM and execute code of the guest
-                let result = cpu.run();
-                match result {
-                    Ok(Some(exit_code)) => exit_tx.send(exit_code).unwrap(),
-                    Ok(None) => {}
-                    Err(err) => error!("CPU {} crashed with {:?}", cpu_id, err),
-                }
-            });
-        });
-
-        // This is a semi-bad design. We don't wait for the other cpu's threads to
-        // finish, but as soon as one cpu sends an exit code, we return it and
-        // ignore the remaining running threads. A better design would be to force
-        // the VCPUs externally to stop, so that the other threads don't block and
-        // can be terminated correctly.
-        exit_rx.recv().unwrap()
+        let threads = (0..this.num_cpus())
+            .map(|cpu_id| {
+                let vm = this.clone();
+                let barrier = barrier.clone();
+                let local_cpu_affinity = cpu_affinity
+                    .as_ref()
+                    .map(|core_ids| core_ids.get(cpu_id as usize).copied())
+                    .flatten();
+
+                thread::spawn(move || {
+                    debug!("Create thread for CPU {}", cpu_id);
+                    match local_cpu_affinity {
+                        Some(core_id) => {
+                            debug!("Trying to pin thread {} to CPU {}", cpu_id, core_id.id);
+                            core_affinity::set_for_current(core_id); // This does not return an error if it fails :(
+                        }
+                        None => debug!("No affinity specified, not binding thread"),
+                    }
+
+                    let mut cpu = vm.create_cpu(cpu_id).unwrap();
+                    cpu.init(vm.get_entry_point()).unwrap();
+
+                    // only one core is able to enter startup code
+                    // => the wait for the predecessor core
+                    while cpu_id != vm.cpu_online() {
+                        hint::spin_loop();
+                    }
+
+                    // jump into the VM and execute code of the guest
+                    match cpu.run() {
+                        Ok(code) => {
+                            if code.is_some() {
+                                // Let the main thread continue with kicking the other vCPUs
+                                barrier.wait();
+                            }
+                            code
+                        }
+                        Err(err) => {
+                            error!("CPU {} crashed with {:?}", cpu_id, err);
+                            None
+                        }
+                    }
+                })
+            })
+            .collect::<Vec<_>>();
+
+        // Wait for one vCPU to return with an exit code.
+        barrier.wait();
+        for thread in &threads {
+            KickSignal::pthread_kill(thread.as_pthread_t()).unwrap();
+        }
+
+        let code = threads
+            .into_iter()
+            .filter_map(|thread| thread.join().unwrap())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            1,
+            code.len(),
+            "more than one thread finished with an exit code"
+        );
+        code[0]
     }
 }
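The shutdown choreography of patch 4 can be exercised in isolation: a `Barrier::new(2)` pairs the first exiting vCPU with the main thread, which then kicks the remaining threads, joins them all, and `filter_map`s out the single exit code. The following std-only sketch mirrors that structure; all names are illustrative, and an `AtomicBool` stands in for the kick signal, since plain Rust cannot interrupt a thread blocked in an ioctl:

    use std::{
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Barrier,
        },
        thread,
        time::Duration,
    };

    fn main() {
        let barrier = Arc::new(Barrier::new(2)); // one finisher + the main thread
        let stop = Arc::new(AtomicBool::new(false)); // stands in for KickSignal

        let threads: Vec<_> = (0..4)
            .map(|id| {
                let barrier = barrier.clone();
                let stop = stop.clone();
                thread::spawn(move || loop {
                    if id == 0 {
                        // This worker "exits" like a vCPU hitting UHYVE_PORT_EXIT:
                        // wake the main thread so it can stop the others.
                        barrier.wait();
                        return Some(0i32);
                    }
                    if stop.load(Ordering::Relaxed) {
                        return None; // kicked: finish without an exit code
                    }
                    thread::sleep(Duration::from_millis(1)); // stand-in for KVM_RUN
                })
            })
            .collect();

        // Wait for one worker to produce an exit code, then stop the rest.
        barrier.wait();
        stop.store(true, Ordering::Relaxed);

        let codes: Vec<i32> = threads
            .into_iter()
            .filter_map(|t| t.join().unwrap())
            .collect();
        assert_eq!(codes.len(), 1, "more than one thread finished with an exit code");
        println!("exit code: {}", codes[0]);
    }

As in the patch, exactly one worker is expected to deliver an exit code; the final assert mirrors the patch's `assert_eq!` on `code.len()`.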