From 3540534908db6f9d746109cb0e70a70a16a0cead Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:09:54 +0200 Subject: [PATCH 01/11] GuestMemory: Add lifetimes to try_access() With virtual memory, seemingly consecutive I/O virtual memory regions may actually be fragmented across multiple pages in our userspace mapping. Existing `descriptor_utils::Reader::new()` (and `Writer`) implementations (e.g. in virtiofsd or vm-virtio/virtio-queue) use `GuestMemory::get_slice()` to turn guest memory address ranges into valid slices in our address space; but with this fragmentation, it is easily possible that a range no longer corresponds to a single slice. To fix this, we can instead use `try_access()` to collect all slices, but to do so, its region argument needs to have the correct lifetime so we can collect the slices into a `Vec<_>` outside of the closure. Signed-off-by: Hanna Czenczek --- src/guest_memory.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 39e4f10a..371511e6 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -374,9 +374,9 @@ pub trait GuestMemory { /// - the error code returned by the callback 'f' /// - the size of the already handled data when encountering the first hole /// - the size of the already handled data when the whole range has been handled - fn try_access(&self, count: usize, addr: GuestAddress, mut f: F) -> Result + fn try_access<'a, F>(&'a self, count: usize, addr: GuestAddress, mut f: F) -> Result where - F: FnMut(usize, usize, MemoryRegionAddress, &Self::R) -> Result, + F: FnMut(usize, usize, MemoryRegionAddress, &'a Self::R) -> Result, { let mut cur = addr; let mut total = 0; From 9bcd5ac9b9ae37d1fb421f86f0aff310411933af Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:09:59 +0200 Subject: [PATCH 02/11] Bytes: Fix read() and write() read() and write() must not ignore the `count` parameter: The mappings passed into the `try_access()` closure are only valid for up to `count` bytes, not more. Signed-off-by: Hanna Czenczek --- src/guest_memory.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 371511e6..930dd5b0 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -464,8 +464,8 @@ impl Bytes for T { self.try_access( buf.len(), addr, - |offset, _count, caddr, region| -> Result { - region.write(&buf[offset..], caddr) + |offset, count, caddr, region| -> Result { + region.write(&buf[offset..(offset + count)], caddr) }, ) } @@ -474,8 +474,8 @@ impl Bytes for T { self.try_access( buf.len(), addr, - |offset, _count, caddr, region| -> Result { - region.read(&mut buf[offset..], caddr) + |offset, count, caddr, region| -> Result { + region.read(&mut buf[offset..(offset + count)], caddr) }, ) } From 2b83c72be656e5d46b83cb3a66d580e56cf33d5b Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Thu, 22 May 2025 13:15:44 +0200 Subject: [PATCH 03/11] Bytes: Do not use to_region_addr() When we switch to a (potentially) virtual memory model, we want to compact the interface, especially removing references to memory regions because virtual memory is not just split into regions, but pages first. The one memory-region-referencing part we are going to keep is `try_access()` because that method is nicely structured around the fragmentation we will have to accept when it comes to paged memory. 
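For illustration, a minimal sketch (not part of this series) of the `Reader`-style usage that the relaxed `try_access()` lifetime enables: collecting every backing slice of a possibly fragmented guest range into a `Vec`. It assumes the `backend-mmap` feature; the `gather()` helper name is made up.

    use vm_memory::{
        GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion,
        VolatileSlice,
    };

    /// Hypothetical helper: collect every host-side slice backing the guest range
    /// `[addr, addr + count)`, which may be fragmented across several regions.
    fn gather<'a>(
        mem: &'a GuestMemoryMmap<()>,
        addr: GuestAddress,
        count: usize,
    ) -> Result<Vec<VolatileSlice<'a>>, GuestMemoryError> {
        let mut slices = Vec::new();
        let done = mem.try_access(count, addr, |_offset, len, region_addr, region| {
            // Because the region reference now carries the `'a` lifetime, the slice
            // may escape the closure and be stored in `slices`.
            slices.push(region.get_slice(region_addr, len)?);
            Ok(len)
        })?;
        if done != count {
            // A hole was encountered before the whole range was covered.
            return Err(GuestMemoryError::PartialBuffer {
                expected: count,
                completed: done,
            });
        }
        Ok(slices)
    }

Without the added lifetime, the pushed slices could not outlive the closure, which is exactly the limitation described above.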
`to_region_addr()` in contrast does not even take a length argument, so for virtual memory, using the returned region and address is unsafe if doing so crosses page boundaries. Therefore, switch `Bytes::load()` and `store()` from using `to_region_addr()` to `try_access()`. Signed-off-by: Hanna Czenczek --- src/guest_memory.rs | 62 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 930dd5b0..2e9da95d 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -44,6 +44,7 @@ use std::convert::From; use std::fs::File; use std::io; +use std::mem::size_of; use std::ops::{BitAnd, BitOr, Deref}; use std::rc::Rc; use std::sync::atomic::Ordering; @@ -591,17 +592,62 @@ impl Bytes for T { } fn store(&self, val: O, addr: GuestAddress, order: Ordering) -> Result<()> { - // `find_region` should really do what `to_region_addr` is doing right now, except - // it should keep returning a `Result`. - self.to_region_addr(addr) - .ok_or(Error::InvalidGuestAddress(addr)) - .and_then(|(region, region_addr)| region.store(val, region_addr, order)) + let expected = size_of::(); + + let completed = self.try_access( + expected, + addr, + |offset, len, region_addr, region| -> Result { + assert_eq!(offset, 0); + if len < expected { + return Err(Error::PartialBuffer { + expected, + completed: len, + }); + } + region.store(val, region_addr, order).map(|()| expected) + }, + )?; + + if completed < expected { + Err(Error::PartialBuffer { + expected, + completed, + }) + } else { + Ok(()) + } } fn load(&self, addr: GuestAddress, order: Ordering) -> Result { - self.to_region_addr(addr) - .ok_or(Error::InvalidGuestAddress(addr)) - .and_then(|(region, region_addr)| region.load(region_addr, order)) + let expected = size_of::(); + let mut result = None::; + + let completed = self.try_access( + expected, + addr, + |offset, len, region_addr, region| -> Result { + assert_eq!(offset, 0); + if len < expected { + return Err(Error::PartialBuffer { + expected, + completed: len, + }); + } + result = Some(region.load(region_addr, order)?); + Ok(expected) + }, + )?; + + if completed < expected { + Err(Error::PartialBuffer { + expected, + completed, + }) + } else { + // Must be set because `completed == expected` + Ok(result.unwrap()) + } } } From 5b0e1ae7bb43d327f32be198fa0a2038f66f1c59 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:01 +0200 Subject: [PATCH 04/11] Add IoMemory trait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing `GuestMemory` trait is insufficient for representing virtual memory, as it does not allow specifying the required access permissions. Its focus on all guest memory implementations consisting of a relatively small number of regions is also unsuited for paged virtual memory with a potentially very lage set of non-continuous mappings. The new `IoMemory` trait in contrast provides only a small number of methods that keep the implementing type’s internal structure more opaque, and every access needs to be accompanied by the required permissions. Signed-off-by: Hanna Czenczek --- src/io_memory.rs | 183 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + 2 files changed, 186 insertions(+) create mode 100644 src/io_memory.rs diff --git a/src/io_memory.rs b/src/io_memory.rs new file mode 100644 index 00000000..e437ee5b --- /dev/null +++ b/src/io_memory.rs @@ -0,0 +1,183 @@ +// Copyright (C) 2025 Red Hat. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Provides a trait for virtual I/O memory. +//! +//! This trait is more stripped down than `GuestMemory` because the fragmented nature of virtual +//! memory does not allow a direct translation to long continuous regions. +//! +//! In addition, any access to virtual memory must be annotated with the intended access mode (i.e. +//! reading and/or writing). + +use crate::guest_memory::Result; +use crate::{bitmap, GuestAddress, GuestMemory, MemoryRegionAddress, VolatileSlice}; + +/// Permissions for accessing virtual memory. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Permissions { + /// No permissions + No, + /// Read-only + Read, + /// Write-only + Write, + /// Allow both reading and writing + ReadWrite, +} + +impl Permissions { + /// Check whether the permissions `self` allow the given `access`. + pub fn allow(&self, access: Self) -> bool { + *self & access == access + } +} + +impl std::ops::BitOr for Permissions { + type Output = Permissions; + + /// Return the union of `self` and `rhs`. + fn bitor(self, rhs: Permissions) -> Self::Output { + use Permissions::*; + + match (self, rhs) { + (No, rhs) => rhs, + (lhs, No) => lhs, + (ReadWrite, _) | (_, ReadWrite) => ReadWrite, + (Read, Read) => Read, + (Read, Write) | (Write, Read) => ReadWrite, + (Write, Write) => Write, + } + } +} + +impl std::ops::BitAnd for Permissions { + type Output = Permissions; + + /// Return the intersection of `self` and `rhs`. + fn bitand(self, rhs: Permissions) -> Self::Output { + use Permissions::*; + + match (self, rhs) { + (No, _) | (_, No) => No, + (ReadWrite, rhs) => rhs, + (lhs, ReadWrite) => lhs, + (Read, Read) => Read, + (Read, Write) | (Write, Read) => No, + (Write, Write) => Write, + } + } +} + +/// Represents virtual I/O memory. +/// +/// `IoMemory` is generally backed by some “physical” `GuestMemory`, which then consists for +/// `GuestMemoryRegion` objects. However, the mapping from I/O virtual addresses (IOVAs) to +/// physical addresses may be arbitrarily fragmented. Translation is done via an IOMMU. +/// +/// Note in contrast to `GuestMemory`: +/// - Any IOVA range may consist of arbitrarily many underlying ranges in physical memory. +/// - Accessing an IOVA requires passing the intended access mode, and the IOMMU will check whether +/// the given access mode is permitted for the given IOVA. +/// - The translation result for a given IOVA may change over time (i.e. the physical address +/// associated with an IOVA may change). +pub trait IoMemory { + /// Underlying `GuestMemory` type. + type PhysicalMemory: GuestMemory; + + /// Return `true` if `addr..(addr + count)` is accessible with `access`. + fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool; + + /// Invokes callback `f` to handle data in the address range `[addr, addr + count)`, with + /// permissions `access`. + /// + /// The address range `[addr, addr + count)` may span more than one + /// [`GuestMemoryRegion`](trait.GuestMemoryRegion.html) object, or even have holes in it. 
+ /// So [`try_access()`](trait.IoMemory.html#method.try_access) invokes the callback 'f' + /// for each [`GuestMemoryRegion`](trait.GuestMemoryRegion.html) object involved and returns: + /// - the error code returned by the callback 'f' + /// - the size of the already handled data when encountering the first hole + /// - the size of the already handled data when the whole range has been handled + fn try_access<'a, F>( + &'a self, + count: usize, + addr: GuestAddress, + access: Permissions, + f: F, + ) -> Result + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &'a ::R, + ) -> Result; + + /// Returns a [`VolatileSlice`](struct.VolatileSlice.html) of `count` bytes starting at + /// `addr`. + /// + /// Note that because of the fragmented nature of virtual memory, it can easily happen that the + /// range `[addr, addr + count)` is not backed by a continuous region in our own virtual + /// memory, which will make generating the slice impossible. + fn get_slice( + &self, + addr: GuestAddress, + count: usize, + access: Permissions, + ) -> Result>>; + + /// If this virtual memory is just a plain `GuestMemory` object underneath without an IOMMU + /// translation layer in between, return that `GuestMemory` object. + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + None + } +} + +impl IoMemory for M { + type PhysicalMemory = M; + + fn range_accessible(&self, addr: GuestAddress, count: usize, _access: Permissions) -> bool { + if count <= 1 { + ::address_in_range(self, addr) + } else if let Some(end) = addr.0.checked_add(count as u64 - 1) { + ::address_in_range(self, addr) + && ::address_in_range(self, GuestAddress(end)) + } else { + false + } + } + + fn try_access<'a, F>( + &'a self, + count: usize, + addr: GuestAddress, + _access: Permissions, + f: F, + ) -> Result + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &'a ::R, + ) -> Result, + { + ::try_access(self, count, addr, f) + } + + fn get_slice( + &self, + addr: GuestAddress, + count: usize, + _access: Permissions, + ) -> Result>> { + ::get_slice(self, addr, count) + } + + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + Some(self) + } +} diff --git a/src/lib.rs b/src/lib.rs index 2f87f4c8..679ae7da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,9 @@ pub use region::{ pub mod io; pub use io::{ReadVolatile, WriteVolatile}; +pub mod io_memory; +pub use io_memory::{IoMemory, Permissions}; + #[cfg(feature = "backend-mmap")] pub mod mmap; From 45985f478ae6958fd333f9487d47aa7f8c250bd2 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:04 +0200 Subject: [PATCH 05/11] Switch to IoMemory as the primary memory type Rust only allows us to give one trait the blanket implementations for `Bytes` and `GuestAddressSpace`. We want `IoMemory` to be our primary external interface becaue it has users specify the access permissions they need, and because we can (and do) provide a blanket `IoMemory` implementation for all `GuestMemory` types. Therefore, replace requirements of `GuestMemory` by `IoMemory` instead. Signed-off-by: Hanna Czenczek --- src/atomic.rs | 32 ++++++++++++++++---------------- src/guest_memory.rs | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/src/atomic.rs b/src/atomic.rs index 22697d05..4cebbcd8 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -2,7 +2,7 @@ // Copyright (C) 2020 Red Hat, Inc. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! 
A wrapper over an `ArcSwap` struct to support RCU-style mutability. +//! A wrapper over an `ArcSwap` struct to support RCU-style mutability. //! //! With the `backend-atomic` feature enabled, simply replacing `GuestMemoryMmap` //! with `GuestMemoryAtomic` will enable support for mutable memory maps. @@ -15,17 +15,17 @@ use arc_swap::{ArcSwap, Guard}; use std::ops::Deref; use std::sync::{Arc, LockResult, Mutex, MutexGuard, PoisonError}; -use crate::{GuestAddressSpace, GuestMemory}; +use crate::{GuestAddressSpace, IoMemory}; /// A fast implementation of a mutable collection of memory regions. /// /// This implementation uses `ArcSwap` to provide RCU-like snapshotting of the memory map: -/// every update of the memory map creates a completely new `GuestMemory` object, and +/// every update of the memory map creates a completely new `IoMemory` object, and /// readers will not be blocked because the copies they retrieved will be collected once /// no one can access them anymore. Under the assumption that updates to the memory map /// are rare, this allows a very efficient implementation of the `memory()` method. #[derive(Clone, Debug)] -pub struct GuestMemoryAtomic { +pub struct GuestMemoryAtomic { // GuestAddressSpace, which we want to implement, is basically a drop-in // replacement for &M. Therefore, we need to pass to devices the `GuestMemoryAtomic` // rather than a reference to it. To obtain this effect we wrap the actual fields @@ -34,9 +34,9 @@ pub struct GuestMemoryAtomic { inner: Arc<(ArcSwap, Mutex<()>)>, } -impl From> for GuestMemoryAtomic { +impl From> for GuestMemoryAtomic { /// create a new `GuestMemoryAtomic` object whose initial contents come from - /// the `map` reference counted `GuestMemory`. + /// the `map` reference counted `IoMemory`. fn from(map: Arc) -> Self { let inner = (ArcSwap::new(map), Mutex::new(())); GuestMemoryAtomic { @@ -45,9 +45,9 @@ impl From> for GuestMemoryAtomic { } } -impl GuestMemoryAtomic { +impl GuestMemoryAtomic { /// create a new `GuestMemoryAtomic` object whose initial contents come from - /// the `map` `GuestMemory`. + /// the `map` `IoMemory`. pub fn new(map: M) -> Self { Arc::new(map).into() } @@ -75,7 +75,7 @@ impl GuestMemoryAtomic { } } -impl GuestAddressSpace for GuestMemoryAtomic { +impl GuestAddressSpace for GuestMemoryAtomic { type T = GuestMemoryLoadGuard; type M = M; @@ -86,14 +86,14 @@ impl GuestAddressSpace for GuestMemoryAtomic { /// A guard that provides temporary access to a `GuestMemoryAtomic`. This /// object is returned from the `memory()` method. It dereference to -/// a snapshot of the `GuestMemory`, so it can be used transparently to +/// a snapshot of the `IoMemory`, so it can be used transparently to /// access memory. #[derive(Debug)] -pub struct GuestMemoryLoadGuard { +pub struct GuestMemoryLoadGuard { guard: Guard>, } -impl GuestMemoryLoadGuard { +impl GuestMemoryLoadGuard { /// Make a clone of the held pointer and returns it. This is more /// expensive than just using the snapshot, but it allows to hold on /// to the snapshot outside the scope of the guard. 
It also allows @@ -104,7 +104,7 @@ impl GuestMemoryLoadGuard { } } -impl Clone for GuestMemoryLoadGuard { +impl Clone for GuestMemoryLoadGuard { fn clone(&self) -> Self { GuestMemoryLoadGuard { guard: Guard::from_inner(Arc::clone(&*self.guard)), @@ -112,7 +112,7 @@ impl Clone for GuestMemoryLoadGuard { } } -impl Deref for GuestMemoryLoadGuard { +impl Deref for GuestMemoryLoadGuard { type Target = M; fn deref(&self) -> &Self::Target { @@ -125,12 +125,12 @@ impl Deref for GuestMemoryLoadGuard { /// possibly after updating the memory map represented by the /// `GuestMemoryAtomic` that created the guard. #[derive(Debug)] -pub struct GuestMemoryExclusiveGuard<'a, M: GuestMemory> { +pub struct GuestMemoryExclusiveGuard<'a, M: IoMemory> { parent: &'a GuestMemoryAtomic, _guard: MutexGuard<'a, ()>, } -impl GuestMemoryExclusiveGuard<'_, M> { +impl GuestMemoryExclusiveGuard<'_, M> { /// Replace the memory map in the `GuestMemoryAtomic` that created the guard /// with the new memory map, `map`. The lock is then dropped since this /// method consumes the guard. diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 2e9da95d..0b71114e 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -55,7 +55,7 @@ use crate::bitmap::MS; use crate::bytes::{AtomicAccess, Bytes}; use crate::io::{ReadVolatile, WriteVolatile}; use crate::volatile_memory::{self, VolatileSlice}; -use crate::GuestMemoryRegion; +use crate::{GuestMemoryRegion, IoMemory, Permissions}; /// Errors associated with handling guest memory accesses. #[allow(missing_docs)] @@ -222,7 +222,7 @@ impl FileOffset { /// ``` pub trait GuestAddressSpace { /// The type that will be used to access guest memory. - type M: GuestMemory; + type M: IoMemory; /// A type that provides access to the memory. type T: Clone + Deref; @@ -233,7 +233,7 @@ pub trait GuestAddressSpace { fn memory(&self) -> Self::T; } -impl GuestAddressSpace for &M { +impl GuestAddressSpace for &M { type M = M; type T = Self; @@ -242,7 +242,7 @@ impl GuestAddressSpace for &M { } } -impl GuestAddressSpace for Rc { +impl GuestAddressSpace for Rc { type M = M; type T = Self; @@ -251,7 +251,7 @@ impl GuestAddressSpace for Rc { } } -impl GuestAddressSpace for Arc { +impl GuestAddressSpace for Arc { type M = M; type T = Self; @@ -458,13 +458,14 @@ pub trait GuestMemory { } } -impl Bytes for T { +impl Bytes for T { type E = Error; fn write(&self, buf: &[u8], addr: GuestAddress) -> Result { self.try_access( buf.len(), addr, + Permissions::Write, |offset, count, caddr, region| -> Result { region.write(&buf[offset..(offset + count)], caddr) }, @@ -475,6 +476,7 @@ impl Bytes for T { self.try_access( buf.len(), addr, + Permissions::Read, |offset, count, caddr, region| -> Result { region.read(&mut buf[offset..(offset + count)], caddr) }, @@ -542,9 +544,12 @@ impl Bytes for T { where F: ReadVolatile, { - self.try_access(count, addr, |_, len, caddr, region| -> Result { - region.read_volatile_from(caddr, src, len) - }) + self.try_access( + count, + addr, + Permissions::Write, + |_, len, caddr, region| -> Result { region.read_volatile_from(caddr, src, len) }, + ) } fn read_exact_volatile_from( @@ -570,11 +575,16 @@ impl Bytes for T { where F: WriteVolatile, { - self.try_access(count, addr, |_, len, caddr, region| -> Result { - // For a non-RAM region, reading could have side effects, so we - // must use write_all(). 
- region.write_all_volatile_to(caddr, dst, len).map(|()| len) - }) + self.try_access( + count, + addr, + Permissions::Read, + |_, len, caddr, region| -> Result { + // For a non-RAM region, reading could have side effects, so we + // must use write_all(). + region.write_all_volatile_to(caddr, dst, len).map(|()| len) + }, + ) } fn write_all_volatile_to(&self, addr: GuestAddress, dst: &mut F, count: usize) -> Result<()> @@ -597,6 +607,7 @@ impl Bytes for T { let completed = self.try_access( expected, addr, + Permissions::Write, |offset, len, region_addr, region| -> Result { assert_eq!(offset, 0); if len < expected { @@ -626,6 +637,7 @@ impl Bytes for T { let completed = self.try_access( expected, addr, + Permissions::Read, |offset, len, region_addr, region| -> Result { assert_eq!(offset, 0); if len < expected { From 254db08d308b48e286e8285a6b6c8b8e80510575 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:07 +0200 Subject: [PATCH 06/11] Add Iommu trait and Iotlb struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Iommu trait defines an interface for translating virtual addresses into addresses in an underlying address space. It is supposed to do so by internally keeping an instance of the Iotlb type, updating it with mappings whenever necessary (e.g. when actively invalidated or when there’s an access failure) from some internal data source (e.g. for a vhost-user IOMMU, the data comes from the vhost-user front-end by requesting an update). In a later commit, we are going to provide an implementation of `IoMemory` that can use an `Iommu` to provide an I/O virtual address space. Note that while I/O virtual memory in practice will be organized in pages, the vhost-user specification makes no mention of a specific page size or how to obtain it. Therefore, we cannot really assume any page size and have to use plain ranges with byte granularity as mappings instead. 
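To illustrate the byte-granularity model, here is a small sketch (not part of this series; it assumes the new `iommu` feature and the `Iotlb` API introduced below): mappings can have arbitrary lengths, and a lookup yields one fragment per IOTLB entry.

    use vm_memory::iommu::{Iotlb, MappedRange};
    use vm_memory::{GuestAddress, Permissions};

    fn main() {
        let mut iotlb = Iotlb::new();

        // No page size is assumed; mappings may cover arbitrary byte ranges.
        iotlb
            .set_mapping(GuestAddress(0x1000), GuestAddress(0x9000), 12, Permissions::Read)
            .unwrap();
        iotlb
            .set_mapping(GuestAddress(0x100c), GuestAddress(0x5000), 20, Permissions::ReadWrite)
            .unwrap();

        // A readable lookup spanning both entries returns one fragment per mapping.
        let fragments: Vec<MappedRange> =
            Iotlb::lookup(&iotlb, GuestAddress(0x1000), 32, Permissions::Read)
                .expect("whole range is mapped readable")
                .collect();
        assert_eq!(
            fragments,
            vec![
                MappedRange { base: GuestAddress(0x9000), length: 12 },
                MappedRange { base: GuestAddress(0x5000), length: 20 },
            ]
        );
    }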
Signed-off-by: Hanna Czenczek --- Cargo.toml | 2 + src/atomic.rs | 7 +- src/guest_memory.rs | 6 + src/iommu.rs | 332 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 + 5 files changed, 349 insertions(+), 3 deletions(-) create mode 100644 src/iommu.rs diff --git a/Cargo.toml b/Cargo.toml index 2a60e94e..cee70507 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ default = ["rawfd"] backend-bitmap = ["dep:libc"] backend-mmap = ["dep:libc"] backend-atomic = ["arc-swap"] +iommu = ["dep:rangemap"] rawfd = ["dep:libc"] xen = ["backend-mmap", "bitflags", "vmm-sys-util"] @@ -23,6 +24,7 @@ xen = ["backend-mmap", "bitflags", "vmm-sys-util"] libc = { version = "0.2.39", optional = true } arc-swap = { version = "1.0.0", optional = true } bitflags = { version = "2.4.0", optional = true } +rangemap = { version = "1.5.1", optional = true } thiserror = "1.0.40" vmm-sys-util = { version = ">=0.12.1,<=0.14.0", optional = true } diff --git a/src/atomic.rs b/src/atomic.rs index 4cebbcd8..e3287ad2 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -143,7 +143,7 @@ impl GuestMemoryExclusiveGuard<'_, M> { mod tests { use super::*; use crate::region::tests::{new_guest_memory_collection_from_regions, Collection, MockRegion}; - use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize}; + use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, IoMemory}; type GuestMemoryMmapAtomic = GuestMemoryAtomic; @@ -157,7 +157,8 @@ mod tests { let mut iterated_regions = Vec::new(); let gmm = new_guest_memory_collection_from_regions(®ions).unwrap(); let gm = GuestMemoryMmapAtomic::new(gmm); - let mem = gm.memory(); + let vmem = gm.memory(); + let mem = vmem.physical_memory().unwrap(); for region in mem.iter() { assert_eq!(region.len(), region_size as GuestUsize); @@ -176,7 +177,7 @@ mod tests { .map(|x| (x.0, x.1)) .eq(iterated_regions.iter().copied())); - let mem2 = mem.into_inner(); + let mem2 = vmem.into_inner(); for region in mem2.iter() { assert_eq!(region.len(), region_size as GuestUsize); } diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 0b71114e..4cb7f652 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -54,6 +54,8 @@ use crate::address::{Address, AddressValue}; use crate::bitmap::MS; use crate::bytes::{AtomicAccess, Bytes}; use crate::io::{ReadVolatile, WriteVolatile}; +#[cfg(feature = "iommu")] +use crate::iommu::Error as IommuError; use crate::volatile_memory::{self, VolatileSlice}; use crate::{GuestMemoryRegion, IoMemory, Permissions}; @@ -84,6 +86,10 @@ pub enum Error { /// The address to be read by `try_access` is outside the address range. #[error("The address to be read by `try_access` is outside the address range")] GuestAddressOverflow, + #[cfg(feature = "iommu")] + /// IOMMU translation error + #[error("IOMMU failed to translate guest address: {0}")] + IommuError(IommuError), } impl From for Error { diff --git a/src/iommu.rs b/src/iommu.rs new file mode 100644 index 00000000..cc16ea33 --- /dev/null +++ b/src/iommu.rs @@ -0,0 +1,332 @@ +// Copyright (C) 2025 Red Hat. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Provide an interface for IOMMUs enabling I/O virtual address (IOVA) translation. +//! +//! All IOMMUs consist of an IOTLB ([`Iotlb`]), which is backed by a data source that can deliver +//! all mappings. 
For example, for vhost-user, that data source is the vhost-user front-end; i.e. +//! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies +//! the desired mapping. + +use crate::{GuestAddress, Permissions}; +use rangemap::RangeMap; +use std::cmp; +use std::fmt::Debug; +use std::num::Wrapping; +use std::ops::{Deref, Range}; + +/// Errors associated with IOMMU address translation. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Lookup cannot be resolved. + #[error( + "Cannot translate I/O virtual address range {:#x}+{}: {reason}", + iova_range.base.0, + iova_range.length, + )] + CannotResolve { + /// IOVA range that could not be resolved + iova_range: IovaRange, + /// Some human-readable specifics about the reason + reason: String, + }, + + /// Wanted to translate an IOVA range into a single slice, but the range is fragmented. + #[error( + "Expected {:#x}+{} to be a continuous I/O virtual address range, but only {continuous_length} bytes are", + iova_range.base.0, + iova_range.length, + )] + Fragmented { + /// Full IOVA range that was to be translated + iova_range: IovaRange, + /// Length of the continuous head (i.e. the first fragment) + continuous_length: usize, + }, + + /// IOMMU is not configured correctly, and so cannot translate addresses. + #[error("IOMMU not configured correctly, cannot operate: {reason}")] + IommuMisconfigured { + /// Some human-readable specifics about the misconfiguration + reason: String, + }, +} + +/// An IOMMU, allowing translation of I/O virtual addresses (IOVAs). +/// +/// Generally, `Iommu` implementaions consist of an [`Iotlb`], which is supposed to be consulted +/// first for lookup requests. All misses and access failures then should be resolved by looking +/// up the affected ranges in the actual IOMMU (which has all current mappings) and putting the +/// results back into the IOTLB. A subsequent lookup in the IOTLB should result in a full +/// translation, which can then be returned. +pub trait Iommu: Debug + Send + Sync { + /// `Deref` type associated with the type that internally wraps the `Iotlb`. + /// + /// For example, the `Iommu` may keep the `Iotlb` wrapped in an `RwLock`, making this type + /// `RwLockReadGuard<'a, Iotlb>`. + /// + /// We need this specific type instead of a plain reference so that [`IotlbIterator`] can + /// actually own the reference and prolong its lifetime. + type IotlbGuard<'a>: Deref + 'a + where + Self: 'a; + + /// Translate the given range for the given access into the underlying address space. + /// + /// Any translation request is supposed to be fully served by an internal [`Iotlb`] instance. + /// Any misses or access failures should result in a lookup in the full IOMMU structures, + /// filling the IOTLB with the results, and then repeating the lookup in there. + fn translate( + &self, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result>, Error>; +} + +/// Mapping target in an IOMMU/IOTLB. +/// +/// This is the data to which each entry in an IOMMU/IOTLB maps. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct IommuMapping { + /// Difference between the mapped and the IOVA address, i.e. what to add to an IOVA address to + /// get the mapped adrress. + /// + /// We cannot store the more obvious mapped base address for this range because that would + /// allow rangemap to wrongfully merge consecutive map entries if they are a duplicate mapping + /// (which does happen). 
Storing the difference ensures that entries are only merged when they + /// are indeed consecutive. + /// + /// Note that we make no granularity restrictions (i.e. do not operate on a unit like pages), + /// so the source and target address may have arbitrary alignment. That is why both fields + /// here need to be separate and we cannot merge the two bits that are `permissions` with this + /// base address into a single `u64` field. + target_source_diff: Wrapping, + /// Allowed access for the mapped range + permissions: Permissions, +} + +/// Provides an IOTLB. +/// +/// The IOTLB caches IOMMU mappings. It must be preemptively updated whenever mappings are +/// restricted or removed; in contrast, adding mappings or making them more permissive does not +/// require preemptive updates, as subsequent accesses that violate the previous (more restrictive) +/// permissions will trigger TLB misses or access failures, which is then supposed to result in an +/// update from the outer [`Iommu`] object that performs the translation. +#[derive(Debug, Default)] +pub struct Iotlb { + /// Mappings of which we know. + /// + /// Note that the vhost(-user) specification makes no mention of a specific page size, even + /// though in practice the IOVA address space will be organized in terms of pages. However, we + /// cannot really rely on that (or any specific page size; it could be 4k, the guest page size, + /// or the host page size), so we need to be able to handle continuous ranges of any + /// granularity. + tlb: RangeMap, +} + +/// Iterates over a range of valid IOTLB mappings that together constitute a continuous range in +/// I/O virtual address space. +/// +/// Returned by [`Iotlb::lookup()`] and [`Iommu::translate()`] in case translation was successful +/// (i.e. the whole requested range is mapped and permits the given access). +#[derive(Clone, Debug)] +pub struct IotlbIterator> { + /// IOTLB that provides these mapings + iotlb: D, + /// I/O virtual address range left to iterate over + range: Range, + /// Requested access permissions + access: Permissions, +} + +/// Representation of an IOVA memory range (i.e. in the I/O virtual address space). +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct IovaRange { + /// IOVA base address + pub base: GuestAddress, + /// Length (in bytes) of this range + pub length: usize, +} + +/// Representation of a mapped memory range in the underlying address space. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MappedRange { + /// Base address in the underlying address space + pub base: GuestAddress, + /// Length (in bytes) of this mapping + pub length: usize, +} + +/// Lists the subranges in I/O virtual address space that turned out to not be accessible when +/// trying to access an IOVA range. +#[derive(Clone, Debug)] +pub struct IotlbFails { + /// Subranges not mapped at all + pub misses: Vec, + /// Subranges that are mapped, but do not allow the requested access mode + pub access_fails: Vec, +} + +impl IommuMapping { + /// Create a new mapping. + fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self { + IommuMapping { + target_source_diff: Wrapping(target_base) - Wrapping(source_base), + permissions, + } + } + + /// Map the given source address (IOVA) to its corresponding target address. + fn map(&self, iova: u64) -> u64 { + (Wrapping(iova) + self.target_source_diff).0 + } + + /// Return the permissions for this mapping. 
+ fn permissions(&self) -> Permissions { + self.permissions + } +} + +impl Iotlb { + /// Create a new empty instance. + pub fn new() -> Self { + Default::default() + } + + /// Change the mapping of the given IOVA range. + pub fn set_mapping( + &mut self, + iova: GuestAddress, + map_to: GuestAddress, + length: usize, + perm: Permissions, + ) -> Result<(), Error> { + // Soft TODO: We may want to evict old entries here once the TLB grows to a certain size, + // but that will require LRU book-keeping. However, this is left for the future, because: + // - this TLB is not implemented in hardware, so we do not really have strong entry count + // constraints, and + // - it seems like at least Linux guests invalidate mappings often, automatically limiting + // our entry count. + + let mapping = IommuMapping::new(iova.0, map_to.0, perm); + self.tlb.insert(iova.0..(iova.0 + length as u64), mapping); + + Ok(()) + } + + /// Remove any mapping in the given IOVA range. + pub fn invalidate_mapping(&mut self, iova: GuestAddress, length: usize) { + self.tlb.remove(iova.0..(iova.0 + length as u64)); + } + + /// Remove all mappings. + pub fn invalidate_all(&mut self) { + self.tlb.clear(); + } + + /// Perform a lookup for the given range and the given `access` mode. + /// + /// If the whole range is mapped and accessible, return an iterator over all mappings. + /// + /// If any part of the range is not mapped or does not permit the given access mode, return an + /// `Err(_)` that contains a list of all such subranges. + pub fn lookup>( + this: D, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result, IotlbFails> { + let full_range = iova.0..(iova.0 + length as u64); + + let has_misses = this.tlb.gaps(&full_range).any(|_| true); + let has_access_fails = this + .tlb + .overlapping(full_range.clone()) + .any(|(_, mapping)| !mapping.permissions().allow(access)); + + if has_misses || has_access_fails { + let misses = this + .tlb + .gaps(&full_range) + .map(|range| { + // Gaps are always cut down to the range given to `gaps()` + debug_assert!(range.start >= full_range.start && range.end <= full_range.end); + range.try_into().unwrap() + }) + .collect::>(); + + let access_fails = this + .tlb + .overlapping(full_range.clone()) + .filter(|(_, mapping)| !mapping.permissions().allow(access)) + .map(|(range, _)| { + let start = cmp::max(range.start, full_range.start); + let end = cmp::min(range.end, full_range.end); + (start..end).try_into().unwrap() + }) + .collect::>(); + + return Err(IotlbFails { + misses, + access_fails, + }); + } + + Ok(IotlbIterator { + iotlb: this, + range: full_range, + access, + }) + } +} + +impl> Iterator for IotlbIterator { + /// Addresses in the underlying address space + type Item = MappedRange; + + fn next(&mut self) -> Option { + // Note that we can expect the whole IOVA range to be mapped with the right access flags. + // The `IotlbIterator` is created by `Iotlb::lookup()` only if the whole range is mapped + // accessibly; we have a permanent reference to `Iotlb`, so the range cannot be invalidated + // in the meantime. + // Another note: It is tempting to have `IotlbIterator` wrap around the + // `rangemap::Overlapping` iterator, but that just takes a (lifetimed) reference to the + // map, not an owned reference (like RwLockReadGuard), which we want to use; so using that + // would probably require self-referential structs. 
+ + if self.range.is_empty() { + return None; + } + + let (range, mapping) = self.iotlb.tlb.get_key_value(&self.range.start).unwrap(); + + assert!(mapping.permissions().allow(self.access)); + + let mapping_iova_start = self.range.start; + let mapping_iova_end = cmp::min(self.range.end, range.end); + let mapping_len = mapping_iova_end - mapping_iova_start; + + self.range.start = mapping_iova_end; + + Some(MappedRange { + base: GuestAddress(mapping.map(mapping_iova_start)), + length: mapping_len.try_into().unwrap(), + }) + } +} + +impl TryFrom> for IovaRange { + type Error = >::Error; + + fn try_from(range: Range) -> Result { + Ok(IovaRange { + base: GuestAddress(range.start), + length: (range.end - range.start).try_into()?, + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index 679ae7da..327cfcf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,6 +61,11 @@ pub use io::{ReadVolatile, WriteVolatile}; pub mod io_memory; pub use io_memory::{IoMemory, Permissions}; +#[cfg(feature = "iommu")] +pub mod iommu; +#[cfg(feature = "iommu")] +pub use iommu::{Iommu, Iotlb}; + #[cfg(feature = "backend-mmap")] pub mod mmap; From ccb8b63201afdfc3a70cceb066c66fe3638d4078 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:10 +0200 Subject: [PATCH 07/11] Add IommuMemory This `IoMemory` type provides an I/O virtual address space by adding an IOMMU translation layer to an underlying `GuestMemory` object. Signed-off-by: Hanna Czenczek --- src/iommu.rs | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 2 +- 2 files changed, 185 insertions(+), 2 deletions(-) diff --git a/src/iommu.rs b/src/iommu.rs index cc16ea33..035f9546 100644 --- a/src/iommu.rs +++ b/src/iommu.rs @@ -12,12 +12,16 @@ //! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies //! the desired mapping. -use crate::{GuestAddress, Permissions}; +use crate::guest_memory::{Error as GuestMemoryError, Result as GuestMemoryResult}; +use crate::{ + bitmap, GuestAddress, GuestMemory, IoMemory, MemoryRegionAddress, Permissions, VolatileSlice, +}; use rangemap::RangeMap; use std::cmp; use std::fmt::Debug; use std::num::Wrapping; use std::ops::{Deref, Range}; +use std::sync::Arc; /// Errors associated with IOMMU address translation. #[derive(Debug, thiserror::Error)] @@ -172,6 +176,22 @@ pub struct IotlbFails { pub access_fails: Vec, } +/// [`IoMemory`] type that consists of an underlying [`GuestMemory`] object plus an [`Iommu`]. +/// +/// The underlying [`GuestMemory`] is basically the physical memory, and the [`Iommu`] translates +/// the I/O virtual address space that `IommuMemory` provides into that underlying physical address +/// space. +#[derive(Debug, Default)] +pub struct IommuMemory { + /// Physical memory + inner: M, + /// IOMMU to translate IOVAs into physical addresses + iommu: Arc, + /// Whether the IOMMU is even to be used or not; disabling it makes this a pass-through to + /// `inner`. + use_iommu: bool, +} + impl IommuMapping { /// Create a new mapping. fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self { @@ -330,3 +350,166 @@ impl TryFrom> for IovaRange { }) } } + +impl IommuMemory { + /// Create a new `IommuMemory` instance. + pub fn new(inner: M, iommu: Arc, use_iommu: bool) -> Self { + IommuMemory { + inner, + iommu, + use_iommu, + } + } + + /// Create a new version of `self` with the underlying physical memory replaced. + /// + /// Note that the inner `Arc` reference to the IOMMU is cloned, i.e. 
both the existing and the + /// new `IommuMemory` object will share an IOMMU instance. (The `use_iommu` flag however is + /// copied, so is independent between the two instances.) + pub fn inner_replaced(&self, inner: M) -> Self { + IommuMemory { + inner, + iommu: Arc::clone(&self.iommu), + use_iommu: self.use_iommu, + } + } + + /// Enable or disable the IOMMU. + /// + /// Disabling the IOMMU switches to pass-through mode, where every access is done directly on + /// the underlying physical memory. + pub fn set_iommu_enabled(&mut self, enabled: bool) { + self.use_iommu = enabled; + } + + /// Return a reference to the IOMMU. + pub fn iommu(&self) -> &Arc { + &self.iommu + } + + /// Return a reference to the inner physical memory object. + pub fn inner(&self) -> &M { + &self.inner + } +} + +impl Clone for IommuMemory { + fn clone(&self) -> Self { + IommuMemory { + inner: self.inner.clone(), + iommu: Arc::clone(&self.iommu), + use_iommu: self.use_iommu, + } + } +} + +impl IoMemory for IommuMemory { + type PhysicalMemory = M; + + fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool { + if !self.use_iommu { + return self.inner.range_accessible(addr, count, access); + } + + let Ok(mut translated_iter) = self.iommu.translate(addr, count, access) else { + return false; + }; + + translated_iter.all(|translated| { + self.inner + .range_accessible(translated.base, translated.length, access) + }) + } + + fn try_access<'a, F>( + &'a self, + count: usize, + addr: GuestAddress, + access: Permissions, + mut f: F, + ) -> GuestMemoryResult + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &'a ::R, + ) -> GuestMemoryResult, + { + if !self.use_iommu { + return self.inner.try_access(count, addr, f); + } + + let translated = self + .iommu + .translate(addr, count, access) + .map_err(GuestMemoryError::IommuError)?; + + let mut total = 0; + for mapping in translated { + let handled = self.inner.try_access( + mapping.length, + mapping.base, + |inner_offset, count, in_region_addr, region| { + f(total + inner_offset, count, in_region_addr, region) + }, + )?; + + if handled == 0 { + break; + } else if handled > count { + return Err(GuestMemoryError::CallbackOutOfRange); + } + + total += handled; + // `GuestMemory::try_access()` only returns a short count when no more data needs to be + // processed, so we can stop here + if handled < mapping.length { + break; + } + } + + Ok(total) + } + + fn get_slice( + &self, + addr: GuestAddress, + count: usize, + access: Permissions, + ) -> GuestMemoryResult>> { + if !self.use_iommu { + return self.inner.get_slice(addr, count); + } + + // Ensure `count` is at least 1 so we can translate something + let adj_count = cmp::max(count, 1); + + let mut translated = self + .iommu + .translate(addr, adj_count, access) + .map_err(GuestMemoryError::IommuError)?; + + let mapping = translated.next().unwrap(); + if translated.next().is_some() { + return Err(GuestMemoryError::IommuError(Error::Fragmented { + iova_range: IovaRange { + base: addr, + length: count, + }, + continuous_length: mapping.length, + })); + } + + assert!(mapping.length == count || (count == 0 && mapping.length == 1)); + self.inner.get_slice(mapping.base, count) + } + + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + if self.use_iommu { + None + } else { + Some(&self.inner) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 327cfcf8..09314c4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,7 @@ pub use io_memory::{IoMemory, Permissions}; #[cfg(feature 
= "iommu")] pub mod iommu; #[cfg(feature = "iommu")] -pub use iommu::{Iommu, Iotlb}; +pub use iommu::{Iommu, IommuMemory, Iotlb}; #[cfg(feature = "backend-mmap")] pub mod mmap; From 460f4dae5140b27dc911472160f2698d43903c86 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Tue, 13 May 2025 12:01:41 +0200 Subject: [PATCH 08/11] mmap: Wrap MmapRegion in Arc<> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vhost-user-backend crate will need to be able to modify all existing memory regions to use the VMM user address instead of the guest physical address once the IOMMU feature is switched on, and vice versa. To do so, it needs to be able to modify regions’ base address. Because `GuestMemoryMmap` stores regions wrapped in an `Arc<_>`, we cannot mutate them after they have been put into the `GuestMemoryMmap` object; and `MmapRegion` itself is by its nature not clonable. So to modify the regions’ base addresses, we need some way to create a new `GuestRegionMmap` referencing the same `MmapRegion` as another one, but with a different base address. We can do that by having `GuestRegionMmap` wrap its `MmapRegion` in an `Arc`, and adding a method to return a reference to that `Arc`, and a method to construct a `GuestRegionMmap` object from such a cloned `Arc.` Signed-off-by: Hanna Czenczek --- src/mmap/mod.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mmap/mod.rs b/src/mmap/mod.rs index b5baaf8a..2deb6d0e 100644 --- a/src/mmap/mod.rs +++ b/src/mmap/mod.rs @@ -15,6 +15,7 @@ use std::borrow::Borrow; use std::ops::Deref; use std::result; +use std::sync::Arc; use crate::address::Address; use crate::bitmap::{Bitmap, BS}; @@ -54,7 +55,7 @@ pub use windows::MmapRegion; /// in the virtual address space of the calling process. #[derive(Debug)] pub struct GuestRegionMmap { - mapping: MmapRegion, + mapping: Arc>, guest_base: GuestAddress, } @@ -62,7 +63,7 @@ impl Deref for GuestRegionMmap { type Target = MmapRegion; fn deref(&self) -> &MmapRegion { - &self.mapping + self.mapping.as_ref() } } @@ -71,6 +72,11 @@ impl GuestRegionMmap { /// /// Returns `None` if `guest_base` + `mapping.len()` would overflow. pub fn new(mapping: MmapRegion, guest_base: GuestAddress) -> Option { + Self::with_arc(Arc::new(mapping), guest_base) + } + + /// Same as [`Self::new()`], but takes an `Arc`-wrapped `mapping`. + pub fn with_arc(mapping: Arc>, guest_base: GuestAddress) -> Option { guest_base .0 .checked_add(mapping.size() as u64) @@ -79,6 +85,16 @@ impl GuestRegionMmap { guest_base, }) } + + /// Return a reference to the inner `Arc` (as opposed to + /// [`.deref()`](Self::deref()), which does not reference the `Arc`). + /// + /// The returned reference can be cloned to construct a new `GuestRegionMmap` with a different + /// base address (e.g. when switching between memory address spaces based on the guest physical + /// address vs. the VMM userspace virtual address). + pub fn get_mapping(&self) -> &Arc> { + &self.mapping + } } #[cfg(not(feature = "xen"))] From 8b8ef7585b05550900f4e50325f3c8c438dc645a Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Wed, 9 Jul 2025 14:50:35 +0200 Subject: [PATCH 09/11] Add tests for IOMMU functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit also adds the iommu feature to the coverage_config feature list. 
(I left the aarch64 coverage value unchanged; I cannot find out how to get the current value on my system, and it isn’t include in CI.) Signed-off-by: Hanna Czenczek --- coverage_config_aarch64.json | 2 +- coverage_config_x86_64.json | 4 +- src/io_memory.rs | 51 +++ src/iommu.rs | 754 +++++++++++++++++++++++++++++++++++ src/mmap/mod.rs | 25 ++ 5 files changed, 833 insertions(+), 3 deletions(-) diff --git a/coverage_config_aarch64.json b/coverage_config_aarch64.json index 4aeb3711..7332c6c7 100644 --- a/coverage_config_aarch64.json +++ b/coverage_config_aarch64.json @@ -1,5 +1,5 @@ { "coverage_score": 85.2, "exclude_path": "mmap/windows.rs", - "crate_features": "backend-mmap,backend-atomic,backend-bitmap" + "crate_features": "backend-mmap,backend-atomic,backend-bitmap,iommu" } diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json index 13f2dfd7..068b9764 100644 --- a/coverage_config_x86_64.json +++ b/coverage_config_x86_64.json @@ -1,5 +1,5 @@ { - "coverage_score": 91.78, + "coverage_score": 92.68, "exclude_path": "mmap_windows.rs", - "crate_features": "backend-mmap,backend-atomic,backend-bitmap" + "crate_features": "backend-mmap,backend-atomic,backend-bitmap,iommu" } diff --git a/src/io_memory.rs b/src/io_memory.rs index e437ee5b..433f2b58 100644 --- a/src/io_memory.rs +++ b/src/io_memory.rs @@ -181,3 +181,54 @@ impl IoMemory for M { Some(self) } } + +#[cfg(test)] +mod tests { + use super::Permissions; + + // Note that `IoMemory` is tested primarily in src/iommu.rs via `IommuMemory`. + + /// Test `Permissions & Permissions`. + #[test] + fn test_perm_and() { + use Permissions::*; + + for p in [No, Read, Write, ReadWrite] { + assert_eq!(p & p, p); + } + for p1 in [No, Read, Write, ReadWrite] { + for p2 in [No, Read, Write, ReadWrite] { + assert_eq!(p1 & p2, p2 & p1); + } + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(No & p, No); + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(ReadWrite & p, p); + } + assert_eq!(Read & Write, No); + } + + /// Test `Permissions | Permissions`. 
+ #[test] + fn test_perm_or() { + use Permissions::*; + + for p in [No, Read, Write, ReadWrite] { + assert_eq!(p | p, p); + } + for p1 in [No, Read, Write, ReadWrite] { + for p2 in [No, Read, Write, ReadWrite] { + assert_eq!(p1 | p2, p2 | p1); + } + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(No | p, p); + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(ReadWrite | p, ReadWrite); + } + assert_eq!(Read | Write, ReadWrite); + } +} diff --git a/src/iommu.rs b/src/iommu.rs index 035f9546..300ec666 100644 --- a/src/iommu.rs +++ b/src/iommu.rs @@ -513,3 +513,757 @@ impl IoMemory for IommuMemory { } } } + +#[cfg(test)] +mod tests { + use super::{Error, IotlbIterator, IovaRange, MappedRange}; + use crate::{ + Address, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap, GuestMemoryResult, + IoMemory, Iommu, IommuMemory, Iotlb, Permissions, + }; + use std::fmt::Debug; + use std::ops::Deref; + use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; + use std::sync::{Arc, RwLock, RwLockReadGuard}; + + #[derive(Debug)] + struct SimpleIommu { + iotlb: RwLock, + /// Records the last fail event's base IOVA + fail_base: AtomicU64, + /// Records the last fail event's length + fail_len: AtomicUsize, + /// Records whether the last fail event was a miss + fail_was_miss: AtomicBool, + /// What base physical address to map to on the next fail event (0 means return error) + next_map_to: AtomicU64, + } + + impl SimpleIommu { + fn new() -> Self { + SimpleIommu { + iotlb: Iotlb::new().into(), + fail_base: 0.into(), + fail_len: 0.into(), + fail_was_miss: false.into(), + next_map_to: 0.into(), + } + } + + fn expect_mapping_request(&self, to_phys: GuestAddress) { + // Clear failed range info so it can be tested after the request + self.fail_base.store(0, Ordering::Relaxed); + self.fail_len.store(0, Ordering::Relaxed); + self.next_map_to.store(to_phys.0, Ordering::Relaxed); + } + + fn verify_mapping_request(&self, virt: GuestAddress, len: usize, was_miss: bool) { + assert_eq!(self.fail_base.load(Ordering::Relaxed), virt.0); + assert_eq!(self.fail_len.load(Ordering::Relaxed), len); + assert_eq!(self.fail_was_miss.load(Ordering::Relaxed), was_miss); + } + } + + impl Iommu for SimpleIommu { + type IotlbGuard<'a> = RwLockReadGuard<'a, Iotlb>; + + fn translate( + &self, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result>, Error> { + loop { + let mut fails = + match Iotlb::lookup(self.iotlb.read().unwrap(), iova, length, access) { + Ok(success) => return Ok(success), + Err(fails) => fails, + }; + let miss = !fails.misses.is_empty(); + let fail = fails + .misses + .pop() + .or_else(|| fails.access_fails.pop()) + .expect("No failure reported, even though a failure happened"); + self.fail_base.store(fail.base.0, Ordering::Relaxed); + self.fail_len.store(fail.length, Ordering::Relaxed); + self.fail_was_miss.store(miss, Ordering::Relaxed); + + if !fails.misses.is_empty() || !fails.access_fails.is_empty() { + return Err(Error::CannotResolve { + iova_range: IovaRange { base: iova, length }, + reason: "This IOMMU can only handle one failure per access".into(), + }); + } + + let map_to = self.next_map_to.swap(0, Ordering::Relaxed); + if map_to == 0 { + return Err(Error::CannotResolve { + iova_range: IovaRange { + base: fail.base, + length: fail.length, + }, + reason: "No mapping provided for failed range".into(), + }); + } + + self.iotlb.write().unwrap().set_mapping( + fail.base, + GuestAddress(map_to), + fail.length, + access, + )?; + } + } + } + + /// Verify that 
`iova`+`length` is mapped to `expected`. + fn verify_hit( + iotlb: impl Deref + Debug, + iova: GuestAddress, + length: usize, + permissions: Permissions, + expected: impl IntoIterator, + ) { + let mut iter = Iotlb::lookup(iotlb, iova, length, permissions) + .inspect_err(|err| panic!("Unexpected lookup error {err:?}")) + .unwrap(); + + for e in expected { + assert_eq!(iter.next(), Some(e)); + } + assert_eq!(iter.next(), None); + } + + /// Verify that trying to look up `iova`+`length` results in misses at `expected_misses` and + /// access failures (permission-related) at `expected_access_fails`. + fn verify_fail( + iotlb: impl Deref + Debug, + iova: GuestAddress, + length: usize, + permissions: Permissions, + expected_misses: impl IntoIterator, + expected_access_fails: impl IntoIterator, + ) { + let fails = Iotlb::lookup(iotlb, iova, length, permissions) + .inspect(|hits| panic!("Expected error on lookup, found {hits:?}")) + .unwrap_err(); + + let mut miss_iter = fails.misses.into_iter(); + for e in expected_misses { + assert_eq!(miss_iter.next(), Some(e)); + } + assert_eq!(miss_iter.next(), None); + + let mut accf_iter = fails.access_fails.into_iter(); + for e in expected_access_fails { + assert_eq!(accf_iter.next(), Some(e)); + } + assert_eq!(accf_iter.next(), None); + } + + /// Enter adjacent IOTLB entries and verify they are merged into a single one. + #[test] + fn test_iotlb_merge() -> Result<(), Error> { + const IOVA: GuestAddress = GuestAddress(42); + const PHYS: GuestAddress = GuestAddress(87); + const LEN_1: usize = 123; + const LEN_2: usize = 234; + + let mut iotlb = Iotlb::new(); + iotlb.set_mapping(IOVA, PHYS, LEN_1, Permissions::ReadWrite)?; + iotlb.set_mapping( + GuestAddress(IOVA.0 + LEN_1 as u64), + GuestAddress(PHYS.0 + LEN_1 as u64), + LEN_2, + Permissions::ReadWrite, + )?; + + verify_hit( + &iotlb, + IOVA, + LEN_1 + LEN_2, + Permissions::ReadWrite, + [MappedRange { + base: PHYS, + length: LEN_1 + LEN_2, + }], + ); + + // Also check just a partial range + verify_hit( + &iotlb, + GuestAddress(IOVA.0 + LEN_1 as u64 - 1), + 2, + Permissions::ReadWrite, + [MappedRange { + base: GuestAddress(PHYS.0 + LEN_1 as u64 - 1), + length: 2, + }], + ); + + Ok(()) + } + + /// Test that adjacent IOTLB entries that map to the same physical address are not merged into + /// a single entry. 
+    #[test]
+    fn test_iotlb_nomerge_same_phys() -> Result<(), Error> {
+        const IOVA: GuestAddress = GuestAddress(42);
+        const PHYS: GuestAddress = GuestAddress(87);
+        const LEN_1: usize = 123;
+        const LEN_2: usize = 234;
+
+        let mut iotlb = Iotlb::new();
+        iotlb.set_mapping(IOVA, PHYS, LEN_1, Permissions::ReadWrite)?;
+        iotlb.set_mapping(
+            GuestAddress(IOVA.0 + LEN_1 as u64),
+            PHYS,
+            LEN_2,
+            Permissions::ReadWrite,
+        )?;
+
+        verify_hit(
+            &iotlb,
+            IOVA,
+            LEN_1 + LEN_2,
+            Permissions::ReadWrite,
+            [
+                MappedRange {
+                    base: PHYS,
+                    length: LEN_1,
+                },
+                MappedRange {
+                    base: PHYS,
+                    length: LEN_2,
+                },
+            ],
+        );
+
+        Ok(())
+    }
+
+    /// Test permission handling
+    #[test]
+    fn test_iotlb_perms() -> Result<(), Error> {
+        const IOVA_R: GuestAddress = GuestAddress(42);
+        const PHYS_R: GuestAddress = GuestAddress(87);
+        const LEN_R: usize = 123;
+        const IOVA_W: GuestAddress = GuestAddress(IOVA_R.0 + LEN_R as u64);
+        const PHYS_W: GuestAddress = GuestAddress(PHYS_R.0 + LEN_R as u64);
+        const LEN_W: usize = 234;
+        const IOVA_FULL: GuestAddress = IOVA_R;
+        const LEN_FULL: usize = LEN_R + LEN_W;
+
+        let mut iotlb = Iotlb::new();
+        iotlb.set_mapping(IOVA_R, PHYS_R, LEN_R, Permissions::Read)?;
+        iotlb.set_mapping(IOVA_W, PHYS_W, LEN_W, Permissions::Write)?;
+
+        // Test 1: Access whole range as R+W, should completely fail
+        verify_fail(
+            &iotlb,
+            IOVA_FULL,
+            LEN_FULL,
+            Permissions::ReadWrite,
+            [],
+            [
+                IovaRange {
+                    base: IOVA_R,
+                    length: LEN_R,
+                },
+                IovaRange {
+                    base: IOVA_W,
+                    length: LEN_W,
+                },
+            ],
+        );
+
+        // Test 2: Access whole range as R-only, should fail on second part
+        verify_fail(
+            &iotlb,
+            IOVA_FULL,
+            LEN_FULL,
+            Permissions::Read,
+            [],
+            [IovaRange {
+                base: IOVA_W,
+                length: LEN_W,
+            }],
+        );
+
+        // Test 3: Access whole range as W-only, should fail on first part
+        verify_fail(
+            &iotlb,
+            IOVA_FULL,
+            LEN_FULL,
+            Permissions::Write,
+            [],
+            [IovaRange {
+                base: IOVA_R,
+                length: LEN_R,
+            }],
+        );
+
+        // Test 4: Access whole range w/o perms, should succeed
+        verify_hit(
+            &iotlb,
+            IOVA_FULL,
+            LEN_FULL,
+            Permissions::No,
+            [
+                MappedRange {
+                    base: PHYS_R,
+                    length: LEN_R,
+                },
+                MappedRange {
+                    base: PHYS_W,
+                    length: LEN_W,
+                },
+            ],
+        );
+
+        // Test 5: Access R range as R, should succeed
+        verify_hit(
+            &iotlb,
+            IOVA_R,
+            LEN_R,
+            Permissions::Read,
+            [MappedRange {
+                base: PHYS_R,
+                length: LEN_R,
+            }],
+        );
+
+        // Test 6: Access W range as W, should succeed
+        verify_hit(
+            &iotlb,
+            IOVA_W,
+            LEN_W,
+            Permissions::Write,
+            [MappedRange {
+                base: PHYS_W,
+                length: LEN_W,
+            }],
+        );
+
+        Ok(())
+    }
+
+    /// Test IOTLB invalidation
+    #[test]
+    fn test_iotlb_invalidation() -> Result<(), Error> {
+        const IOVA: GuestAddress = GuestAddress(42);
+        const PHYS: GuestAddress = GuestAddress(87);
+        const LEN: usize = 123;
+        const INVAL_OFS: usize = LEN / 2;
+        const INVAL_LEN: usize = 3;
+        const IOVA_AT_INVAL: GuestAddress = GuestAddress(IOVA.0 + INVAL_OFS as u64);
+        const PHYS_AT_INVAL: GuestAddress = GuestAddress(PHYS.0 + INVAL_OFS as u64);
+        const IOVA_POST_INVAL: GuestAddress = GuestAddress(IOVA_AT_INVAL.0 + INVAL_LEN as u64);
+        const PHYS_POST_INVAL: GuestAddress = GuestAddress(PHYS_AT_INVAL.0 + INVAL_LEN as u64);
+        const POST_INVAL_LEN: usize = LEN - INVAL_OFS - INVAL_LEN;
+
+        let mut iotlb = Iotlb::new();
+        iotlb.set_mapping(IOVA, PHYS, LEN, Permissions::ReadWrite)?;
+        verify_hit(
+            &iotlb,
+            IOVA,
+            LEN,
+            Permissions::ReadWrite,
+            [MappedRange {
+                base: PHYS,
+                length: LEN,
+            }],
+        );
+
+        // Invalidate something in the middle; expect mapping at the start, then miss, then further
+        // mapping
+        iotlb.invalidate_mapping(IOVA_AT_INVAL, INVAL_LEN);
+        verify_hit(
+            &iotlb,
+            IOVA,
+            INVAL_OFS,
+            Permissions::ReadWrite,
+            [MappedRange {
+                base: PHYS,
+                length: INVAL_OFS,
+            }],
+        );
+        verify_fail(
+            &iotlb,
+            IOVA,
+            LEN,
+            Permissions::ReadWrite,
+            [IovaRange {
+                base: IOVA_AT_INVAL,
+                length: INVAL_LEN,
+            }],
+            [],
+        );
+        verify_hit(
+            &iotlb,
+            IOVA_POST_INVAL,
+            POST_INVAL_LEN,
+            Permissions::ReadWrite,
+            [MappedRange {
+                base: PHYS_POST_INVAL,
+                length: POST_INVAL_LEN,
+            }],
+        );
+
+        // And invalidate everything; expect full miss
+        iotlb.invalidate_all();
+        verify_fail(
+            &iotlb,
+            IOVA,
+            LEN,
+            Permissions::ReadWrite,
+            [IovaRange {
+                base: IOVA,
+                length: LEN,
+            }],
+            [],
+        );
+
+        Ok(())
+    }
+
+    /// Create `IommuMemory` backed by multiple physical regions, all mapped into a single virtual
+    /// region (if `virt_start`/`virt_perm` are given).
+    ///
+    /// Memory is filled with incrementing (overflowing) bytes, starting with value `value_offset`.
+    #[cfg(feature = "backend-mmap")]
+    fn create_virt_memory(
+        virt_mapping: Option<(GuestAddress, Permissions)>,
+        value_offset: u8,
+        phys_regions: impl IntoIterator<Item = MappedRange>,
+    ) -> IommuMemory<GuestMemoryMmap<()>, SimpleIommu> {
+        let phys_ranges = phys_regions
+            .into_iter()
+            .map(|range| (range.base, range.length))
+            .collect::<Vec<_>>();
+        let phys_mem = GuestMemoryMmap::<()>::from_ranges(&phys_ranges).unwrap();
+
+        let mut byte_val = value_offset;
+        for (base, len) in &phys_ranges {
+            let slice = phys_mem.get_slice(*base, *len, Permissions::Write).unwrap();
+
+            for i in 0..*len {
+                slice.write(&[byte_val], i).unwrap();
+                byte_val = byte_val.wrapping_add(1);
+            }
+        }
+
+        let iommu = Arc::new(SimpleIommu::new());
+        let mem = IommuMemory::new(phys_mem, iommu, true);
+
+        // IOMMU is in use, this will be `None`
+        assert!(mem.physical_memory().is_none());
+
+        if let Some((mut virt, perm)) = virt_mapping {
+            for (base, len) in phys_ranges {
+                let mut iotlb = mem.iommu().iotlb.write().unwrap();
+                iotlb.set_mapping(virt, base, len, perm).unwrap();
+                virt = GuestAddress(virt.0 + len as u64);
+            }
+        }
+
+        mem
+    }
+
+    /// Verify the byte contents at `start`+`len`. Assume the initial byte value to be
+    /// `value_offset`.
+    ///
+    /// Each byte is expected to be incremented over the last (as created by
+    /// `create_virt_memory()`).
+    ///
+    /// Return an error if mapping fails, but just panic if there is a content mismatch.
+    #[cfg(feature = "backend-mmap")]
+    fn check_virt_mem_content(
+        mem: &impl IoMemory,
+        start: GuestAddress,
+        len: usize,
+        value_offset: u8,
+    ) -> GuestMemoryResult<()> {
+        let mut ref_value = value_offset;
+        let processed_len = mem.try_access(
+            len,
+            start,
+            Permissions::Read,
+            |ofs, count, in_region_addr, region| -> GuestMemoryResult<usize> {
+                assert_eq!(ofs as u8, ref_value.wrapping_sub(value_offset));
+                for i in 0..count {
+                    let addr = in_region_addr.checked_add(i as u64).unwrap();
+                    let val = region.load::<u8>(addr, Ordering::Relaxed)?;
+                    assert_eq!(val, ref_value);
+                    ref_value = ref_value.wrapping_add(1);
+                }
+                Ok(count)
+            },
+        )?;
+        assert_eq!(processed_len, len);
+
+        // Now try the slice interface: We have to expect fragmentation, so need an outer loop
+        // here.
+        ref_value = value_offset;
+        let mut start = start;
+        let mut len = len;
+        while len > 0 {
+            let slice = match mem.get_slice(start, len, Permissions::Read) {
+                Ok(slice) => slice,
+                Err(GuestMemoryError::IommuError(Error::Fragmented {
+                    iova_range: _,
+                    continuous_length,
+                })) => mem.get_slice(start, continuous_length, Permissions::Read)?,
+                Err(err) => return Err(err),
+            };
+
+            let count = slice.len();
+            let mut data = vec![0u8; count];
+            slice.read(&mut data, 0).unwrap();
+            for val in data {
+                assert_eq!(val, ref_value);
+                ref_value = ref_value.wrapping_add(1);
+            }
+
+            start = GuestAddress(start.0 + count as u64);
+            len -= count;
+        }
+
+        Ok(())
+    }
+
+    #[cfg(feature = "backend-mmap")]
+    fn verify_virt_mem_content(
+        m: &impl IoMemory,
+        start: GuestAddress,
+        len: usize,
+        value_offset: u8,
+    ) {
+        check_virt_mem_content(m, start, len, value_offset).unwrap();
+    }
+
+    /// Verify that trying to read from `start`+`len` fails (because of `CannotResolve`).
+    ///
+    /// The reported failed-to-map range is checked to be `fail_start`+`fail_len`. `fail_start`
+    /// defaults to `start`, `fail_len` defaults to the remaining length of the whole mapping
+    /// starting at `fail_start` (i.e. `start + len - fail_start`).
+    #[cfg(feature = "backend-mmap")]
+    fn verify_virt_mem_error(
+        m: &impl IoMemory,
+        start: GuestAddress,
+        len: usize,
+        fail_start: Option<GuestAddress>,
+        fail_len: Option<usize>,
+    ) {
+        let fail_start = fail_start.unwrap_or(start);
+        let fail_len = fail_len.unwrap_or(len - (fail_start.0 - start.0) as usize);
+        let err = check_virt_mem_content(m, start, len, 0).unwrap_err();
+        let GuestMemoryError::IommuError(Error::CannotResolve {
+            iova_range: failed_range,
+            reason: _,
+        }) = err
+        else {
+            panic!("Unexpected error: {err:?}");
+        };
+        assert_eq!(
+            failed_range,
+            IovaRange {
+                base: fail_start,
+                length: fail_len,
+            }
+        );
+    }
+
+    /// Test `IommuMemory`, with pre-filled mappings.
+    #[cfg(feature = "backend-mmap")]
+    #[test]
+    fn test_iommu_memory_pre_mapped() {
+        const PHYS_START_1: GuestAddress = GuestAddress(0x4000);
+        const PHYS_START_2: GuestAddress = GuestAddress(0x8000);
+        const PHYS_LEN: usize = 128;
+        const VIRT_START: GuestAddress = GuestAddress(0x2a000);
+        const VIRT_LEN: usize = PHYS_LEN * 2;
+        const VIRT_POST_MAP: GuestAddress = GuestAddress(VIRT_START.0 + VIRT_LEN as u64);
+
+        let mem = create_virt_memory(
+            Some((VIRT_START, Permissions::Read)),
+            0,
+            [
+                MappedRange {
+                    base: PHYS_START_1,
+                    length: PHYS_LEN,
+                },
+                MappedRange {
+                    base: PHYS_START_2,
+                    length: PHYS_LEN,
+                },
+            ],
+        );
+
+        assert!(mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::No));
+        assert!(mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Read));
+        assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Write));
+        assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::ReadWrite));
+        assert!(!mem.range_accessible(GuestAddress(VIRT_START.0 - 1), 1, Permissions::No));
+        assert!(!mem.range_accessible(VIRT_POST_MAP, 1, Permissions::No));
+
+        verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0);
+        verify_virt_mem_error(&mem, GuestAddress(VIRT_START.0 - 1), 1, None, None);
+        verify_virt_mem_error(&mem, VIRT_POST_MAP, 1, None, None);
+        verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN + 1, Some(VIRT_POST_MAP), None);
+    }
+
+    /// Test `IommuMemory`, with mappings created through the IOMMU on the fly.
+ #[cfg(feature = "backend-mmap")] + #[test] + fn test_iommu_memory_live_mapped() { + const PHYS_START_1: GuestAddress = GuestAddress(0x4000); + const PHYS_START_2: GuestAddress = GuestAddress(0x8000); + const PHYS_LEN: usize = 128; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + const VIRT_START_1: GuestAddress = VIRT_START; + const VIRT_START_2: GuestAddress = GuestAddress(VIRT_START.0 + PHYS_LEN as u64); + const VIRT_LEN: usize = PHYS_LEN * 2; + const VIRT_POST_MAP: GuestAddress = GuestAddress(VIRT_START.0 + VIRT_LEN as u64); + + let mem = create_virt_memory( + None, + 0, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + ); + + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::No)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Read)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Write)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::ReadWrite)); + assert!(!mem.range_accessible(GuestAddress(VIRT_START.0 - 1), 1, Permissions::No)); + assert!(!mem.range_accessible(VIRT_POST_MAP, 1, Permissions::No)); + + verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN, None, None); + verify_virt_mem_error(&mem, GuestAddress(VIRT_START.0 - 1), 1, None, None); + verify_virt_mem_error(&mem, VIRT_POST_MAP, 1, None, None); + verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN + 1, None, None); + + let iommu = mem.iommu(); + + // Can only map one region at a time (with `SimpleIommu`), so only access `PHYS_LEN` first, + // not `VIRT_LEN` + iommu.expect_mapping_request(PHYS_START_1); + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN, 0); + iommu.verify_mapping_request(VIRT_START_1, PHYS_LEN, true); + + iommu.expect_mapping_request(PHYS_START_2); + verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0); + iommu.verify_mapping_request(VIRT_START_2, PHYS_LEN, true); + + // Also check invalid access failure + iommu + .iotlb + .write() + .unwrap() + .set_mapping(VIRT_START_1, PHYS_START_1, PHYS_LEN, Permissions::Write) + .unwrap(); + + iommu.expect_mapping_request(PHYS_START_1); + verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0); + iommu.verify_mapping_request(VIRT_START_1, PHYS_LEN, false); + } + + /// Test replacing the physical memory of an `IommuMemory`. + #[cfg(feature = "backend-mmap")] + #[test] + fn test_mem_replace() { + const PHYS_START_1: GuestAddress = GuestAddress(0x4000); + const PHYS_START_2: GuestAddress = GuestAddress(0x8000); + const PHYS_LEN: usize = 128; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + + // Note only one physical region. `mem2` will have two, to see that this pattern + // (`inner_replaced()`) can be used to e.g. extend physical memory. 
+ let mem = create_virt_memory( + Some((VIRT_START, Permissions::Read)), + 0, + [MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }], + ); + + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN, 0); + verify_virt_mem_error( + &mem, + VIRT_START, + PHYS_LEN * 2, + Some(GuestAddress(VIRT_START.0 + PHYS_LEN as u64)), + None, + ); + + let mut mem2 = create_virt_memory( + Some((VIRT_START, Permissions::Read)), + 42, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + ); + + verify_virt_mem_content(&mem2, VIRT_START, PHYS_LEN * 2, 42); + + // Clone `mem` before replacing its physical memory, to see that works + let mem_cloned = mem.clone(); + + // Use `mem2`'s physical memory for `mem` + mem2.set_iommu_enabled(false); + let pmem2 = mem2.physical_memory().unwrap(); + assert!(std::ptr::eq(pmem2, mem2.inner())); + let mem = mem.inner_replaced(pmem2.clone()); + + // The physical memory has been replaced, but `mem` still uses its old IOMMU, so the + // mapping for everything past VIRT_START + PHYS_LEN does not yet exist. + mem.iommu().expect_mapping_request(PHYS_START_2); + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN * 2, 42); + mem.iommu().verify_mapping_request( + GuestAddress(VIRT_START.0 + PHYS_LEN as u64), + PHYS_LEN, + true, + ); + + // Verify `mem`'s clone still is the same (though it does use the same IOMMU) + verify_virt_mem_content(&mem_cloned, VIRT_START, PHYS_LEN, 0); + // See, it's the same IOMMU (i.e. it has a mapping PHYS_START_2): + verify_hit( + mem_cloned.iommu().iotlb.read().unwrap(), + VIRT_START, + PHYS_LEN * 2, + Permissions::Read, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + ); + // (But we cannot access that mapping because `mem_cloned`'s physical memory does not + // contain that physical range.) + } +} diff --git a/src/mmap/mod.rs b/src/mmap/mod.rs index 2deb6d0e..b421441a 100644 --- a/src/mmap/mod.rs +++ b/src/mmap/mod.rs @@ -657,4 +657,29 @@ mod tests { .unwrap() }); } + + #[test] + fn test_change_region_addr() { + let addr1 = GuestAddress(0x1000); + let addr2 = GuestAddress(0x2000); + let gm = GuestMemoryMmap::from_ranges(&[(addr1, 0x1000)]).unwrap(); + + assert!(gm.find_region(addr1).is_some()); + assert!(gm.find_region(addr2).is_none()); + + let (gm, region) = gm.remove_region(addr1, 0x1000).unwrap(); + + assert!(gm.find_region(addr1).is_none()); + assert!(gm.find_region(addr2).is_none()); + + // Note that the `region` returned by `remove_region` is an `Arc<_>`, so users generally + // cannot mutate it (change its base address). In this test, we can (we could unwrap the + // `Arc<_>`), but our users generally cannot, hence why this interface exists. + let region = GuestRegionMmap::with_arc(Arc::clone(region.get_mapping()), addr2).unwrap(); + + let gm = gm.insert_region(Arc::new(region)).unwrap(); + + assert!(gm.find_region(addr1).is_none()); + assert!(gm.find_region(addr2).is_some()); + } } From a0a9cc1d0f7a3903a9c76e5e787a8c5f0ed11637 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 30 May 2025 13:04:50 +0200 Subject: [PATCH 10/11] DESIGN: Document I/O virtual memory Document in DESIGN.md how I/O virtual memory is handled. 
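For illustration, here is a rough usage sketch of the interface documented here. It is
modeled on the test helpers earlier in this series; `SimpleIommu` and its `iotlb` field are
test conveniences, and the import paths are assumptions rather than the final public API:

```rust
use std::sync::Arc;
// Import paths below are assumptions; adjust to wherever the series exports these types.
use vm_memory::{
    Bytes, GuestAddress, GuestMemoryMmap, IoMemory, IommuMemory, Permissions, SimpleIommu,
};

fn iova_read_sketch() {
    // Physical backing (with vhost-user, this is the VMM's user address space).
    let phys = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x4000), 0x1000)]).unwrap();

    // Wrap it into an IOMMU-aware view; accesses now use I/O virtual addresses (IOVAs).
    let iommu = Arc::new(SimpleIommu::new());
    let mem = IommuMemory::new(phys, Arc::clone(&iommu), true);

    // Establish an IOVA -> physical mapping (normally driven by the guest's IOMMU driver).
    iommu
        .iotlb
        .write()
        .unwrap()
        .set_mapping(
            GuestAddress(0x2a000),
            GuestAddress(0x4000),
            0x1000,
            Permissions::ReadWrite,
        )
        .unwrap();

    // Every access states the permissions it needs.
    let slice = mem
        .get_slice(GuestAddress(0x2a000), 16, Permissions::Read)
        .unwrap();
    let mut buf = [0u8; 16];
    slice.read(&mut buf, 0).unwrap();
}
```

Whether an access succeeds thus depends both on the IOTLB state and on the permissions
requested, which is what the DESIGN.md section below describes.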
Signed-off-by: Hanna Czenczek --- DESIGN.md | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index 5915f50e..c3098d85 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -2,8 +2,8 @@ ## Objectives -- Provide a set of traits for accessing and configuring the physical memory of - a virtual machine. +- Provide a set of traits for accessing and configuring the physical and/or + I/O virtual memory of a virtual machine. - Provide a clean abstraction of the VM memory such that rust-vmm components can use it without depending on the implementation details specific to different VMMs. @@ -122,6 +122,29 @@ let buf = &mut [0u8; 5]; let result = guest_memory_mmap.write(buf, addr); ``` +### I/O Virtual Address Space + +When using an IOMMU, there no longer is direct access to the guest (physical) +address space, but instead only to I/O virtual address space. In this case: + +- `IoMemory` replaces `GuestMemory`: It requires specifying the required access + permissions (which are relevant for virtual memory). It also removes + interfaces that imply a mostly linear memory layout, because virtual memory is + fragmented into many pages instead of few (large) memory regions. + - Any `IoMemory` still has a `GuestMemory` inside as the underlying address + space, but if an IOMMU is used, that will generally not be guest physical + address space. With vhost-user, for example, it will be the VMM’s user + address space instead. + - `IommuMemory` as our only actually IOMMU-supporting `IoMemory` + implementation uses an `Iommu` object to translate I/O virtual addresses + (IOVAs) into VMM user addresses (VUAs), which are then passed to the inner + `GuestMemory` implementation (like `GuestMemoryMmap`). +- `GuestAddress` (for compatibility) refers to an address in any of these + address spaces: + - Guest physical addresses (GPAs) when no IOMMU is used, + - I/O virtual addresses (IOVAs), + - VMM user addresses (VUAs). + ### Utilities and Helpers The following utilities and helper traits/macros are imported from the @@ -143,7 +166,8 @@ with minor changes: - `Address` inherits `AddressValue` - `GuestMemoryRegion` inherits `Bytes`. The `Bytes` trait must be implemented. -- `GuestMemory` has a generic implementation of `Bytes`. +- `GuestMemory` has a generic implementation of `IoMemory` +- `IoMemory` has a generic implementation of `Bytes`. **Types**: From 4104d5fa54dbf7bd90d59b205a11d787ad7df964 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 30 May 2025 16:49:59 +0200 Subject: [PATCH 11/11] CHANGELOG: Add I/O virtual memory entry Signed-off-by: Hanna Czenczek --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53b14972..b9d99a67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - \[[#311](https://github.com/rust-vmm/vm-memory/pull/311)\] Allow compiling without the ReadVolatile and WriteVolatile implementations - \[[#312](https://github.com/rust-vmm/vm-memory/pull/312)\] `GuestRegionContainer`, a generic container of `GuestMemoryRegion`s, generalizing `GuestMemoryMmap` (which is now a type alias for `GuestRegionContainer`). +- \[[#327](https://github.com/rust-vmm/vm-memory/pull/327)\] I/O virtual memory support via `IoMemory`, `IommuMemory`, and `Iommu`/`Iotlb` ### Changed
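As a closing sketch of the fragmentation handling that the new `IoMemory` interface implies
(modeled on the slice loop in this series' tests; the error variant and import paths are
assumptions, not a definitive API):

```rust
use vm_memory::{Bytes, GuestAddress, GuestMemoryError, IoMemory, Permissions};
// Assumed path for the IOMMU error type; the tests refer to it as `Error::Fragmented`.
use vm_memory::iommu::Error as IommuError;

/// Read `buf.len()` bytes starting at `iova`, tolerating fragmentation of the IOVA mapping.
fn read_fragmented<M: IoMemory>(
    mem: &M,
    mut iova: GuestAddress,
    buf: &mut [u8],
) -> Result<(), GuestMemoryError> {
    let mut done = 0;
    while done < buf.len() {
        let remaining = buf.len() - done;
        // Request the whole remaining range; if the mapping is fragmented, retry with the
        // continuous prefix reported by the error (same pattern as the tests above).
        let slice = match mem.get_slice(iova, remaining, Permissions::Read) {
            Ok(slice) => slice,
            Err(GuestMemoryError::IommuError(IommuError::Fragmented {
                continuous_length, ..
            })) => mem.get_slice(iova, continuous_length, Permissions::Read)?,
            Err(err) => return Err(err),
        };

        let count = slice.len();
        // A real implementation would propagate this error instead of unwrapping.
        slice.read(&mut buf[done..done + count], 0).unwrap();

        done += count;
        iova = GuestAddress(iova.0 + count as u64);
    }
    Ok(())
}
```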