From a5e52a1229c90662c5d6d8989cd69227f97e4dc4 Mon Sep 17 00:00:00 2001 From: Norbert Manthey Date: Thu, 3 Sep 2020 20:23:03 +0200 Subject: [PATCH 1/2] mmap: try THP via madvise Huge pages bring performance benefits for memory-intensive applications. Huge pages can be obtained either from a pool of statically pre-reserved huge pages or via transparent huge pages (THP); the latter is the simpler way to use them. While some distributions enable transparent huge pages by default, other distributions choose to allow this feature only when it is requested through the madvise system call. This change adds the madvise system call to the memory allocation. On Unix systems, the invocation of mmap is followed by an madvise system call that asks the kernel to back the memory with transparent huge pages, if possible. Note: this is a prototype implementation of huge page support. No performance testing has been performed yet. We expect results similar to those reported in https://arxiv.org/abs/2004.14378 Once this data is available, a configuration layer should be added so that this change can be enabled or disabled. Signed-off-by: Norbert Manthey --- src/mmap_unix.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mmap_unix.rs b/src/mmap_unix.rs index de583f73..52f92809 100644 --- a/src/mmap_unix.rs +++ b/src/mmap_unix.rs @@ -154,6 +154,8 @@ impl MmapRegion { return Err(Error::Mmap(io::Error::last_os_error())); } + let _ret = unsafe { libc::madvise(addr, size, libc::MADV_HUGEPAGE) }; + Ok(Self { addr: addr as *mut u8, size, From d97ec75841b6c9556492553e19118426e2476f8f Mon Sep 17 00:00:00 2001 From: Norbert Manthey Date: Mon, 16 Nov 2020 00:21:08 +0100 Subject: [PATCH 2/2] THP: introduce options Allow callers to control, via options, whether huge pages are used.
Signed-off-by: Norbert Manthey --- src/guest_memory.rs | 9 +++++++++ src/mmap.rs | 12 +++++++----- src/mmap_unix.rs | 16 +++++++++++++--- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 08d0a530..a65ff4b8 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -159,6 +159,15 @@ impl FileOffset { } } +/// Configurations options usable for Guest Memory +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] +pub struct GuestMemoryOptions { + /// Use huge pages to back guest memory + pub huge_page: bool, + /// Use transparent huge pages to back guest memory + pub transparent_huge_page: bool, +} + /// Represents a continuous region of guest physical memory. #[allow(clippy::len_without_is_empty)] pub trait GuestMemoryRegion: Bytes { diff --git a/src/mmap.rs b/src/mmap.rs index 406bc867..7505ae89 100644 --- a/src/mmap.rs +++ b/src/mmap.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::address::Address; use crate::guest_memory::{ - self, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, MemoryRegionAddress, + self, FileOffset, GuestAddress, GuestMemory, GuestMemoryOptions, GuestMemoryRegion, GuestUsize, MemoryRegionAddress, }; use crate::volatile_memory::{VolatileMemory, VolatileSlice}; use crate::Bytes; @@ -391,6 +391,7 @@ impl GuestMemoryRegion for GuestRegionMmap { /// virtual address space of the calling process. 
#[derive(Clone, Debug, Default)] pub struct GuestMemoryMmap { + options: GuestMemoryOptions, regions: Vec>, } @@ -416,6 +417,7 @@ impl GuestMemoryMmap { A: Borrow<(GuestAddress, usize, Option)>, T: IntoIterator, { + let options = GuestMemoryOptions {huge_page : false, transparent_huge_page : false}; Self::from_regions( ranges .into_iter() @@ -424,9 +426,9 @@ impl GuestMemoryMmap { let size = x.borrow().1; if let Some(ref f_off) = x.borrow().2 { - MmapRegion::from_file(f_off.clone(), size) + MmapRegion::from_file(f_off.clone(), size, options) } else { - MmapRegion::new(size) + MmapRegion::new(size, options) } .map_err(Error::MmapRegion) .and_then(|r| GuestRegionMmap::new(r, guest_base)) @@ -476,7 +478,7 @@ impl GuestMemoryMmap { } } - Ok(Self { regions }) + Ok(Self { regions, options: GuestMemoryOptions {huge_page : false, transparent_huge_page : false} }) } /// Insert a region into the `GuestMemoryMmap` object and return a new `GuestMemoryMmap`. @@ -509,7 +511,7 @@ impl GuestMemoryMmap { if self.regions.get(region_index).unwrap().size() as GuestUsize == size { let mut regions = self.regions.clone(); let region = regions.remove(region_index); - return Ok((Self { regions }, region)); + return Ok((Self { regions, options: GuestMemoryOptions {huge_page : false, transparent_huge_page : false} }, region)); } } diff --git a/src/mmap_unix.rs b/src/mmap_unix.rs index 52f92809..aeb3d36a 100644 --- a/src/mmap_unix.rs +++ b/src/mmap_unix.rs @@ -20,6 +20,7 @@ use std::result; use libc; use crate::guest_memory::FileOffset; +use crate::guest_memory::GuestMemoryOptions; use crate::mmap::{check_file_offset, AsSlice}; use crate::volatile_memory::{self, compute_offset, VolatileMemory, VolatileSlice}; @@ -93,12 +94,13 @@ impl MmapRegion { /// /// # Arguments /// * `size` - The size of the memory region in bytes. 
- pub fn new(size: usize) -> Result { + pub fn new(size: usize, options: GuestMemoryOptions) -> Result { Self::build( None, size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_ANONYMOUS | libc::MAP_NORESERVE | libc::MAP_PRIVATE, + options ) } @@ -108,12 +110,13 @@ impl MmapRegion { /// * `file_offset` - The mapping will be created at offset `file_offset.start` in the file /// referred to by `file_offset.file`. /// * `size` - The size of the memory region in bytes. - pub fn from_file(file_offset: FileOffset, size: usize) -> Result { + pub fn from_file(file_offset: FileOffset, size: usize, options: GuestMemoryOptions) -> Result { Self::build( Some(file_offset), size, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_NORESERVE | libc::MAP_SHARED, + options ) } @@ -132,6 +135,7 @@ impl MmapRegion { size: usize, prot: i32, flags: i32, + options: GuestMemoryOptions, ) -> Result { // Forbid MAP_FIXED, as it doesn't make sense in this context, and is pretty dangerous // in general. @@ -154,7 +158,13 @@ impl MmapRegion { return Err(Error::Mmap(io::Error::last_os_error())); } - let _ret = unsafe { libc::madvise(addr, size, libc::MADV_HUGEPAGE) }; + if options.huge_page { + if options.transparent_huge_page { + if size > 2 * 1024 * 4096 { + let _ret = unsafe { libc::madvise(addr, size, libc::MADV_HUGEPAGE) }; + } + } + } Ok(Self { addr: addr as *mut u8,