Skip to content

[WIP] Add SIMD support #686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"Cargo.toml",
// guest crates for testing, not part of the workspace
"src/tests/rust_guests/simpleguest/Cargo.toml",
"src/tests/rust_guests/simdguest/Cargo.toml",
"src/tests/rust_guests/callbackguest/Cargo.toml"
]
}
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ exclude = [
"src/tests/rust_guests/dummyguest",
"src/tests/rust_guests/simpleguest",
"src/tests/rust_guests/witguest",
"src/tests/rust_guests/simdguest",
]

[workspace.package]
Expand Down
7 changes: 7 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-unknown-n
dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-unknown-none"
callbackguest_source := "src/tests/rust_guests/callbackguest/target/x86_64-unknown-none"
witguest_source := "src/tests/rust_guests/witguest/target/x86_64-unknown-none"
simdguest_source := "src/tests/rust_guests/simdguest/target/x86_64-unknown-none"
rust_guests_bin_dir := "src/tests/rust_guests/bin"

################
Expand Down Expand Up @@ -43,12 +44,14 @@ build-rust-guests target=default-target: (witguest-wit)
cd src/tests/rust_guests/simpleguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
cd src/tests/rust_guests/dummyguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
cd src/tests/rust_guests/witguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
cd src/tests/rust_guests/simdguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}

# Copy the built Rust guest binaries (callbackguest, simpleguest, dummyguest,
# witguest, simdguest) for the given `target` profile from each guest crate's
# own target directory into the shared rust_guests_bin_dir/<target>/ directory.
# Each `cp` uses a `<guest>*` glob, so every file whose name starts with the
# guest name is copied.
@move-rust-guests target=default-target:
cp {{ callbackguest_source }}/{{ target }}/callbackguest* {{ rust_guests_bin_dir }}/{{ target }}/
cp {{ simpleguest_source }}/{{ target }}/simpleguest* {{ rust_guests_bin_dir }}/{{ target }}/
cp {{ dummyguest_source }}/{{ target }}/dummyguest* {{ rust_guests_bin_dir }}/{{ target }}/
cp {{ witguest_source }}/{{ target }}/witguest* {{ rust_guests_bin_dir }}/{{ target }}/
cp {{ simdguest_source }}/{{ target }}/simdguest* {{ rust_guests_bin_dir }}/{{ target }}/

build-and-move-rust-guests: (build-rust-guests "debug") (move-rust-guests "debug") (build-rust-guests "release") (move-rust-guests "release")
build-and-move-c-guests: (build-c-guests "debug") (move-c-guests "debug") (build-c-guests "release") (move-c-guests "release")
Expand All @@ -61,6 +64,7 @@ clean-rust:
cd src/tests/rust_guests/dummyguest && cargo clean
cd src/tests/rust_guests/callbackguest && cargo clean
cd src/tests/rust_guests/witguest && cargo clean
cd src/tests/rust_guests/simdguest && cargo clean
cd src/tests/rust_guests/witguest && rm -f interface.wasm
git clean -fdx src/tests/c_guests/bin src/tests/rust_guests/bin

Expand Down Expand Up @@ -149,6 +153,7 @@ fmt-check:
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml -- --check
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml -- --check
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml -- --check
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml -- --check
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -- --check

check-license-headers:
Expand All @@ -160,6 +165,7 @@ fmt-apply:
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml

clippy target=default-target: (witguest-wit)
Expand All @@ -169,6 +175,7 @@ clippy-guests target=default-target: (witguest-wit)
cd src/tests/rust_guests/simpleguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
cd src/tests/rust_guests/callbackguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
cd src/tests/rust_guests/witguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
cd src/tests/rust_guests/simdguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings

clippy-apply-fix-unix:
cargo clippy --fix --all
Expand Down
60 changes: 60 additions & 0 deletions count_simd_instructions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash

# Count SIMD instructions in an ELF binary, bucketed by instruction-set extension.
# Usage: ./count_simd_instructions.sh <binary_file>
#
# The numbers are a heuristic: each bucket is the number of `objdump -d` output
# lines that match the bucket's (case-insensitive) mnemonic pattern. A single
# line can land in more than one bucket (for example a `vpaddd` on zmm
# registers counts as both AVX2 and AVX-512), so the total is an upper bound,
# not an exact instruction count.

if [ $# -eq 0 ]; then
    echo "Usage: $0 <binary_file>" >&2
    exit 1
fi

BINARY="$1"

if [ ! -f "$BINARY" ]; then
    echo "Error: File '$BINARY' not found" >&2
    exit 1
fi

echo "Analyzing SIMD instructions in: $BINARY"
echo "========================================"

# Disassemble once and reuse the text for every bucket.
if ! DISASM=$(objdump -d "$BINARY" 2>/dev/null); then
    echo "Error: Failed to disassemble binary. Make sure it's a valid ELF file." >&2
    exit 1
fi

# Print the number of disassembly lines matching the extended regex in $1.
# `grep -c` prints 0 (and exits non-zero) when nothing matches, which is fine
# here because only the printed count is used.
count_matches() {
    printf '%s\n' "$DISASM" | grep -c -i -E -- "$1"
}

# Scalar and packed floating-point ops (legacy, non-VEX encodings). The packed
# forms are listed explicitly; previously only the scalar forms were counted.
SSE_COUNT=$(count_matches '\b(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd|movaps|movapd|movups|movupd|movlps|movlpd|movhps|movhpd|movlhps|movhlps|unpcklps|unpcklpd|unpckhps|unpckhpd|addps|addpd|subps|subpd|mulps|mulpd|divps|divpd|sqrtps|sqrtpd|maxps|maxpd|minps|minpd|cmpps|cmppd|andps|andpd|andnps|andnpd|orps|orpd|xorps|xorpd|shufps|shufpd)\b')

# Integer SIMD on xmm registers. `movq`/`movd` are only counted when an xmm
# register appears on the same line: a bare `movq` is also the AT&T spelling
# of an ordinary 64-bit integer move and would otherwise inflate the count.
SSE2_COUNT=$(count_matches '\b(movdqa|movdqu|paddb|paddw|paddd|paddq|psubb|psubw|psubd|psubq|pmullw|pmuludq|pand|pandn|por|pxor|psllw|pslld|psllq|psrlw|psrld|psrlq|psraw|psrad|packsswb|packssdw|packuswb|punpckhbw|punpckhwd|punpckhdq|punpckhqdq|punpcklbw|punpcklwd|punpckldq|punpcklqdq|pcmpeqb|pcmpeqw|pcmpeqd|pcmpgtb|pcmpgtw|pcmpgtd|pmaxub|pmaxsw|pminub|pminsw|psadbw|pavgb|pavgw)\b|\bmov[dq]\b.*\bxmm[0-9]+\b')

SSE3_COUNT=$(count_matches '\b(addsubpd|addsubps|haddpd|haddps|hsubpd|hsubps|movddup|movshdup|movsldup|lddqu)\b')

SSSE3_COUNT=$(count_matches '\b(pabsb|pabsw|pabsd|palignr|phaddb|phaddw|phaddd|phaddsw|phsubb|phsubw|phsubd|phsubsw|pmaddubsw|pmulhrsw|pshufb|psignb|psignw|psignd)\b')

SSE41_COUNT=$(count_matches '\b(blendpd|blendps|blendvpd|blendvps|dppd|dpps|extractps|insertps|movntdqa|mpsadbw|packusdw|pblendvb|pblendw|pcmpeqq|pextrb|pextrd|pextrq|pextrw|phminposuw|pinsrb|pinsrd|pinsrq|pmaxsb|pmaxsd|pmaxud|pmaxuw|pminsb|pminsd|pminud|pminuw|pmovsxbw|pmovsxbd|pmovsxbq|pmovsxwd|pmovsxwq|pmovsxdq|pmovzxbw|pmovzxbd|pmovzxbq|pmovzxwd|pmovzxwq|pmovzxdq|pmuldq|pmulld|ptest|roundpd|roundps|roundsd|roundss)\b')

# objdump's default AT&T syntax prints crc32 with an operand-size suffix
# (crc32b/w/l/q), so the suffix is optional here.
SSE42_COUNT=$(count_matches '\b(crc32[bwlq]?|pcmpestri|pcmpestrm|pcmpistri|pcmpistrm|pcmpgtq)\b')

# VEX-encoded floating-point ops. Every alternative is a complete mnemonic
# (after the leading `v`) so that the trailing \b can actually match.
AVX_COUNT=$(count_matches '\bv(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd|movaps|movapd|movups|movupd|movlps|movlpd|movhps|movhpd|movlhps|movhlps|unpcklps|unpcklpd|unpckhps|unpckhpd|addps|addpd|subps|subpd|mulps|mulpd|divps|divpd|sqrtps|sqrtpd|maxps|maxpd|minps|minpd|cmpps|cmppd|andps|andpd|andnps|andnpd|orps|orpd|xorps|xorpd|shufps|shufpd|blendps|blendpd|blendvps|blendvpd|dpps|dppd|roundps|roundpd|roundss|roundsd|insertf128|extractf128|broadcast(ss|sd|f128)|permil(ps|pd)|perm2f128|maskmov(ps|pd)|test(ps|pd)|zeroupper|zeroall)\b')

# VEX-encoded integer ops, matched by mnemonic prefix: the `[a-z0-9]*` tail
# lets `padd` match vpaddb/vpaddw/vpaddd/vpaddq and so on. The mnemonic alone
# cannot distinguish 128-bit (AVX) from 256-bit (AVX2) forms, so both are
# counted in this bucket.
AVX2_COUNT=$(count_matches '\bv(pabs|padd|psub|pmul|pandn?|por|pxor|psll|psrl|psra|pack|punpck|pcmp|pmax|pmin|psad|pavg|pblend|pbroadcast|perm(d|q|ps|pd|2i128)|p?gather|pinsr|pextr|pmovsx|pmovzx|psign|pshuf|palign|pmadd|phsub|phadd|inserti128|extracti128|broadcasti128)[a-z0-9]*\b')

# AVX-512 is detected from operands rather than mnemonics: zmm registers,
# opmask registers k0-k7, and EVEX decorations such as {z}, {1to16}, {rn-sae}.
AVX512_COUNT=$(count_matches '\b(evex|zmm[0-9]+|k[0-7])\b|[{](z|[0-9]+to[0-9]+|r[nduz]-sae|sae)[}]')

echo "SSE instructions: $SSE_COUNT"
echo "SSE2 instructions: $SSE2_COUNT"
echo "SSE3 instructions: $SSE3_COUNT"
echo "SSSE3 instructions: $SSSE3_COUNT"
echo "SSE4.1 instructions: $SSE41_COUNT"
echo "SSE4.2 instructions: $SSE42_COUNT"
echo "AVX instructions: $AVX_COUNT"
echo "AVX2 instructions: $AVX2_COUNT"
echo "AVX-512 instructions: $AVX512_COUNT"
echo "========================================"

TOTAL=$((SSE_COUNT + SSE2_COUNT + SSE3_COUNT + SSSE3_COUNT + SSE41_COUNT + SSE42_COUNT + AVX_COUNT + AVX2_COUNT + AVX512_COUNT))
echo "Total SIMD instructions: $TOTAL"
2 changes: 1 addition & 1 deletion src/hyperlight_guest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ Provides only the essential building blocks for interacting with the host enviro

[dependencies]
anyhow = { version = "1.0.98", default-features = false }
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
# serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
hyperlight-common = { workspace = true }
11 changes: 1 addition & 10 deletions src/hyperlight_guest/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ limitations under the License.
use alloc::format;
use alloc::string::String;

use anyhow;
use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
use {anyhow, serde_json};

pub type Result<T> = core::result::Result<T, HyperlightGuestError>;

Expand All @@ -42,12 +42,3 @@ impl From<anyhow::Error> for HyperlightGuestError {
}
}
}

/// Converts a `serde_json` error into a [`HyperlightGuestError`] of kind
/// `ErrorCode::GuestError`, embedding the error's `Debug` rendering in the
/// message.
impl From<serde_json::Error> for HyperlightGuestError {
    fn from(error: serde_json::Error) -> Self {
        let kind = ErrorCode::GuestError;
        let message = format!("Error: {:?}", error);
        Self { kind, message }
    }
}
123 changes: 119 additions & 4 deletions src/hyperlight_host/src/hypervisor/kvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region};
use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region, kvm_xcrs};
use kvm_ioctls::Cap::UserMemory;
use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
use log::LevelFilter;
Expand All @@ -37,8 +37,8 @@ use super::handlers::DbgMemAccessHandlerWrapper;
use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
#[cfg(feature = "init-paging")]
use super::{
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE,
EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT,
CR4_OSXSAVE, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, XCR0_AVX, XCR0_SSE, XCR0_X87,
};
use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU};
#[cfg(gdb)]
Expand Down Expand Up @@ -336,6 +336,7 @@ impl KVMDriver {
})?;

let mut vcpu_fd = vm_fd.create_vcpu(0)?;
Self::setup_cpuid(&kvm, &mut vcpu_fd)?;
Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?;

#[cfg(gdb)]
Expand Down Expand Up @@ -409,7 +410,7 @@ impl KVMDriver {
cfg_if::cfg_if! {
if #[cfg(feature = "init-paging")] {
sregs.cr3 = _pml4_addr;
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_OSXSAVE;
sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP;
sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX;
sregs.cs.l = 1; // required for 64-bit mode
Expand All @@ -419,6 +420,120 @@ impl KVMDriver {
}
}
vcpu_fd.set_sregs(&sregs)?;

// Setup XCR0 (Extended Control Register 0) to enable SIMD features
// This is required for AVX and other SIMD instruction support
// Only set XCR0 if the init-paging feature is enabled
cfg_if::cfg_if! {
if #[cfg(feature = "init-paging")] {
// Create a properly initialized kvm_xcrs structure
let mut xcrs: kvm_xcrs = unsafe { std::mem::zeroed() };

// Set XCR0 to enable x87 FPU (required), SSE, and AVX
// XCR0 bit 0 (x87) must always be set for any XSAVE features
xcrs.xcrs[0].xcr = 0; // XCR0 register number
xcrs.xcrs[0].value = XCR0_X87 | XCR0_SSE | XCR0_AVX;
xcrs.nr_xcrs = 1;

println!("Setting XCRs: XCR0={:#x}, nr_xcrs={}", xcrs.xcrs[0].value, xcrs.nr_xcrs);

match vcpu_fd.set_xcrs(&xcrs) {
Ok(_) => {
println!("Successfully set XCR0 to enable SIMD features: {:#x}", xcrs.xcrs[0].value);
},
Err(e) => {
println!("Failed to set XCRs (XCR0) for SIMD support: {:?}", e);
}
}
}
}

Ok(())
}

/// Setup the CPUID for the vCPU to enable SIMD features.
/// This is done by just mirroring the host's CPUID in the guest.
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
fn setup_cpuid(kvm: &Kvm, vcpu_fd: &mut VcpuFd) -> Result<()> {
// Get the supported CPUID from the host machine
let cpuid = kvm.get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;

let entries = cpuid.as_slice();

// https://en.wikipedia.org/wiki/CPUID
// sse: EAX=1, EDX bit 25
if !entries
.get(1)
.map(|entry| entry.edx & (1 << 25) != 0)
.unwrap_or(false)
{
return Err(new_error!("SSE support not detected on the host machine"));
}
// sse2 is EAX=1, EDX bit 26
if !entries
.get(1)
.map(|entry| entry.edx & (1 << 26) != 0)
.unwrap_or(false)
{
return Err(new_error!("SSE2 support not detected on the host machine"));
}
// sse3 is EAX=1, ECX bit 0
if !entries
.get(1)
.map(|entry| entry.ecx & (1 << 0) != 0)
.unwrap_or(false)
{
return Err(new_error!("SSE3 support not detected on the host machine"));
}
// ssse3 is EAX=1, ECX bit 9
if !entries
.get(1)
.map(|entry| entry.ecx & (1 << 9) != 0)
.unwrap_or(false)
{
return Err(new_error!("SSSE3 support not detected on the host machine"));
}
// sse4.1 is EAX=1, ECX bit 19
if !entries
.get(1)
.map(|entry| entry.ecx & (1 << 19) != 0)
.unwrap_or(false)
{
return Err(new_error!(
"SSE4.1 support not detected on the host machine"
));
}
// sse4.2 is EAX=1, ECX bit 20
if !entries
.get(1)
.map(|entry| entry.ecx & (1 << 20) != 0)
.unwrap_or(false)
{
return Err(new_error!(
"SSE4.2 support not detected on the host machine"
));
}
// avx is EAX=1, ECX bit 28
if !entries
.get(1)
.map(|entry| entry.ecx & (1 << 28) != 0)
.unwrap_or(false)
{
return Err(new_error!("AVX support not detected on the host machine"));
}
// avx2 is EAX=7, EBX bit 5
if !entries
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we error on all of these, rather than mirroring the host cpuid & being careful to only configure the features that actually exist on this platform?

Copy link
Contributor Author

@ludfjig ludfjig Jul 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if a guest is compiled with for example +avx, but the host doesn't support it? We won't be able to run it so I thought it'd be better to error early than get InvalidOp exception in guest.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like that might just need to be a "you have to be careful about this when using simd if not using feature detection" thing? (a lot of software nowdays has feature detection for advanced simd anyway). I feel it probably doesn't make sense to statically decide in Hyperlight what simd features will be supported; this both rules out using hyperlight on older hardware & prevents using newer simd features on newer hardware.

.get(7)
.map(|entry| entry.ebx & (1 << 5) != 0)
.unwrap_or(false)
{
return Err(new_error!("AVX2 support not detected on the host machine"));
}

// Set the CPUID for the guest's vCPU to be the same as the host's
vcpu_fd.set_cpuid2(&cpuid)?;
println!("CPUID set successfully for SIMD support");

Ok(())
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/hyperlight_host/src/hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ cfg_if::cfg_if! {
pub(crate) const CR4_PAE: u64 = 1 << 5;
pub(crate) const CR4_OSFXSR: u64 = 1 << 9;
pub(crate) const CR4_OSXMMEXCPT: u64 = 1 << 10;
pub(crate) const CR4_OSXSAVE: u64 = 1 << 18;
pub(crate) const CR0_PE: u64 = 1;
pub(crate) const CR0_MP: u64 = 1 << 1;
pub(crate) const CR0_ET: u64 = 1 << 4;
Expand All @@ -91,6 +92,11 @@ cfg_if::cfg_if! {
pub(crate) const EFER_LMA: u64 = 1 << 10;
pub(crate) const EFER_SCE: u64 = 1;
pub(crate) const EFER_NX: u64 = 1 << 11;

// XCR0 (Extended Control Register 0) bits for XSAVE features
pub(crate) const XCR0_X87: u64 = 1 << 0; // x87 FPU state
pub(crate) const XCR0_SSE: u64 = 1 << 1; // SSE state (XMM registers)
pub(crate) const XCR0_AVX: u64 = 1 << 2; // AVX state (YMM registers)
}
}

Expand Down
Loading
Loading