diff --git a/.vscode/settings.json b/.vscode/settings.json index 1ec1edf0e..fede0b6b4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,7 @@ "Cargo.toml", // guest crates for testing, not part of the workspace "src/tests/rust_guests/simpleguest/Cargo.toml", + "src/tests/rust_guests/simdguest/Cargo.toml", "src/tests/rust_guests/callbackguest/Cargo.toml" ] } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 7983d278c..d815d4a6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1206,7 +1206,6 @@ version = "0.7.0" dependencies = [ "anyhow", "hyperlight-common", - "serde_json", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2c16795c3..064b4a87a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ exclude = [ "src/tests/rust_guests/dummyguest", "src/tests/rust_guests/simpleguest", "src/tests/rust_guests/witguest", + "src/tests/rust_guests/simdguest", ] [workspace.package] diff --git a/Justfile b/Justfile index 66b06a587..291303abc 100644 --- a/Justfile +++ b/Justfile @@ -13,6 +13,7 @@ simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-unknown-n dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-unknown-none" callbackguest_source := "src/tests/rust_guests/callbackguest/target/x86_64-unknown-none" witguest_source := "src/tests/rust_guests/witguest/target/x86_64-unknown-none" +simdguest_source := "src/tests/rust_guests/simdguest/target/x86_64-unknown-none" rust_guests_bin_dir := "src/tests/rust_guests/bin" ################ @@ -43,12 +44,14 @@ build-rust-guests target=default-target: (witguest-wit) cd src/tests/rust_guests/simpleguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} cd src/tests/rust_guests/dummyguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} cd src/tests/rust_guests/witguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} + cd src/tests/rust_guests/simdguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }} @move-rust-guests target=default-target: cp {{ callbackguest_source }}/{{ target }}/callbackguest* {{ rust_guests_bin_dir }}/{{ target }}/ cp {{ simpleguest_source }}/{{ target }}/simpleguest* {{ rust_guests_bin_dir }}/{{ target }}/ cp {{ dummyguest_source }}/{{ target }}/dummyguest* {{ rust_guests_bin_dir }}/{{ target }}/ cp {{ witguest_source }}/{{ target }}/witguest* {{ rust_guests_bin_dir }}/{{ target }}/ + cp {{ simdguest_source }}/{{ target }}/simdguest* {{ rust_guests_bin_dir }}/{{ target }}/ build-and-move-rust-guests: (build-rust-guests "debug") (move-rust-guests "debug") (build-rust-guests "release") (move-rust-guests "release") build-and-move-c-guests: (build-c-guests "debug") (move-c-guests "debug") (build-c-guests "release") (move-c-guests "release") @@ -61,6 +64,7 @@ clean-rust: cd src/tests/rust_guests/dummyguest && cargo clean cd src/tests/rust_guests/callbackguest && cargo clean cd src/tests/rust_guests/witguest && cargo clean + cd src/tests/rust_guests/simdguest && cargo clean cd src/tests/rust_guests/witguest && rm -f interface.wasm git clean -fdx src/tests/c_guests/bin src/tests/rust_guests/bin @@ -149,6 +153,7 @@ fmt-check: cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml -- --check + cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml -- --check cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -- --check check-license-headers: @@ -160,6 +165,7 @@ fmt-apply: cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml + cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml clippy target=default-target: (witguest-wit) @@ -169,6 +175,7 @@ clippy-guests target=default-target: (witguest-wit) cd src/tests/rust_guests/simpleguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings cd src/tests/rust_guests/callbackguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings cd src/tests/rust_guests/witguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings + cd src/tests/rust_guests/simdguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings clippy-apply-fix-unix: cargo clippy --fix --all diff --git a/count_simd_instructions.sh b/count_simd_instructions.sh new file mode 100755 index 000000000..f5a79e8fc --- /dev/null +++ b/count_simd_instructions.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Script to count SIMD instructions in an ELF binary +# Usage: ./count_simd_instructions.sh + +if [ $# -eq 0 ]; then + echo "Usage: $0 " + exit 1 +fi + +BINARY="$1" + +if [ ! -f "$BINARY" ]; then + echo "Error: File '$BINARY' not found" + exit 1 +fi + +echo "Analyzing SIMD instructions in: $BINARY" +echo "========================================" + +# Disassemble the binary +DISASM=$(objdump -d "$BINARY" 2>/dev/null) + +if [ $? -ne 0 ]; then + echo "Error: Failed to disassemble binary. Make sure it's a valid ELF file." + exit 1 +fi + +# Count different instruction sets +SSE_COUNT=$(echo "$DISASM" | grep -i -E "\b(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd)\b" | wc -l) + +SSE2_COUNT=$(echo "$DISASM" | grep -i -E "\b(movdqa|movdqu|movq|movd|paddb|paddw|paddd|paddq|psubb|psubw|psubd|psubq|pmullw|pmuludq|pand|pandn|por|pxor|psllw|pslld|psllq|psrlw|psrld|psrlq|psraw|psrad|packsswb|packssdw|packuswb|punpckhbw|punpckhwd|punpckhdq|punpckhqdq|punpcklbw|punpcklwd|punpckldq|punpcklqdq|pcmpeqb|pcmpeqw|pcmpeqd|pcmpgtb|pcmpgtw|pcmpgtd|pmaxub|pmaxsw|pminub|pminsw|psadbw|pavgb|pavgw)\b" | wc -l) + +SSE3_COUNT=$(echo "$DISASM" | grep -i -E "\b(addsubpd|addsubps|haddpd|haddps|hsubpd|hsubps|movddup|movshdup|movsldup|lddqu)\b" | wc -l) + +SSSE3_COUNT=$(echo "$DISASM" | grep -i -E "\b(pabsb|pabsw|pabsd|palignr|phaddb|phaddw|phaddd|phaddsw|phsubb|phsubw|phsubd|phsubsw|pmaddubsw|pmulhrsw|pshufb|psignb|psignw|psignd)\b" | wc -l) + +SSE41_COUNT=$(echo "$DISASM" | grep -i -E "\b(blendpd|blendps|blendvpd|blendvps|dppd|dpps|extractps|insertps|movntdqa|mpsadbw|packusdw|pblendvb|pblendw|pcmpeqq|pextrb|pextrd|pextrq|pextrw|phminposuw|pinsrb|pinsrd|pinsrq|pmaxsb|pmaxsd|pmaxud|pmaxuw|pminsb|pminsd|pminud|pminuw|pmovsxbw|pmovsxbd|pmovsxbq|pmovsxwd|pmovsxwq|pmovsxdq|pmovzxbw|pmovzxbd|pmovzxbq|pmovzxwd|pmovzxwq|pmovzxdq|pmuldq|pmulld|ptest|roundpd|roundps|roundsd|roundss)\b" | wc -l) + +SSE42_COUNT=$(echo "$DISASM" | grep -i -E "\b(crc32|pcmpestri|pcmpestrm|pcmpistri|pcmpistrm|pcmpgtq)\b" | wc -l) + +AVX_COUNT=$(echo "$DISASM" | grep -i -E "\bv(movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd|movaps|movapd|movups|movupd|movlps|movlpd|movhps|movhpd|movlhps|movhlps|unpcklps|unpcklpd|unpckhps|unpckhpd|addps|addpd|subps|subpd|mulps|mulpd|divps|divpd|sqrtps|sqrtpd|maxps|maxpd|minps|minpd|cmpps|cmppd|andps|andpd|andnps|andnpd|orps|orpd|xorps|xorpd|shufps|shufpd|blendps|blendpd|blendvps|blendvpd|dpps|dppd|roundps|roundpd|roundss|roundsd|insertf128|extractf128|broadcast|permute|maskload|maskstore|testc|testz|testnzc)\b" | wc -l) + +AVX2_COUNT=$(echo "$DISASM" | grep -i -E "\bv(pabs|padd|psub|pmul|pand|pandn|por|pxor|psll|psrl|psra|ppack|punpck|pcmp|pmax|pmin|psad|pavg|pblend|pbroadcast|perm|pgather|pinsert|pextract|pmovsx|pmovzx|psign|pshuf|palign|pmadd|pmaddubs|phsub|phadd)\b" | wc -l) + +AVX512_COUNT=$(echo "$DISASM" | grep -i -E "\b(evex|zmm|k[0-7])\b|\bv.*\{.*\}\b" | wc -l) + +echo "SSE instructions: $SSE_COUNT" +echo "SSE2 instructions: $SSE2_COUNT" +echo "SSE3 instructions: $SSE3_COUNT" +echo "SSSE3 instructions: $SSSE3_COUNT" +echo "SSE4.1 instructions: $SSE41_COUNT" +echo "SSE4.2 instructions: $SSE42_COUNT" +echo "AVX instructions: $AVX_COUNT" +echo "AVX2 instructions: $AVX2_COUNT" +echo "AVX-512 instructions: $AVX512_COUNT" +echo "========================================" + +TOTAL=$((SSE_COUNT + SSE2_COUNT + SSE3_COUNT + SSSE3_COUNT + SSE41_COUNT + SSE42_COUNT + AVX_COUNT + AVX2_COUNT + AVX512_COUNT)) +echo "Total SIMD instructions: $TOTAL" diff --git a/src/hyperlight_guest/Cargo.toml b/src/hyperlight_guest/Cargo.toml index 8fce35293..ad6e435d2 100644 --- a/src/hyperlight_guest/Cargo.toml +++ b/src/hyperlight_guest/Cargo.toml @@ -13,5 +13,5 @@ Provides only the essential building blocks for interacting with the host enviro [dependencies] anyhow = { version = "1.0.98", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +# serde_json = { version = "1.0", default-features = false, features = ["alloc"] } hyperlight-common = { workspace = true } diff --git a/src/hyperlight_guest/src/error.rs b/src/hyperlight_guest/src/error.rs index db7e01924..8525afac7 100644 --- a/src/hyperlight_guest/src/error.rs +++ b/src/hyperlight_guest/src/error.rs @@ -17,8 +17,8 @@ limitations under the License. use alloc::format; use alloc::string::String; +use anyhow; use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; -use {anyhow, serde_json}; pub type Result = core::result::Result; @@ -42,12 +42,3 @@ impl From for HyperlightGuestError { } } } - -impl From for HyperlightGuestError { - fn from(error: serde_json::Error) -> Self { - Self { - kind: ErrorCode::GuestError, - message: format!("Error: {:?}", error), - } - } -} diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index d85a6a838..8df7b887a 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use std::sync::Mutex; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region}; +use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region, kvm_xcrs}; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use log::LevelFilter; @@ -37,8 +37,8 @@ use super::handlers::DbgMemAccessHandlerWrapper; use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; #[cfg(feature = "init-paging")] use super::{ - CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, - EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, + CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, + CR4_OSXSAVE, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, XCR0_AVX, XCR0_SSE, XCR0_X87, }; use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU}; #[cfg(gdb)] @@ -336,6 +336,7 @@ impl KVMDriver { })?; let mut vcpu_fd = vm_fd.create_vcpu(0)?; + Self::setup_cpuid(&kvm, &mut vcpu_fd)?; Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?; #[cfg(gdb)] @@ -409,7 +410,7 @@ impl KVMDriver { cfg_if::cfg_if! { if #[cfg(feature = "init-paging")] { sregs.cr3 = _pml4_addr; - sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT; + sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_OSXSAVE; sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP; sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX; sregs.cs.l = 1; // required for 64-bit mode @@ -419,6 +420,120 @@ impl KVMDriver { } } vcpu_fd.set_sregs(&sregs)?; + + // Setup XCR0 (Extended Control Register 0) to enable SIMD features + // This is required for AVX and other SIMD instruction support + // Only set XCR0 if the init-paging feature is enabled + cfg_if::cfg_if! { + if #[cfg(feature = "init-paging")] { + // Create a properly initialized kvm_xcrs structure + let mut xcrs: kvm_xcrs = unsafe { std::mem::zeroed() }; + + // Set XCR0 to enable x87 FPU (required), SSE, and AVX + // XCR0 bit 0 (x87) must always be set for any XSAVE features + xcrs.xcrs[0].xcr = 0; // XCR0 register number + xcrs.xcrs[0].value = XCR0_X87 | XCR0_SSE | XCR0_AVX; + xcrs.nr_xcrs = 1; + + println!("Setting XCRs: XCR0={:#x}, nr_xcrs={}", xcrs.xcrs[0].value, xcrs.nr_xcrs); + + match vcpu_fd.set_xcrs(&xcrs) { + Ok(_) => { + println!("Successfully set XCR0 to enable SIMD features: {:#x}", xcrs.xcrs[0].value); + }, + Err(e) => { + println!("Failed to set XCRs (XCR0) for SIMD support: {:?}", e); + } + } + } + } + + Ok(()) + } + + /// Setup the CPUID for the vCPU to enable SIMD features. + /// This is done by just mirroring the host's CPUID in the guest. + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + fn setup_cpuid(kvm: &Kvm, vcpu_fd: &mut VcpuFd) -> Result<()> { + // Get the supported CPUID from the host machine + let cpuid = kvm.get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?; + + let entries = cpuid.as_slice(); + + // https://en.wikipedia.org/wiki/CPUID + // sse: EAX=1, EDX bit 25 + if !entries + .get(1) + .map(|entry| entry.edx & (1 << 25) != 0) + .unwrap_or(false) + { + return Err(new_error!("SSE support not detected on the host machine")); + } + // sse2 is EAX=1, EDX bit 26 + if !entries + .get(1) + .map(|entry| entry.edx & (1 << 26) != 0) + .unwrap_or(false) + { + return Err(new_error!("SSE2 support not detected on the host machine")); + } + // sse3 is EAX=1, ECX bit 0 + if !entries + .get(1) + .map(|entry| entry.ecx & (1 << 0) != 0) + .unwrap_or(false) + { + return Err(new_error!("SSE3 support not detected on the host machine")); + } + // ssse3 is EAX=1, ECX bit 9 + if !entries + .get(1) + .map(|entry| entry.ecx & (1 << 9) != 0) + .unwrap_or(false) + { + return Err(new_error!("SSSE3 support not detected on the host machine")); + } + // sse4.1 is EAX=1, ECX bit 19 + if !entries + .get(1) + .map(|entry| entry.ecx & (1 << 19) != 0) + .unwrap_or(false) + { + return Err(new_error!( + "SSE4.1 support not detected on the host machine" + )); + } + // sse4.2 is EAX=1, ECX bit 20 + if !entries + .get(1) + .map(|entry| entry.ecx & (1 << 20) != 0) + .unwrap_or(false) + { + return Err(new_error!( + "SSE4.2 support not detected on the host machine" + )); + } + // avx is EAX=1, ECX bit 28 + if !entries + .get(1) + .map(|entry| entry.ecx & (1 << 28) != 0) + .unwrap_or(false) + { + return Err(new_error!("AVX support not detected on the host machine")); + } + // avx2 is EAX=7, EBX bit 5 + if !entries + .get(7) + .map(|entry| entry.ebx & (1 << 5) != 0) + .unwrap_or(false) + { + return Err(new_error!("AVX2 support not detected on the host machine")); + } + + // Set the CPUID for the guest's vCPU to be the same as the host's + vcpu_fd.set_cpuid2(&cpuid)?; + println!("CPUID set successfully for SIMD support"); + Ok(()) } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 0a31ee468..b4aa72fe4 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -80,6 +80,7 @@ cfg_if::cfg_if! { pub(crate) const CR4_PAE: u64 = 1 << 5; pub(crate) const CR4_OSFXSR: u64 = 1 << 9; pub(crate) const CR4_OSXMMEXCPT: u64 = 1 << 10; + pub(crate) const CR4_OSXSAVE: u64 = 1 << 18; pub(crate) const CR0_PE: u64 = 1; pub(crate) const CR0_MP: u64 = 1 << 1; pub(crate) const CR0_ET: u64 = 1 << 4; @@ -91,6 +92,11 @@ cfg_if::cfg_if! { pub(crate) const EFER_LMA: u64 = 1 << 10; pub(crate) const EFER_SCE: u64 = 1; pub(crate) const EFER_NX: u64 = 1 << 11; + + // XCR0 (Extended Control Register 0) bits for XSAVE features + pub(crate) const XCR0_X87: u64 = 1 << 0; // x87 FPU state + pub(crate) const XCR0_SSE: u64 = 1 << 1; // SSE state (XMM registers) + pub(crate) const XCR0_AVX: u64 = 1 << 2; // AVX state (YMM registers) } } diff --git a/src/hyperlight_host/tests/simd_test.rs b/src/hyperlight_host/tests/simd_test.rs new file mode 100644 index 000000000..33c5201fd --- /dev/null +++ b/src/hyperlight_host/tests/simd_test.rs @@ -0,0 +1,113 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#![allow(clippy::disallowed_macros)] + +use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; +use hyperlight_host::sandbox_state::transition::Noop; +use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox}; +use hyperlight_testing::simdguest_as_string; + +/// Helper function to create a sandbox for SIMD tests +fn create_simd_sandbox() -> MultiUseSandbox { + UninitializedSandbox::new( + GuestBinary::FilePath(simdguest_as_string().expect("simdguest binary missing")), + None, + ) + .unwrap() + .evolve(Noop::default()) + .unwrap() +} + +/// Test SSE (Streaming SIMD Extensions) feature +#[test] +fn sse_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_sse", ()) + .expect("test_sse should succeed"); + assert!(result, "SSE feature should return true"); +} + +/// Test SSE2 (Streaming SIMD Extensions 2) feature +#[test] +fn sse2_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_sse2", ()) + .expect("test_sse2 should succeed"); + assert!(result, "SSE2 feature should return true"); +} + +/// Test SSE3 (Streaming SIMD Extensions 3) feature +#[test] +fn sse3_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_sse3", ()) + .expect("test_sse3 should succeed"); + assert!(result, "SSE3 feature should return true"); +} + +/// Test SSSE3 (Supplemental Streaming SIMD Extensions 3) feature +#[test] +fn ssse3_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_ssse3", ()) + .expect("test_ssse3 should succeed"); + assert!(result, "SSSE3 feature should return true"); +} + +/// Test SSE4.1 (Streaming SIMD Extensions 4.1) feature +#[test] +fn sse4_1_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_sse4_1", ()) + .expect("test_sse4_1 should succeed"); + assert!(result, "SSE4.1 feature should return true"); +} + +/// Test SSE4.2 (Streaming SIMD Extensions 4.2) feature +#[test] +fn sse4_2_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_sse4_2", ()) + .expect("test_sse4_2 should succeed"); + assert!(result, "SSE4.2 feature should return true"); +} + +/// Test AVX (Advanced Vector Extensions) feature +#[test] +fn avx_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_avx", ()) + .expect("test_avx should succeed"); + assert!(result, "AVX feature should return true"); +} + +/// Test AVX2 (Advanced Vector Extensions 2) feature +#[test] +fn avx2_feature() { + let mut sbox = create_simd_sandbox(); + let result = sbox + .call_guest_function_by_name::("test_avx2", ()) + .expect("test_avx2 should succeed"); + assert!(result, "AVX2 feature should return true"); +} diff --git a/src/hyperlight_testing/src/lib.rs b/src/hyperlight_testing/src/lib.rs index eb48e203a..a62c4c398 100644 --- a/src/hyperlight_testing/src/lib.rs +++ b/src/hyperlight_testing/src/lib.rs @@ -96,6 +96,14 @@ pub fn dummy_guest_as_string() -> Result { .ok_or_else(|| anyhow!("couldn't convert dummy guest PathBuf to string")) } +/// Get a fully qualified OS-specific path to the simdguest elf binary +pub fn simdguest_as_string() -> Result { + let buf = rust_guest_as_pathbuf("simdguest"); + buf.to_str() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow!("couldn't convert simd guest PathBuf to string")) +} + pub fn c_guest_as_pathbuf(guest: &str) -> PathBuf { let build_dir_selector = if cfg!(debug_assertions) { "debug" diff --git a/src/tests/rust_guests/callbackguest/Cargo.lock b/src/tests/rust_guests/callbackguest/Cargo.lock index 82979afeb..cae114391 100644 --- a/src/tests/rust_guests/callbackguest/Cargo.lock +++ b/src/tests/rust_guests/callbackguest/Cargo.lock @@ -85,7 +85,6 @@ version = "0.7.0" dependencies = [ "anyhow", "hyperlight-common", - "serde_json", ] [[package]] @@ -102,12 +101,6 @@ dependencies = [ "spin 0.10.0", ] -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - [[package]] name = "lock_api" version = "0.4.12" @@ -124,30 +117,6 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "proc-macro2" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - [[package]] name = "rustc_version" version = "0.4.1" @@ -157,12 +126,6 @@ dependencies = [ "semver", ] -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "scopeguard" version = "1.2.0" @@ -175,38 +138,6 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - [[package]] name = "shlex" version = "1.3.0" @@ -230,20 +161,3 @@ checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" dependencies = [ "lock_api", ] - -[[package]] -name = "syn" -version = "2.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/src/tests/rust_guests/simdguest/.cargo/config.toml b/src/tests/rust_guests/simdguest/.cargo/config.toml new file mode 100644 index 000000000..eddd04608 --- /dev/null +++ b/src/tests/rust_guests/simdguest/.cargo/config.toml @@ -0,0 +1,30 @@ +[build] +target = "x86_64-unknown-none" + +[target.x86_64-unknown-none] +rustflags = [ + "-C", + "code-model=small", + "-C", + "link-args=-e entrypoint", + "-C", + "target-feature=-soft-float,+avx2", + # Note. Setting soft-float via target-features is deprecated due to possibly introducing ABI-unsoundness, but we need it if we want to use x86_64-unknown-none as a target. + # In the future if this breaks, we can create a custom target that does not deafult to +soft-float. + # + # Note: on x86_64, avx2 also implies: + # - avx + # - sse4.2 + # - sse4.1 + # - ssse3 + # - sse3 + # - sse2 + # - sse +] +linker = "rust-lld" + +[profile.release] +panic = "abort" + +[profile.dev] +panic = "abort" diff --git a/src/tests/rust_guests/simdguest/Cargo.lock b/src/tests/rust_guests/simdguest/Cargo.lock new file mode 100644 index 000000000..c2e6021ed --- /dev/null +++ b/src/tests/rust_guests/simdguest/Cargo.lock @@ -0,0 +1,164 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "buddy_system_allocator" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0108968a3a2dab95b089c0fc3f1afa7759aa5ebe6f1d86d206d6f7ba726eb" +dependencies = [ + "spin 0.9.8", +] + +[[package]] +name = "cc" +version = "1.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + +[[package]] +name = "hyperlight-common" +version = "0.7.0" +dependencies = [ + "anyhow", + "flatbuffers", + "log", + "spin 0.10.0", +] + +[[package]] +name = "hyperlight-guest" +version = "0.7.0" +dependencies = [ + "anyhow", + "hyperlight-common", +] + +[[package]] +name = "hyperlight-guest-bin" +version = "0.7.0" +dependencies = [ + "buddy_system_allocator", + "cc", + "cfg-if", + "glob", + "hyperlight-common", + "hyperlight-guest", + "log", + "spin 0.10.0", +] + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdguest" +version = "0.4.0" +dependencies = [ + "hyperlight-common", + "hyperlight-guest", + "hyperlight-guest-bin", + "log", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" +dependencies = [ + "lock_api", +] diff --git a/src/tests/rust_guests/simdguest/Cargo.toml b/src/tests/rust_guests/simdguest/Cargo.toml new file mode 100644 index 000000000..51bb7b0d4 --- /dev/null +++ b/src/tests/rust_guests/simdguest/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "simdguest" +version = "0.4.0" +edition = "2021" + +[dependencies] +hyperlight-guest = { path = "../../../hyperlight_guest" } +hyperlight-guest-bin = { path = "../../../hyperlight_guest_bin" } +hyperlight-common = { path = "../../../hyperlight_common", default-features = false } +log = {version = "0.4", default-features = false } diff --git a/src/tests/rust_guests/simdguest/src/main.rs b/src/tests/rust_guests/simdguest/src/main.rs new file mode 100644 index 000000000..95ce4b44c --- /dev/null +++ b/src/tests/rust_guests/simdguest/src/main.rs @@ -0,0 +1,494 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#![no_std] +#![no_main] + +extern crate alloc; + +use alloc::string::ToString; +use alloc::vec::Vec; + +use hyperlight_common::flatbuffer_wrappers::function_call::FunctionCall; +use hyperlight_common::flatbuffer_wrappers::function_types::ReturnType; +use hyperlight_common::flatbuffer_wrappers::util::get_flatbuffer_result; +use hyperlight_guest::error::Result; +use hyperlight_guest_bin::guest_function::definition::GuestFunctionDefinition; +use hyperlight_guest_bin::guest_function::register::register_function; + +// SSE - Base Streaming SIMD Extensions +fn test_sse(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "sse")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - addss (add scalar single precision) + let mut asm_result: f32 = 1.0; + unsafe { + core::arch::asm!( + "addss {result}, {input}", + result = inout(xmm_reg) asm_result, + input = in(xmm_reg) 2.0f32, + ); + } + + // Intrinsic equivalent - _mm_add_ss + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm_set_ss(1.0); + let b = _mm_set_ss(2.0); + let result = _mm_add_ss(a, b); + _mm_cvtss_f32(result) + }; + + // Both should result in 3.0, return true if both are correct + let asm_ok = (asm_result - 3.0).abs() < f32::EPSILON; + let intrinsic_ok = (intrinsic_result - 3.0).abs() < f32::EPSILON; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "sse"))] + { + Ok(get_flatbuffer_result(false)) // SSE feature not enabled + } +} + +// SSE2 - Streaming SIMD Extensions 2 +fn test_sse2(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "sse2")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - addsd (add scalar double precision) + let mut asm_result: f64 = 1.0; + unsafe { + core::arch::asm!( + "addsd {result}, {input}", + result = inout(xmm_reg) asm_result, + input = in(xmm_reg) 2.0f64, + ); + } + + // Intrinsic equivalent - _mm_add_sd + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm_set_sd(1.0); + let b = _mm_set_sd(2.0); + let result = _mm_add_sd(a, b); + _mm_cvtsd_f64(result) + }; + + // Both should result in 3.0, return true if both are correct + let asm_ok = (asm_result - 3.0).abs() < f64::EPSILON; + let intrinsic_ok = (intrinsic_result - 3.0).abs() < f64::EPSILON; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "sse2"))] + { + Ok(get_flatbuffer_result(false)) // SSE2 feature not enabled + } +} + +// SSE3 - Streaming SIMD Extensions 3 +fn test_sse3(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "sse3")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - haddps (horizontal add packed single precision) + let val_array = [1.0f32, 2.0f32, 3.0f32, 4.0f32]; + let asm_result: i32; + unsafe { + core::arch::asm!( + "movups {tmp}, [{val_ptr}]", // Load [1.0, 2.0, 3.0, 4.0] + "haddps {tmp}, {tmp}", // SSE3 horizontal add: [3.0, 7.0, 3.0, 7.0] + "movd {result:e}, {tmp}", // Extract first element as int32 + val_ptr = in(reg) val_array.as_ptr(), + tmp = out(xmm_reg) _, + result = out(reg) asm_result, + ); + } + + // Intrinsic equivalent - _mm_hadd_ps + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // Reversed due to little-endian + let result = _mm_hadd_ps(a, a); + _mm_cvtss_f32(result) + }; + + // Check if both results make sense (3.0 as expected from 1.0 + 2.0) + let asm_result_float = f32::from_bits(asm_result as u32); + let asm_ok = (asm_result_float - 3.0).abs() < f32::EPSILON; + let intrinsic_ok = (intrinsic_result - 3.0).abs() < f32::EPSILON; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "sse3"))] + { + Ok(get_flatbuffer_result(false)) // SSE3 feature not enabled + } +} + +// SSSE3 - Supplemental Streaming SIMD Extensions 3 +fn test_ssse3(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "ssse3")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - pabsb (packed absolute value of bytes) + let input_data = [ + -1i8, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, + ]; + let asm_result: i32; + unsafe { + core::arch::asm!( + "movdqu {tmp}, [{input_ptr}]", // Load input vector from memory + "pabsb {tmp}, {tmp}", // SSSE3 absolute value of packed bytes + "pextrb {result:e}, {tmp}, 15", // Extract byte 15 (should be 16) + input_ptr = in(reg) input_data.as_ptr(), + tmp = out(xmm_reg) _, + result = out(reg) asm_result, + ); + } + + // Intrinsic equivalent - _mm_abs_epi8 + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let input = _mm_set_epi8( + 16, -15, 14, -13, 12, -11, 10, -9, 8, -7, 6, -5, 4, -3, 2, -1, + ); + let abs_result = _mm_abs_epi8(input); + _mm_extract_epi8(abs_result, 0) as u8 // Extract first byte (abs(-1) = 1) + }; + + // Check both results + let asm_ok = (asm_result & 0xFF) == 16; // abs(-16) = 16 from byte 15 + let intrinsic_ok = intrinsic_result == 1; // abs(-1) = 1 from byte 0 + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "ssse3"))] + { + Ok(get_flatbuffer_result(false)) // SSSE3 feature not enabled + } +} + +// SSE4.1 - Streaming SIMD Extensions 4.1 +fn test_sse4_1(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "sse4.1")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - pblendvb (variable blend packed bytes) + let val_a = [1i8; 16]; // All 1s + let val_b = [2i8; 16]; // All 2s + let val_mask = [-1i8; 16]; // All 0xFF (select from b) + let asm_result: i32; + unsafe { + core::arch::asm!( + "movdqu {a}, [{val_a_ptr}]", // Load vector of 1s + "movdqu {b}, [{val_b_ptr}]", // Load vector of 2s + "movdqu xmm0, [{val_mask_ptr}]", // Load mask into xmm0 (pblendvb implicit operand) + "pblendvb {a}, {b}", // SSE4.1 blend: xmm0=mask, a=src1, b=src2 + "pextrb {result:e}, {a}, 0", // Extract first byte + val_a_ptr = in(reg) val_a.as_ptr(), + val_b_ptr = in(reg) val_b.as_ptr(), + val_mask_ptr = in(reg) val_mask.as_ptr(), + a = out(xmm_reg) _, + b = out(xmm_reg) _, + result = out(reg) asm_result, + out("xmm0") _, // xmm0 is clobbered + ); + } + + // Intrinsic equivalent - _mm_blendv_epi8 + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm_set1_epi8(1); // All 1s + let b = _mm_set1_epi8(2); // All 2s + let mask = _mm_set1_epi8(-1); // All 0xFF (select from b) + let blended = _mm_blendv_epi8(a, b, mask); + _mm_extract_epi8(blended, 0) as u8 + }; + + // Both should result in 2 (blend selects b), return true if both are correct + let asm_ok = (asm_result & 0xFF) == 2; + let intrinsic_ok = intrinsic_result == 2; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "sse4.1"))] + { + Ok(get_flatbuffer_result(false)) // SSE4.1 feature not enabled + } +} + +// SSE4.2 - Streaming SIMD Extensions 4.2 +fn test_sse4_2(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "sse4.2")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - pcmpgtq (compare packed 64-bit integers) + let val_a = [1i64, 3i64]; // [1, 3] as 64-bit values + let val_b = [0i64, 2i64]; // [0, 2] as 64-bit values + let asm_result: u64; + unsafe { + core::arch::asm!( + "movdqu {a}, [{val_a_ptr}]", // Load [1, 3] + "movdqu {b}, [{val_b_ptr}]", // Load [0, 2] + "pcmpgtq {a}, {b}", // SSE4.2 compare: a > b + "pextrq {result}, {a}, 0", // Extract first 64-bit element + val_a_ptr = in(reg) val_a.as_ptr(), + val_b_ptr = in(reg) val_b.as_ptr(), + a = out(xmm_reg) _, + b = out(xmm_reg) _, + result = out(reg) asm_result, + ); + } + + // Intrinsic equivalent - _mm_cmpgt_epi64 + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm_set_epi64x(3, 1); // [1, 3] (reversed due to little-endian) + let b = _mm_set_epi64x(2, 0); // [0, 2] + let cmp_result = _mm_cmpgt_epi64(a, b); + _mm_extract_epi64(cmp_result, 0) as u64 + }; + + // Both should result in all bits set (0xFFFFFFFFFFFFFFFF), return true if both are correct + let asm_ok = asm_result == u64::MAX; + let intrinsic_ok = intrinsic_result == u64::MAX; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "sse4.2"))] + { + Ok(get_flatbuffer_result(false)) // SSE4.2 feature not enabled + } +} + +// AVX - Advanced Vector Extensions +fn test_avx(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "avx")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - vaddps (add packed single precision) + let val1 = 1.0f32; + let val2 = 2.0f32; + let asm_result: i32; + unsafe { + core::arch::asm!( + "vmovd {xmm1}, {val1:e}", // Load 1.0 into xmm register + "vbroadcastss {input1}, {xmm1}", // Broadcast to all 8 elements of ymm + "vmovd {xmm2}, {val2:e}", // Load 2.0 into xmm register + "vbroadcastss {input2}, {xmm2}", // Broadcast to all 8 elements of ymm + "vaddps {input1}, {input1}, {input2}", // AVX addition: [3.0; 8] + "vextractf128 {xmm_result}, {input1}, 0", // Extract lower 128 bits + "vmovd {result:e}, {xmm_result}", // Extract first element as int32 + val1 = in(reg) val1.to_bits(), + val2 = in(reg) val2.to_bits(), + xmm1 = out(xmm_reg) _, + xmm2 = out(xmm_reg) _, + xmm_result = out(xmm_reg) _, + input1 = out(ymm_reg) _, + input2 = out(ymm_reg) _, + result = out(reg) asm_result, + ); + } + + // Intrinsic equivalent - _mm256_add_ps + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm256_set1_ps(1.0); // Broadcast 1.0 to all 8 elements + let b = _mm256_set1_ps(2.0); // Broadcast 2.0 to all 8 elements + let result = _mm256_add_ps(a, b); // AVX addition: [3.0; 8] + let extracted = _mm256_extractf128_ps(result, 0); // Extract lower 128 bits + _mm_cvtss_f32(extracted) // Extract first element + }; + + // Check if both results make sense (3.0 as expected) + let asm_result_float = f32::from_bits(asm_result as u32); + let asm_ok = (asm_result_float - 3.0).abs() < f32::EPSILON; + let intrinsic_ok = (intrinsic_result - 3.0).abs() < f32::EPSILON; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "avx"))] + { + Ok(get_flatbuffer_result(false)) // AVX feature not enabled + } +} + +// AVX2 - Advanced Vector Extensions 2 +fn test_avx2(_function_call: &FunctionCall) -> Result> { + #[cfg(target_feature = "avx2")] + { + // Test with both raw assembly and intrinsics + + // Raw assembly - vpaddq (add packed 64-bit integers) + let val1 = 1i64; + let val2 = 2i64; + let asm_result: i64; + unsafe { + core::arch::asm!( + "vmovq {xmm1}, {val1}", // Load 1 into xmm register + "vpbroadcastq {input1}, {xmm1}", // Broadcast to all 4 elements of ymm + "vmovq {xmm2}, {val2}", // Load 2 into xmm register + "vpbroadcastq {input2}, {xmm2}", // Broadcast to all 4 elements of ymm + "vpaddq {input1}, {input1}, {input2}", // AVX2 addition: [3; 4] + "vextracti128 {xmm_result}, {input1}, 0", // Extract lower 128 bits + "vmovq {result}, {xmm_result}", // Extract first 64-bit element + val1 = in(reg) val1, + val2 = in(reg) val2, + xmm1 = out(xmm_reg) _, + xmm2 = out(xmm_reg) _, + xmm_result = out(xmm_reg) _, + input1 = out(ymm_reg) _, + input2 = out(ymm_reg) _, + result = out(reg) asm_result, + ); + } + + // Intrinsic equivalent - _mm256_add_epi64 + let intrinsic_result = unsafe { + use core::arch::x86_64::*; + let a = _mm256_set1_epi64x(1); // Broadcast 1 to all 4 elements + let b = _mm256_set1_epi64x(2); // Broadcast 2 to all 4 elements + let result = _mm256_add_epi64(a, b); // AVX2 addition: [3; 4] + let extracted = _mm256_extracti128_si256(result, 0); // Extract lower 128 bits + _mm_extract_epi64(extracted, 0) // Extract first 64-bit element + }; + + // Both should result in 3, return true if both are correct + let asm_ok = asm_result == 3; + let intrinsic_ok = intrinsic_result == 3; + + if asm_ok && intrinsic_ok { + Ok(get_flatbuffer_result(true)) + } else { + Ok(get_flatbuffer_result(false)) + } + } + #[cfg(not(target_feature = "avx2"))] + { + Ok(get_flatbuffer_result(false)) // AVX2 feature not enabled + } +} + +#[no_mangle] +pub extern "C" fn hyperlight_main() { + // Register individual test functions in order + let test_sse_def = GuestFunctionDefinition::new( + "test_sse".to_string(), + Vec::new(), + ReturnType::Bool, + test_sse as usize, + ); + register_function(test_sse_def); + + let test_sse2_def = GuestFunctionDefinition::new( + "test_sse2".to_string(), + Vec::new(), + ReturnType::Bool, + test_sse2 as usize, + ); + register_function(test_sse2_def); + + let test_sse3_def = GuestFunctionDefinition::new( + "test_sse3".to_string(), + Vec::new(), + ReturnType::Bool, + test_sse3 as usize, + ); + register_function(test_sse3_def); + + let test_ssse3_def = GuestFunctionDefinition::new( + "test_ssse3".to_string(), + Vec::new(), + ReturnType::Bool, + test_ssse3 as usize, + ); + register_function(test_ssse3_def); + + let test_sse4_1_def = GuestFunctionDefinition::new( + "test_sse4_1".to_string(), + Vec::new(), + ReturnType::Bool, + test_sse4_1 as usize, + ); + register_function(test_sse4_1_def); + + let test_sse4_2_def = GuestFunctionDefinition::new( + "test_sse4_2".to_string(), + Vec::new(), + ReturnType::Bool, + test_sse4_2 as usize, + ); + register_function(test_sse4_2_def); + + let test_avx_def = GuestFunctionDefinition::new( + "test_avx".to_string(), + Vec::new(), + ReturnType::Bool, + test_avx as usize, + ); + register_function(test_avx_def); + + let test_avx2_def = GuestFunctionDefinition::new( + "test_avx2".to_string(), + Vec::new(), + ReturnType::Bool, + test_avx2 as usize, + ); + register_function(test_avx2_def); +} + +#[no_mangle] +pub fn guest_dispatch_function() { + // Simple dispatch - not used in this test +} diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index b30d4ef5e..c3c68b208 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -76,7 +76,6 @@ version = "0.7.0" dependencies = [ "anyhow", "hyperlight-common", - "serde_json", ] [[package]] @@ -93,12 +92,6 @@ dependencies = [ "spin 0.10.0", ] -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - [[package]] name = "lock_api" version = "0.4.12" @@ -115,30 +108,6 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "proc-macro2" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - [[package]] name = "rustc_version" version = "0.4.1" @@ -148,12 +117,6 @@ dependencies = [ "semver", ] -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "scopeguard" version = "1.2.0" @@ -166,38 +129,6 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - [[package]] name = "shlex" version = "1.3.0" @@ -231,20 +162,3 @@ checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" dependencies = [ "lock_api", ] - -[[package]] -name = "syn" -version = "2.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/src/tests/rust_guests/witguest/Cargo.lock b/src/tests/rust_guests/witguest/Cargo.lock index a1edc4b10..3e4ed40bb 100644 --- a/src/tests/rust_guests/witguest/Cargo.lock +++ b/src/tests/rust_guests/witguest/Cargo.lock @@ -219,7 +219,6 @@ version = "0.7.0" dependencies = [ "anyhow", "hyperlight-common", - "serde_json", ] [[package]] @@ -262,12 +261,6 @@ dependencies = [ "either", ] -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - [[package]] name = "jiff" version = "0.2.14" @@ -401,12 +394,6 @@ dependencies = [ "semver", ] -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "scopeguard" version = "1.2.0" @@ -439,18 +426,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - [[package]] name = "shlex" version = "1.3.0"