From ffe715ab65dcfa5841a58fc76e813dc54e0a1c2c Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 13 Jul 2025 13:26:02 +0200 Subject: [PATCH 1/2] implement `floor` and `ceil` in assembly on `i586` --- libm-test/src/precision.rs | 22 ---------- libm/src/math/arch/i586.rs | 82 ++++++++++++++++++++++++++------------ 2 files changed, 56 insertions(+), 48 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index 32825b15d..3fb8c1b37 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase { impl MaybeOverride<(f64,)> for SpecialCase { fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Ceil - && ctx.basis == CheckBasis::Musl - && input.0 < 0.0 - && input.0 > -1.0 - && expected == F::ZERO - && actual == F::ZERO - { - // musl returns -0.0, we return +0.0 - return XFAIL("i586 ceil signed zero"); - } - if cfg!(x86_no_sse) && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) && (expected - actual).abs() <= F::ONE @@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL("i586 rint rounding mode"); } - if cfg!(x86_no_sse) - && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) - && expected.eq_repr(F::NEG_ZERO) - && actual.eq_repr(F::ZERO) - { - // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. - // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 - return XFAIL("i586 ceil/floor signed zero"); - } - if cfg!(x86_no_sse) && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) { diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs index f92b9a2af..c849bffa7 100644 --- a/libm/src/math/arch/i586.rs +++ b/libm/src/math/arch/i586.rs @@ -1,37 +1,67 @@ //! 
Architecture-specific support for x86-32 without SSE2 -use super::super::fabs; - /// Use an alternative implementation on x86, because the /// main implementation fails with the x87 FPU used by /// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn ceil(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; - } +/// +/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_ceil.S +/// (written by J.T. Conklin ). +#[unsafe(naked)] +pub extern "C" fn ceil(_: f64) -> f64 { + core::arch::naked_asm!( + "pushl %ebp", + "movl %esp,%ebp", + "subl $8,%esp", + // Store fpu control word. + "fstcw -4(%ebp)", + "movw -4(%ebp),%dx", + // Round towards +oo. + "orw $0x0800,%dx", + "andw $0xfbff,%dx", + "movw %dx,-8(%ebp)", + // Load modified control word + "fldcw -8(%ebp)", + // Round. + "fldl 8(%ebp)", + "frndint", + // Restore original control word. + "fldcw -4(%ebp)", + // Restore esp and ebp and return + "leave", + "ret", + options(att_syntax) + ) } /// Use an alternative implementation on x86, because the /// main implementation fails with the x87 FPU used by /// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn floor(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; - } +/// +/// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_floor.S +/// (written by J.T. Conklin ). 
+#[unsafe(naked)] +pub extern "C" fn floor(_: f64) -> f64 { + core::arch::naked_asm!( + "pushl %ebp", + "movl %esp,%ebp", + "subl $8,%esp", + // Store fpu control word. + "fstcw -4(%ebp)", + "movw -4(%ebp),%dx", + // Round towards -oo. + "orw $0x0400,%dx", + "andw $0xf7ff,%dx", + "movw %dx,-8(%ebp)", + // Load modified control word + "fldcw -8(%ebp)", + // Round. + "fldl 8(%ebp)", + "frndint", + // Restore original control word. + "fldcw -4(%ebp)", + // Restore esp and ebp and return + "leave", + "ret", + options(att_syntax) + ) } From 8f43b0b5f568c688c3504f89097221c2d16749b4 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 17 Jul 2025 12:24:32 +0200 Subject: [PATCH 2/2] implement `floor` and `ceil` with inline assembly on `i586` --- libm/src/math/arch/i586.rs | 104 +++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs index c849bffa7..96211f8de 100644 --- a/libm/src/math/arch/i586.rs +++ b/libm/src/math/arch/i586.rs @@ -6,31 +6,33 @@ /// /// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_ceil.S /// (written by J.T. Conklin ). -#[unsafe(naked)] -pub extern "C" fn ceil(_: f64) -> f64 { - core::arch::naked_asm!( - "pushl %ebp", - "movl %esp,%ebp", - "subl $8,%esp", - // Store fpu control word. - "fstcw -4(%ebp)", - "movw -4(%ebp),%dx", - // Round towards +oo. - "orw $0x0800,%dx", - "andw $0xfbff,%dx", - "movw %dx,-8(%ebp)", - // Load modified control word - "fldcw -8(%ebp)", - // Round. - "fldl 8(%ebp)", - "frndint", - // Restore original control word. - "fldcw -4(%ebp)", - // Restore esp and ebp and return - "leave", - "ret", - options(att_syntax) - ) +pub fn ceil(mut x: f64) -> f64 { + // We save and later restore the FPU control word. 
+ let mut cw_stash = core::mem::MaybeUninit::<u16>::uninit(); + let mut cw_tmp = core::mem::MaybeUninit::<u16>::uninit(); + unsafe { + core::arch::asm!( + "fstcw ({stash_ptr})", // Save the cw + "movw ({stash_ptr}), %dx", // ... + "orw $0x0800, %dx", // Set rounding control to 0b10 (+∞), + "andw $0xfbff, %dx", // preserving other controls + "movw %dx, ({cw_ptr})", // Apply cw + "fldcw ({cw_ptr})", // ... + "fldl ({x_ptr})", // Push x to the stack + "frndint", // Round + "fldcw ({stash_ptr})", // Restore cw + "fstpl ({x_ptr})", // Save rounded x to mem + cw_ptr = in(reg) &mut cw_tmp, + stash_ptr = in(reg) &mut cw_stash, + x_ptr = in(reg) &mut x, + out("dx") _, // Cw scratch + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _, + options(att_syntax) + ) + } + x } /// Use an alternative implementation on x86, because the @@ -39,29 +41,31 @@ pub extern "C" fn ceil(_: f64) -> f64 { /// /// Based on https://github.com/NetBSD/src/blob/trunk/lib/libm/arch/i387/s_floor.S /// (written by J.T. Conklin ). -#[unsafe(naked)] -pub extern "C" fn floor(_: f64) -> f64 { - core::arch::naked_asm!( - "pushl %ebp", - "movl %esp,%ebp", - "subl $8,%esp", - // Store fpu control word. - "fstcw -4(%ebp)", - "movw -4(%ebp),%dx", - // Round towards -oo. - "orw $0x0400,%dx", - "andw $0xf7ff,%dx", - "movw %dx,-8(%ebp)", - // Load modified control word - "fldcw -8(%ebp)", - // Round. - "fldl 8(%ebp)", - "frndint", - // Restore original control word. - "fldcw -4(%ebp)", - // Restore esp and ebp and return - "leave", - "ret", - options(att_syntax) - ) +pub fn floor(mut x: f64) -> f64 { + // We save and later restore the FPU control word. + let mut cw_stash = core::mem::MaybeUninit::<u16>::uninit(); + let mut cw_tmp = core::mem::MaybeUninit::<u16>::uninit(); + unsafe { + core::arch::asm!( + "fstcw ({stash_ptr})", // Save the cw + "movw ({stash_ptr}), %dx", // ... 
+ "orw $0x0400, %dx", // Set rounding control to 0b01 (-∞), + "andw $0xf7ff, %dx", // preserving other controls + "movw %dx, ({cw_ptr})", // Apply cw + "fldcw ({cw_ptr})", // ... + "fldl ({x_ptr})", // Push x to the stack + "frndint", // Round + "fldcw ({stash_ptr})", // Restore cw + "fstpl ({x_ptr})", // Save rounded x to mem + cw_ptr = in(reg) &mut cw_tmp, + stash_ptr = in(reg) &mut cw_stash, + x_ptr = in(reg) &mut x, + out("dx") _, // Cw scratch + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _, + options(att_syntax) + ) + } + x }