From 28e85bf45dfa5fd01193b1e6a5fb3182e611c8d7 Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Sun, 14 Apr 2024 08:05:18 +0100
Subject: [PATCH] Add builtins for `f16`/`f128` float conversions

---
 README.md           | 59 ++++++++++++++++++++++++---------------------
 build.rs            | 13 ----------
 src/float/extend.rs | 31 ++++++++++++++++++++++++
 src/float/mod.rs    | 17 ++++++++++++-
 src/float/trunc.rs  | 38 +++++++++++++++++++++++++++++
 src/lib.rs          |  2 ++
 6 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/README.md b/README.md
index ffef4e52..2e7a8997 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,6 @@ features = ["c"]
 - [x] divmodsi4.c
 - [x] divsf3.c
 - [x] divsi3.c
-- [ ] extendhfsf2.c
 - [x] extendsfdf2.c
 - [x] fixdfdi.c
 - [x] fixdfsi.c
@@ -181,9 +180,7 @@ features = ["c"]
 - [x] powisf2.c
 - [x] subdf3.c
 - [x] subsf3.c
-- [ ] truncdfhf2.c
 - [x] truncdfsf2.c
-- [ ] truncsfhf2.c
 - [x] udivdi3.c
 - [x] udivmoddi4.c
 - [x] udivmodsi4.c
@@ -213,60 +210,68 @@ These builtins are needed to support 128-bit integers, which are in the process
 - [x] udivti3.c
 - [x] umodti3.c
 
+These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust.
+
+- [ ] addtf3.c
+- [ ] comparetf2.c
+- [ ] divtf3.c
+- [x] extenddftf2.c
+- [x] extendhfsf2.c
+- [x] extendhftf2.c
+- [x] extendsftf2.c
+- [ ] fixtfdi.c
+- [ ] fixtfsi.c
+- [ ] fixtfti.c
+- [ ] fixunstfdi.c
+- [ ] fixunstfsi.c
+- [ ] fixunstfti.c
+- [ ] floatditf.c
+- [ ] floatsitf.c
+- [ ] floatunditf.c
+- [ ] floatunsitf.c
+- [ ] multf3.c
+- [ ] powitf2.c
+- [ ] ppc/fixtfdi.c
+- [ ] ppc/fixunstfdi.c
+- [ ] ppc/floatditf.c
+- [ ] ppc/floatunditf.c
+- [ ] subtf3.c
+- [x] truncdfhf2.c
+- [x] truncsfhf2.c
+- [x] trunctfdf2.c
+- [x] trunctfhf2.c
+- [x] trunctfsf2.c
+
 ## Unimplemented functions
 
 These builtins involve floating-point types ("`f128`", "`f80`" and complex numbers) that are not supported by Rust.
 
-- ~~addtf3.c~~
-- ~~comparetf2.c~~
 - ~~divdc3.c~~
 - ~~divsc3.c~~
 - ~~divtc3.c~~
-- ~~divtf3.c~~
 - ~~divxc3.c~~
-- ~~extenddftf2.c~~
-- ~~extendsftf2.c~~
-- ~~fixtfdi.c~~
-- ~~fixtfsi.c~~
-- ~~fixtfti.c~~
-- ~~fixunstfdi.c~~
-- ~~fixunstfsi.c~~
-- ~~fixunstfti.c~~
 - ~~fixunsxfdi.c~~
 - ~~fixunsxfsi.c~~
 - ~~fixunsxfti.c~~
 - ~~fixxfdi.c~~
 - ~~fixxfti.c~~
-- ~~floatditf.c~~
 - ~~floatdixf.c~~
-- ~~floatsitf.c~~
 - ~~floattixf.c~~
-- ~~floatunditf.c~~
 - ~~floatundixf.c~~
-- ~~floatunsitf.c~~
 - ~~floatuntixf.c~~
 - ~~i386/floatdixf.S~~
 - ~~i386/floatundixf.S~~
 - ~~muldc3.c~~
 - ~~mulsc3.c~~
 - ~~multc3.c~~
-- ~~multf3.c~~
 - ~~mulxc3.c~~
-- ~~powitf2.c~~
 - ~~powixf2.c~~
 - ~~ppc/divtc3.c~~
-- ~~ppc/fixtfdi.c~~
-- ~~ppc/fixunstfdi.c~~
-- ~~ppc/floatditf.c~~
-- ~~ppc/floatunditf.c~~
 - ~~ppc/gcc_qadd.c~~
 - ~~ppc/gcc_qdiv.c~~
 - ~~ppc/gcc_qmul.c~~
 - ~~ppc/gcc_qsub.c~~
 - ~~ppc/multc3.c~~
-- ~~subtf3.c~~
-- ~~trunctfdf2.c~~
-- ~~trunctfsf2.c~~
 - ~~x86_64/floatdixf.c~~
 - ~~x86_64/floatundixf.S~~
 
diff --git a/build.rs b/build.rs
index 44946c12..ec38feca 100644
--- a/build.rs
+++ b/build.rs
@@ -288,13 +288,10 @@ mod c {
             sources.extend(&[
                 ("__divdc3", "divdc3.c"),
                 ("__divsc3", "divsc3.c"),
-                ("__extendhfsf2", "extendhfsf2.c"),
                 ("__muldc3", "muldc3.c"),
                 ("__mulsc3", "mulsc3.c"),
                 ("__negdf2", "negdf2.c"),
                 ("__negsf2", "negsf2.c"),
-                ("__truncdfhf2", "truncdfhf2.c"),
-                ("__truncsfhf2", "truncsfhf2.c"),
             ]);
         }
 
@@ -464,8 +461,6 @@ mod c {
         if (target_arch == "aarch64" || target_arch == "arm64ec") && consider_float_intrinsics {
             sources.extend(&[
                 ("__comparetf2", "comparetf2.c"),
-                ("__extenddftf2", "extenddftf2.c"),
-                ("__extendsftf2", "extendsftf2.c"),
                 ("__fixtfdi", "fixtfdi.c"),
                 ("__fixtfsi", "fixtfsi.c"),
                 ("__fixtfti", "fixtfti.c"),
@@ -476,8 +471,6 @@ mod c {
                 ("__floatsitf", "floatsitf.c"),
                 ("__floatunditf", "floatunditf.c"),
                 ("__floatunsitf", "floatunsitf.c"),
-                ("__trunctfdf2", "trunctfdf2.c"),
-                ("__trunctfsf2", "trunctfsf2.c"),
                 ("__addtf3", "addtf3.c"),
                 ("__multf3", "multf3.c"),
                 ("__subtf3", "subtf3.c"),
@@ -498,7 +491,6 @@ mod c {
 
         if target_arch == "mips64" {
             sources.extend(&[
-                ("__extenddftf2", "extenddftf2.c"),
                 ("__netf2", "comparetf2.c"),
                 ("__addtf3", "addtf3.c"),
                 ("__multf3", "multf3.c"),
@@ -509,14 +501,11 @@ mod c {
                 ("__floatunsitf", "floatunsitf.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__divtf3", "divtf3.c"),
-                ("__trunctfdf2", "trunctfdf2.c"),
-                ("__trunctfsf2", "trunctfsf2.c"),
             ]);
         }
 
         if target_arch == "loongarch64" {
             sources.extend(&[
-                ("__extenddftf2", "extenddftf2.c"),
                 ("__netf2", "comparetf2.c"),
                 ("__addtf3", "addtf3.c"),
                 ("__multf3", "multf3.c"),
@@ -527,8 +516,6 @@ mod c {
                 ("__floatunsitf", "floatunsitf.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__divtf3", "divtf3.c"),
-                ("__trunctfdf2", "trunctfdf2.c"),
-                ("__trunctfsf2", "trunctfsf2.c"),
             ]);
         }
 
diff --git a/src/float/extend.rs b/src/float/extend.rs
index 0e6673b9..ddb74aef 100644
--- a/src/float/extend.rs
+++ b/src/float/extend.rs
@@ -81,4 +81,35 @@ intrinsics! {
     pub extern "C" fn  __extendsfdf2vfp(a: f32) -> f64 {
         a as f64 // LLVM generate 'fcvtds'
     }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_h2f]
+    pub extern "C" fn __extendhfsf2(a: f16) -> f32 {
+        extend(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 {
+        extend(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __extendhftf2(a: f16) -> f128 {
+        extend(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __extendsftf2(a: f32) -> f128 {
+        extend(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __extenddftf2(a: f64) -> f128 {
+        extend(a)
+    }
 }
diff --git a/src/float/mod.rs b/src/float/mod.rs
index fdbe9dde..4d1964c7 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -127,7 +127,20 @@ macro_rules! float_impl {
                 self.to_bits() as Self::SignedInt
             }
             fn eq_repr(self, rhs: Self) -> bool {
-                if self.is_nan() && rhs.is_nan() {
+                #[cfg(feature = "mangled-names")]
+                fn is_nan(x: $ty) -> bool {
+                    // When using mangled-names, the "real" compiler-builtins might not have the
+                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
+                    // FIXME: Remove once the nightly toolchain has the __unordtf2 builtin
+                    // x is NaN if all the bits of the exponent are set and the significand is non-0
+                    x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
+                        && x.repr() & $ty::SIGNIFICAND_MASK != 0
+                }
+                #[cfg(not(feature = "mangled-names"))]
+                fn is_nan(x: $ty) -> bool {
+                    x.is_nan()
+                }
+                if is_nan(self) && is_nan(rhs) {
                     true
                 } else {
                     self.repr() == rhs.repr()
@@ -171,5 +184,7 @@ macro_rules! float_impl {
     };
 }
 
+float_impl!(f16, u16, i16, i8, 16, 10);
 float_impl!(f32, u32, i32, i16, 32, 23);
 float_impl!(f64, u64, i64, i16, 64, 52);
+float_impl!(f128, u128, i128, i16, 128, 112);
diff --git a/src/float/trunc.rs b/src/float/trunc.rs
index 0beeb9f9..b21c7c69 100644
--- a/src/float/trunc.rs
+++ b/src/float/trunc.rs
@@ -123,4 +123,42 @@ intrinsics! {
     pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 {
         a as f32
     }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_f2h]
+    pub extern "C" fn __truncsfhf2(a: f32) -> f16 {
+        trunc(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 {
+        trunc(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_d2h]
+    pub extern "C" fn __truncdfhf2(a: f64) -> f16 {
+        trunc(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __trunctfhf2(a: f128) -> f16 {
+        trunc(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
+        trunc(a)
+    }
+
+    #[avr_skip]
+    #[aapcs_on_arm]
+    pub extern "C" fn __trunctfdf2(a: f128) -> f64 {
+        trunc(a)
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index a414efde..2aec681e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,8 @@
 #![feature(naked_functions)]
 #![feature(repr_simd)]
 #![feature(c_unwind)]
+#![feature(f16)]
+#![feature(f128)]
 #![no_builtins]
 #![no_std]
 #![allow(unused_features)]