From 97044a895e5df9ec591775589495c7f9f7e855a7 Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Mon, 14 Jul 2025 06:21:27 -0700 Subject: [PATCH] [X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to AVX10.2 rev. 4: AMX-AVX512's explicit AVX10.2 sensitivity is removed and the instructions are removed in favor of inclusion in the ISE/SDM. Users of AMX-AVX512 ISA should follow enabling and checking rules for both AMX and IntelĀ® AVX-512/AVX10. Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 We set amx-avx512 as implying amx-tile, avx512f and evex512 when avx512fp16 and avx512bf16 need to be specified separately. --- clang/include/clang/Basic/BuiltinsX86_64.td | 20 +++++++++--- clang/lib/Headers/amxavx512intrin.h | 32 ++++++++++++------- clang/test/CodeGen/X86/amx_avx512_api.c | 8 +++-- clang/test/CodeGen/X86/amxavx512-builtins.c | 6 +++- llvm/lib/Target/X86/X86.td | 3 +- llvm/lib/Target/X86/X86InstrAMX.td | 14 ++++---- llvm/lib/TargetParser/X86TargetParser.cpp | 2 +- .../CodeGen/X86/amx-across-func-tilemovrow.ll | 8 ++--- .../test/CodeGen/X86/amx-avx512-intrinsics.ll | 18 +++++------ .../CodeGen/X86/amx-tile-avx512-internals.ll | 4 +-- 10 files changed, 73 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td index f2b35874e3876..fecaaed37a868 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.td +++ b/clang/include/clang/Basic/BuiltinsX86_64.td @@ -290,13 +290,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">; } -let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in { +let Features = "amx-avx512", Attributes = [NoThrow] in { def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; + def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; +} + +let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in { def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; +} + +let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in { def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; - def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; } let Features = "amx-tf32", Attributes = [NoThrow] in { @@ -382,13 +388,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">; } -let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in { +let Features = "amx-avx512", Attributes = [NoThrow] in { def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">; + def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">; +} + +let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in { def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; +} + +let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in { def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">; def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">; - def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">; } let Features = "amx-fp16", Attributes = [NoThrow] in { diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h index bbde44fc265b3..e6c58e5c138a1 100644 --- a/clang/lib/Headers/amxavx512intrin.h +++ b/clang/lib/Headers/amxavx512intrin.h @@ -16,7 +16,15 @@ #define __DEFAULT_FN_ATTRS_AVX512 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-avx512,avx10.2-512"))) + __target__("amx-avx512"))) + +#define __DEFAULT_FN_ATTRS_AVX512BF16 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-avx512,avx512bf16"))) + +#define __DEFAULT_FN_ATTRS_AVX512FP16 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-avx512,avx512fp16"))) /// Moves a row from a tile register to a zmm destination register, converting /// the int32 source elements to fp32. The row of the tile is selected by a @@ -237,25 +245,27 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16 _tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n, _tile1024i src, unsigned u) { return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 +static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16 _tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n, _tile1024i src, unsigned u) { return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u); } -static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { +static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16 +_tile_cvtrowps2phh_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u); } -static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { +static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16 +_tile_cvtrowps2phl_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u); } @@ -298,7 +308,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { /// The 2nd source r32. Size is 4 Bytes. /// \returns /// The destination v32bf16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 +__DEFAULT_FN_ATTRS_AVX512BF16 static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) { return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1); } @@ -317,7 +327,7 @@ static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) { /// The 2nd source r32. Size is 4 Bytes. /// \returns /// The destination v32bf16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 +__DEFAULT_FN_ATTRS_AVX512BF16 static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) { return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1); } @@ -336,7 +346,7 @@ static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) { /// The 2nd source r32. Size is 4 Bytes. /// \returns /// The destination v32fp16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 +__DEFAULT_FN_ATTRS_AVX512FP16 static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) { return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1); } @@ -355,7 +365,7 @@ static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) { /// The 2nd source r32. Size is 4 Bytes. /// \returns /// The destination v32fp16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 +__DEFAULT_FN_ATTRS_AVX512FP16 static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) { return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1); } diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c index fac41ea6c214f..54bf72a8f389b 100644 --- a/clang/test/CodeGen/X86/amx_avx512_api.c +++ b/clang/test/CodeGen/X86/amx_avx512_api.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \ -// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK +// RUN: -target-feature +amx-avx512 -emit-llvm -o - -Werror -pedantic | \ +// RUN: FileCheck %s --check-prefixes=CHECK #include @@ -16,6 +16,7 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) { return __tile_cvtrowd2ps(a, b); } +__attribute__((__target__("avx512bf16"))) __m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) { //CHECK-LABEL: @test_tile_cvtrowps2bf16h //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) @@ -23,6 +24,7 @@ __m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) { return __tile_cvtrowps2bf16h(a, b); } +__attribute__((__target__("avx512bf16"))) __m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) { //CHECK-LABEL: @test_tile_cvtrowps2bf16l //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) @@ -30,6 +32,7 @@ __m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) { return __tile_cvtrowps2bf16l(a, b); } +__attribute__((__target__("avx512fp16"))) __m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) { //CHECK-LABEL: @test_tile_cvtrowps2phh //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) @@ -37,6 +40,7 @@ __m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) { return __tile_cvtrowps2phh(a, b); } +__attribute__((__target__("avx512fp16"))) __m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) { //CHECK-LABEL: @test_tile_cvtrowps2phl //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c index d60929994901a..f6b8cb421407e 100644 --- a/clang/test/CodeGen/X86/amxavx512-builtins.c +++ b/clang/test/CodeGen/X86/amxavx512-builtins.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \ -// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s #include #include @@ -10,24 +10,28 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) { return _tile_cvtrowd2ps(1, A); } +__attribute__((__target__("avx512bf16"))) __m512bh test_tile_cvtrowps2bf16h(unsigned int A) { // CHECK-LABEL: @test_tile_cvtrowps2bf16h( // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}}) return _tile_cvtrowps2bf16h(1, A); } +__attribute__((__target__("avx512bf16"))) __m512bh test_tile_cvtrowps2bf16l(unsigned int A) { // CHECK-LABEL: @test_tile_cvtrowps2bf16l( // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}}) return _tile_cvtrowps2bf16l(1, A); } +__attribute__((__target__("avx512fp16"))) __m512h test_tile_cvtrowps2phh(unsigned int A) { // CHECK-LABEL: @test_tile_cvtrowps2phh( // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}}) return _tile_cvtrowps2phh(1, A); } +__attribute__((__target__("avx512fp16"))) __m512h test_tile_cvtrowps2phl(unsigned int A) { // CHECK-LABEL: @test_tile_cvtrowps2phl( // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 990b381341f07..83b633b73cd5d 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -277,7 +277,8 @@ def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", " def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512", "HasAMXAVX512", "true", "Support AMX-AVX512 instructions", - [FeatureAMXTILE]>; + [FeatureAMXTILE, FeatureAVX512, + FeatureEVEX512]>; def FeatureAMXTF32 : SubtargetFeature<"amx-tf32", "HasAMXTF32", "true", "Support AMX-TF32 instructions", [FeatureAMXTILE]>; diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index 1beaaafb159e3..5d93e6a4089ca 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -550,7 +550,7 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in { } // HasAMXMOVRS, In64BitMode multiclass m_tcvtrowd2ps { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { + let Predicates = [HasAMXAVX512, In64BitMode] in { let SchedRW = [WriteSystem] in { def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst), (ins TILE:$src1, i32u8imm:$src2), @@ -561,12 +561,12 @@ multiclass m_tcvtrowd2ps { "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, T8,XS, EVEX, VVVV, EVEX_V512; } - } // HasAMXAVX512, HasAVX10_2_512, In64BitMode + } // HasAMXAVX512, In64BitMode } defm TCVTROWD2PS : m_tcvtrowd2ps; -let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { +let Predicates = [HasAMXAVX512, In64BitMode] in { let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2), @@ -630,7 +630,7 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { multiclass AMXAVX512_BASE Opcode1, bits<8> Opcode2, string Opstr, Prefix P1, Prefix P2> { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in { + let Predicates = [HasAMXAVX512, In64BitMode], SchedRW = [WriteSystem] in { let OpPrefix = P1 in def rre : I; defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>; multiclass m_tilemovrow { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { + let Predicates = [HasAMXAVX512, In64BitMode] in { let SchedRW = [WriteSystem] in { def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst), (ins TILE:$src1, u8imm:$src2), @@ -669,12 +669,12 @@ multiclass m_tilemovrow { "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, T8,PD, EVEX, VVVV, EVEX_V512; } - } // HasAMXAVX512, HasAVX10_2_512, In64BitMode + } // HasAMXAVX512, In64BitMode } defm TILEMOVROW : m_tilemovrow; -let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { +let Predicates = [HasAMXAVX512, In64BitMode] in { let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2), diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 57fbc71fa22ee..f0349fec8af4e 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -616,7 +616,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_MOVRS = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 = - FeatureAMX_TILE | FeatureAVX10_2_512; + FeatureAMX_TILE | FeatureAVX512F | FeatureEVEX512; constexpr FeatureBitset ImpliedFeaturesAMX_TF32 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll index 71f8f231747fe..9e73d8c494443 100644 --- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll +++ b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s @buf = dso_local global [3072 x i8] zeroinitializer, align 64 @@ -95,7 +95,7 @@ define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind { ; O0-NEXT: movq %rsp, %rbp ; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 ; O0-NEXT: subq $4096, %rsp # imm = 0x1000 -; O0-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) ; O0-NEXT: movb $1, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %si, %cx diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll index 8f82bd2587ec3..94db7609dfd5d 100644 --- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512 | FileCheck %s define <16 x float> @test_tcvtrowd2ps(i32 %A) { ; CHECK-LABEL: test_tcvtrowd2ps: @@ -20,7 +20,7 @@ define <16 x float> @test_tcvtrowd2psi() { } declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B) -define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) { +define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) "target-features"="+avx512bf16" { ; CHECK-LABEL: test_tcvtrowps2bf16h: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2bf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1] @@ -29,7 +29,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) { ret <32 x bfloat> %ret } -define <32 x bfloat> @test_tcvtrowps2bf16hi() { +define <32 x bfloat> @test_tcvtrowps2bf16hi() "target-features"="+avx512bf16" { ; CHECK-LABEL: test_tcvtrowps2bf16hi: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2bf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f] @@ -39,7 +39,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16hi() { } declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 %A, i32 %B) -define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) { +define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) "target-features"="+avx512bf16" { ; CHECK-LABEL: test_tcvtrowps2bf16l: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2bf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1] @@ -48,7 +48,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) { ret <32 x bfloat> %ret } -define <32 x bfloat> @test_tcvtrowps2bf16li() { +define <32 x bfloat> @test_tcvtrowps2bf16li() "target-features"="+avx512bf16" { ; CHECK-LABEL: test_tcvtrowps2bf16li: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2bf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f] @@ -58,7 +58,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16li() { } declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 %A, i32 %B) -define <32 x half> @test_tcvtrowps2phh(i32 %A) { +define <32 x half> @test_tcvtrowps2phh(i32 %A) "target-features"="+avx512fp16" { ; CHECK-LABEL: test_tcvtrowps2phh: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2phh %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x44,0x48,0x6d,0xc1] @@ -67,7 +67,7 @@ define <32 x half> @test_tcvtrowps2phh(i32 %A) { ret <32 x half> %ret } -define <32 x half> @test_tcvtrowps2phhi() { +define <32 x half> @test_tcvtrowps2phhi() "target-features"="+avx512fp16" { ; CHECK-LABEL: test_tcvtrowps2phhi: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2phh $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7c,0x48,0x07,0xc1,0x7f] @@ -77,7 +77,7 @@ define <32 x half> @test_tcvtrowps2phhi() { } declare <32 x half> @llvm.x86.tcvtrowps2phh(i8 %A, i32 %B) -define <32 x half> @test_tcvtrowps2phl(i32 %A) { +define <32 x half> @test_tcvtrowps2phl(i32 %A) "target-features"="+avx512fp16" { ; CHECK-LABEL: test_tcvtrowps2phl: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2phl %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x6d,0xc1] @@ -86,7 +86,7 @@ define <32 x half> @test_tcvtrowps2phl(i32 %A) { ret <32 x half> %ret } -define <32 x half> @test_tcvtrowps2phli() { +define <32 x half> @test_tcvtrowps2phli() "target-features"="+avx512fp16" { ; CHECK-LABEL: test_tcvtrowps2phli: ; CHECK: # %bb.0: ; CHECK-NEXT: tcvtrowps2phl $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x77,0xc1,0x7f] diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll index fd3925fabc513..b3b7198979a93 100644 --- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll +++ b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \ -; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+amx-avx512,avx512fp16,+avx512bf16 \ +; RUN: -verify-machineinstrs | FileCheck %s define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) { ; CHECK-LABEL: test_amx: