Skip to content

[X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2 #148633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86_64.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
}

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
let Features = "amx-avx512", Attributes = [NoThrow] in {
def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
}

let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
}

let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
}

let Features = "amx-tf32", Attributes = [NoThrow] in {
Expand Down Expand Up @@ -382,13 +388,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
}

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
let Features = "amx-avx512", Attributes = [NoThrow] in {
def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
}

let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
}

let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
}

let Features = "amx-fp16", Attributes = [NoThrow] in {
Expand Down
32 changes: 21 additions & 11 deletions clang/lib/Headers/amxavx512intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,15 @@

#define __DEFAULT_FN_ATTRS_AVX512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("amx-avx512,avx10.2-512")))
__target__("amx-avx512")))

#define __DEFAULT_FN_ATTRS_AVX512BF16 \
__attribute__((__always_inline__, __nodebug__, \
__target__("amx-avx512,avx512bf16")))

#define __DEFAULT_FN_ATTRS_AVX512FP16 \
__attribute__((__always_inline__, __nodebug__, \
__target__("amx-avx512,avx512fp16")))
Comment on lines +21 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need avx512bf/fp16 here. We don't rely on their specific instructions.


/// Moves a row from a tile register to a zmm destination register, converting
/// the int32 source elements to fp32. The row of the tile is selected by a
Expand Down Expand Up @@ -237,25 +245,27 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16
_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16
_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16
_tile_cvtrowps2phh_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16
_tile_cvtrowps2phl_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
}

Expand Down Expand Up @@ -298,7 +308,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
__DEFAULT_FN_ATTRS_AVX512BF16
static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
}
Expand All @@ -317,7 +327,7 @@ static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
__DEFAULT_FN_ATTRS_AVX512BF16
static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
}
Expand All @@ -336,7 +346,7 @@ static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
__DEFAULT_FN_ATTRS_AVX512FP16
static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
}
Expand All @@ -355,7 +365,7 @@ static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
__DEFAULT_FN_ATTRS_AVX512FP16
static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
}
Expand Down
8 changes: 6 additions & 2 deletions clang/test/CodeGen/X86/amx_avx512_api.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
// RUN: -target-feature +amx-avx512 -emit-llvm -o - -Werror -pedantic | \
// RUN: FileCheck %s --check-prefixes=CHECK

#include <immintrin.h>

Expand All @@ -16,27 +16,31 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
return __tile_cvtrowd2ps(a, b);
}

__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal
return __tile_cvtrowps2bf16h(a, b);
}

__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal
return __tile_cvtrowps2bf16l(a, b);
}

__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phh
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal
return __tile_cvtrowps2phh(a, b);
}

__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phl
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
Expand Down
6 changes: 5 additions & 1 deletion clang/test/CodeGen/X86/amxavx512-builtins.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s
// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s

#include <immintrin.h>
#include <stddef.h>
Expand All @@ -10,24 +10,28 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) {
return _tile_cvtrowd2ps(1, A);
}

__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16h(1, A);
}

__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16l(1, A);
}

__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phh(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phh(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phh(1, A);
}

__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phl(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phl(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}})
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "
def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
"HasAMXAVX512", "true",
"Support AMX-AVX512 instructions",
[FeatureAMXTILE]>;
[FeatureAMXTILE, FeatureAVX512,
FeatureEVEX512]>;
def FeatureAMXTF32 : SubtargetFeature<"amx-tf32", "HasAMXTF32", "true",
"Support AMX-TF32 instructions",
[FeatureAMXTILE]>;
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
} // HasAMXMOVRS, In64BitMode

multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
(ins TILE:$src1, i32u8imm:$src2),
Expand All @@ -561,12 +561,12 @@ multiclass m_tcvtrowd2ps {
"tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T8,XS, EVEX, VVVV, EVEX_V512;
}
} // HasAMXAVX512, HasAVX10_2_512, In64BitMode
} // HasAMXAVX512, In64BitMode
}

defm TCVTROWD2PS : m_tcvtrowd2ps;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
Expand Down Expand Up @@ -630,7 +630,7 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {

multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
Prefix P1, Prefix P2> {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
let Predicates = [HasAMXAVX512, In64BitMode], SchedRW = [WriteSystem] in {
let OpPrefix = P1 in
def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
(ins TILE:$src1, GR32:$src2),
Expand Down Expand Up @@ -658,7 +658,7 @@ defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;

multiclass m_tilemovrow {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
(ins TILE:$src1, u8imm:$src2),
Expand All @@ -669,12 +669,12 @@ multiclass m_tilemovrow {
"tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T8,PD, EVEX, VVVV, EVEX_V512;
}
} // HasAMXAVX512, HasAVX10_2_512, In64BitMode
} // HasAMXAVX512, In64BitMode
}

defm TILEMOVROW : m_tilemovrow;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/TargetParser/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_MOVRS = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 =
FeatureAMX_TILE | FeatureAVX10_2_512;
FeatureAMX_TILE | FeatureAVX512F | FeatureEVEX512;
constexpr FeatureBitset ImpliedFeaturesAMX_TF32 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s

@buf = dso_local global [3072 x i8] zeroinitializer, align 64

Expand Down Expand Up @@ -95,7 +95,7 @@ define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind {
; O0-NEXT: movq %rsp, %rbp
; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00
; O0-NEXT: subq $4096, %rsp # imm = 0x1000
; O0-NEXT: vpxor %xmm0, %xmm0, %xmm0
; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0
; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
; O0-NEXT: movb $1, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %si, %cx
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512 | FileCheck %s

define <16 x float> @test_tcvtrowd2ps(i32 %A) {
; CHECK-LABEL: test_tcvtrowd2ps:
Expand All @@ -20,7 +20,7 @@ define <16 x float> @test_tcvtrowd2psi() {
}
declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B)

define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) {
define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16h:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1]
Expand All @@ -29,7 +29,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) {
ret <32 x bfloat> %ret
}

define <32 x bfloat> @test_tcvtrowps2bf16hi() {
define <32 x bfloat> @test_tcvtrowps2bf16hi() "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16hi:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f]
Expand All @@ -39,7 +39,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16hi() {
}
declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 %A, i32 %B)

define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) {
define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16l:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1]
Expand All @@ -48,7 +48,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) {
ret <32 x bfloat> %ret
}

define <32 x bfloat> @test_tcvtrowps2bf16li() {
define <32 x bfloat> @test_tcvtrowps2bf16li() "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16li:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f]
Expand All @@ -58,7 +58,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16li() {
}
declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 %A, i32 %B)

define <32 x half> @test_tcvtrowps2phh(i32 %A) {
define <32 x half> @test_tcvtrowps2phh(i32 %A) "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phh:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phh %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x44,0x48,0x6d,0xc1]
Expand All @@ -67,7 +67,7 @@ define <32 x half> @test_tcvtrowps2phh(i32 %A) {
ret <32 x half> %ret
}

define <32 x half> @test_tcvtrowps2phhi() {
define <32 x half> @test_tcvtrowps2phhi() "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phhi:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phh $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7c,0x48,0x07,0xc1,0x7f]
Expand All @@ -77,7 +77,7 @@ define <32 x half> @test_tcvtrowps2phhi() {
}
declare <32 x half> @llvm.x86.tcvtrowps2phh(i8 %A, i32 %B)

define <32 x half> @test_tcvtrowps2phl(i32 %A) {
define <32 x half> @test_tcvtrowps2phl(i32 %A) "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phl:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phl %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x6d,0xc1]
Expand All @@ -86,7 +86,7 @@ define <32 x half> @test_tcvtrowps2phl(i32 %A) {
ret <32 x half> %ret
}

define <32 x half> @test_tcvtrowps2phli() {
define <32 x half> @test_tcvtrowps2phli() "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phli:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phl $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x77,0xc1,0x7f]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \
; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+amx-avx512,avx512fp16,+avx512bf16 \
; RUN: -verify-machineinstrs | FileCheck %s

define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) {
; CHECK-LABEL: test_amx:
Expand Down
Loading