Skip to content

Commit 58d79aa

Browse files
authored
[AArch64] Guard against non-simple types in udiv sve costs. (#148580)
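The code here probably needs to change to handle types more uniformly, but for now this patch adds a `VT.isSimple()` guard so that the SVE sdiv/udiv cost-table lookup is only attempted when the type has a simple value type, rather than trying to use a simple type where one does not exist. Fixes #148438.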
1 parent 7345508 commit 58d79aa

File tree

2 files changed: 10 additions and 4 deletions

2 files changed

+10
-4
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4126,10 +4126,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
41264126
if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
41274127
// SDIV/UDIV operations are lowered using SVE, then we can have less
41284128
// costs.
4129-
if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
4130-
->getPrimitiveSizeInBits()
4131-
.getFixedValue() < 128) {
4132-
EVT VT = TLI->getValueType(DL, Ty);
4129+
if (VT.isSimple() && isa<FixedVectorType>(Ty) &&
4130+
Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
41334131
static const CostTblEntry DivTbl[]{
41344132
{ISD::SDIV, MVT::v2i8, 5}, {ISD::SDIV, MVT::v4i8, 8},
41354133
{ISD::SDIV, MVT::v8i8, 8}, {ISD::SDIV, MVT::v2i16, 5},

llvm/test/Analysis/CostModel/AArch64/sve-div.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ define void @sdiv() {
1010
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, undef
1111
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, undef
1212
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, undef
13+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V6i32 = sdiv <6 x i32> undef, undef
1314
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i32 = sdiv <8 x i32> undef, undef
1415
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, undef
1516
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, undef
@@ -19,6 +20,7 @@ define void @sdiv() {
1920
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, undef
2021
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, undef
2122
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, undef
23+
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V6i8 = sdiv <6 x i8> undef, undef
2224
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, undef
2325
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, undef
2426
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, undef
@@ -48,6 +50,7 @@ define void @sdiv() {
4850
%V8i64 = sdiv <8 x i64> undef, undef
4951
%V2i32 = sdiv <2 x i32> undef, undef
5052
%V4i32 = sdiv <4 x i32> undef, undef
53+
%V6i32 = sdiv <6 x i32> undef, undef
5154
%V8i32 = sdiv <8 x i32> undef, undef
5255
%V16i32 = sdiv <16 x i32> undef, undef
5356
%V2i16 = sdiv <2 x i16> undef, undef
@@ -57,6 +60,7 @@ define void @sdiv() {
5760
%V32i16 = sdiv <32 x i16> undef, undef
5861
%V2i8 = sdiv <2 x i8> undef, undef
5962
%V4i8 = sdiv <4 x i8> undef, undef
63+
%V6i8 = sdiv <6 x i8> undef, undef
6064
%V8i8 = sdiv <8 x i8> undef, undef
6165
%V16i8 = sdiv <16 x i8> undef, undef
6266
%V32i8 = sdiv <32 x i8> undef, undef
@@ -89,6 +93,7 @@ define void @udiv() {
8993
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, undef
9094
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, undef
9195
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, undef
96+
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V6i32 = udiv <6 x i32> undef, undef
9297
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i32 = udiv <8 x i32> undef, undef
9398
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, undef
9499
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, undef
@@ -98,6 +103,7 @@ define void @udiv() {
98103
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, undef
99104
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, undef
100105
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, undef
106+
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V6i8 = udiv <6 x i8> undef, undef
101107
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, undef
102108
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, undef
103109
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, undef
@@ -127,6 +133,7 @@ define void @udiv() {
127133
%V8i64 = udiv <8 x i64> undef, undef
128134
%V2i32 = udiv <2 x i32> undef, undef
129135
%V4i32 = udiv <4 x i32> undef, undef
136+
%V6i32 = udiv <6 x i32> undef, undef
130137
%V8i32 = udiv <8 x i32> undef, undef
131138
%V16i32 = udiv <16 x i32> undef, undef
132139
%V2i16 = udiv <2 x i16> undef, undef
@@ -136,6 +143,7 @@ define void @udiv() {
136143
%V32i16 = udiv <32 x i16> undef, undef
137144
%V2i8 = udiv <2 x i8> undef, undef
138145
%V4i8 = udiv <4 x i8> undef, undef
146+
%V6i8 = udiv <6 x i8> undef, undef
139147
%V8i8 = udiv <8 x i8> undef, undef
140148
%V16i8 = udiv <16 x i8> undef, undef
141149
%V32i8 = udiv <32 x i8> undef, undef

0 commit comments

Comments
 (0)