Skip to content

Commit ae97069

Browse files
committed
[DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS, AVGFLOORU, AVGCEILS and AVGCEILU as safe
This patch updates `SelectionDAG::canCreateUndefOrPoison` to indicate that the averaging opcodes `ISD::AVGFLOORS`, `ISD::AVGFLOORU`, `ISD::AVGCEILS` and `ISD::AVGCEILU` do not introduce poison or undef values.

| Opcode      | Operation Type     | Alive2 Proof Link                                  |
|-------------|--------------------|----------------------------------------------------|
| `AVGFLOORS` | Signed Floor Avg   | [Alive2 Link](https://alive2.llvm.org/ce/z/Dwy8a5) |
| `AVGCEILS`  | Signed Ceil Avg    | [Alive2 Link](https://alive2.llvm.org/ce/z/_JKF8A) |
| `AVGFLOORU` | Unsigned Floor Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/2-B6RM) |
| `AVGCEILU`  | Unsigned Ceil Avg  | [Alive2 Link](https://alive2.llvm.org/ce/z/t5WZ6K) |

These patterns are safe because the `i8` operands are extended to `i32` before the addition (`sext` for the signed variants, the analogous zero-extension for the unsigned ones), which ensures no overflow occurs. The arithmetic is done in the wider domain before truncating safely back to `i8`.

Includes test coverage to ensure correctness.
1 parent a4d95c2 commit ae97069

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5542,6 +5542,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
55425542
case ISD::UADDSAT:
55435543
case ISD::SSUBSAT:
55445544
case ISD::USUBSAT:
5545+
case ISD::AVGFLOORS:
5546+
case ISD::AVGFLOORU:
5547+
case ISD::AVGCEILS:
5548+
case ISD::AVGCEILU:
55455549
case ISD::MULHU:
55465550
case ISD::MULHS:
55475551
case ISD::SMIN:
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; Use an explicit target triple so the generated assembly (e.g. the '//' comment
; syntax matched by the assertions below) does not depend on the host's default triple.
; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
4+
; Test that the presence of 'freeze' does not block instruction selection of:
5+
; - uhadd
6+
; - urhadd
7+
; - shadd
8+
; - srhadd
9+
10+
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
11+
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
12+
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
13+
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
14+
15+
;===---------------------------------------------------------------------===;
16+
; Test: freeze does not block uhadd instruction selection
17+
;===---------------------------------------------------------------------===;
18+
19+
define <8 x i16> @uhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
20+
; CHECK-LABEL: uhadd_freeze:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: movi v2.8h, #15
23+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
24+
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
25+
; CHECK-NEXT: movi v2.8h, #31
26+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
27+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
28+
; CHECK-NEXT: ret
; Both inputs are masked to their low 4 bits (and with 15) and passed to the
; uhadd intrinsic; the result is frozen and then masked to its low 5 bits
; (and with 31). The assertions above expect the frozen call to still be
; selected as a single uhadd instruction between the input and output masks.
29+
%m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
30+
%m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
31+
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
32+
%frozen = freeze <8 x i16> %avg
33+
%masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
34+
ret <8 x i16> %masked
35+
}
36+
37+
;===---------------------------------------------------------------------===;
38+
; Test: freeze does not block urhadd instruction selection
39+
;===---------------------------------------------------------------------===;
40+
41+
define <8 x i16> @urhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
42+
; CHECK-LABEL: urhadd_freeze:
43+
; CHECK: // %bb.0:
44+
; CHECK-NEXT: movi v2.8h, #15
45+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
46+
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
47+
; CHECK-NEXT: movi v2.8h, #31
48+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
49+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
50+
; CHECK-NEXT: ret
; Both inputs are masked to their low 4 bits (and with 15) and passed to the
; urhadd intrinsic; the result is frozen and then masked to its low 5 bits
; (and with 31). The assertions above expect the frozen call to still be
; selected as a single urhadd instruction between the input and output masks.
51+
%m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
52+
%m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
53+
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
54+
%frozen = freeze <8 x i16> %avg
55+
%masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
56+
ret <8 x i16> %masked
57+
}
58+
59+
;===---------------------------------------------------------------------===;
60+
; Test: freeze does not block shadd instruction selection
61+
;===---------------------------------------------------------------------===;
62+
63+
define <8 x i16> @shadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
64+
; CHECK-LABEL: shadd_freeze:
65+
; CHECK: // %bb.0:
66+
; CHECK-NEXT: bic v0.8h, #15
67+
; CHECK-NEXT: bic v1.8h, #15
68+
; CHECK-NEXT: movi v2.8h, #63
69+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
70+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
71+
; CHECK-NEXT: ret
; Both inputs have their low 4 bits cleared (and with -16) and are passed to
; the shadd intrinsic; the result is frozen and then masked to its low 6 bits
; (and with 63). The assertions above expect the frozen call to still be
; selected as a single shadd instruction between the input and output masks.
72+
%m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
73+
%m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
74+
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
75+
%frozen = freeze <8 x i16> %avg
76+
%masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
77+
ret <8 x i16> %masked
78+
}
79+
80+
;===---------------------------------------------------------------------===;
81+
; Test: freeze does not block srhadd instruction selection
82+
;===---------------------------------------------------------------------===;
83+
84+
define <8 x i16> @srhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
85+
; CHECK-LABEL: srhadd_freeze:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: bic v0.8h, #15
88+
; CHECK-NEXT: bic v1.8h, #15
89+
; CHECK-NEXT: movi v2.8h, #63
90+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
91+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
92+
; CHECK-NEXT: ret
; Both inputs have their low 4 bits cleared (and with -16) and are passed to
; the srhadd intrinsic; the result is frozen and then masked to its low 6 bits
; (and with 63). The assertions above expect the frozen call to still be
; selected as a single srhadd instruction between the input and output masks.
93+
%m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
94+
%m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
95+
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
96+
%frozen = freeze <8 x i16> %avg
97+
%masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
98+
ret <8 x i16> %masked
99+
}

0 commit comments

Comments
 (0)