[DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU as safe

aabhinavg1 · aabhinavg1 · commit ae9706985032 · 2025-07-17T16:16:08.000+05:30
This patch updates `SelectionDAG::canCreateUndefOrPoison` to indicate that `ISD::AVGFLOORS`, `ISD::AVGFLOORU`, `ISD::AVGCEILS` and `ISD::AVGCEILU` do not introduce poison or undef values.

| Opcode | Operation Type | Alive2 Proof Link |
|--------------|-------------------|--------------------|
| `AVGFLOORS` | Signed Floor Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/Dwy8a5) |
| `AVGCEILS` | Signed Ceil Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/_JKF8A) |
| `AVGFLOORU` | Unsigned Floor Avg| [Alive2 Link](https://alive2.llvm.org/ce/z/2-B6RM) |
| `AVGCEILU` | Unsigned Ceil Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/t5WZ6K) |

The signed patterns are safe due to the use of `sext i8` into `i32`, which ensures no signed overflow occurs. The arithmetic is done in the wider domain before truncating safely back to `i8`; the unsigned variants are covered by their own proofs linked above.

Includes AArch64 test coverage (`neon-hadd-freeze.ll`) for `uhadd`, `urhadd`, `shadd` and `srhadd`.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5542,6 +5542,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
   case ISD::USUBSAT:
+  case ISD::AVGFLOORS:
+  case ISD::AVGFLOORU:
+  case ISD::AVGCEILS:
+  case ISD::AVGCEILU:
   case ISD::MULHU:
   case ISD::MULHS:
   case ISD::SMIN:
diff --git a/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll b/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; The target triple is pinned so the expected assembly does not depend on the host's default triple.
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; Each function below freezes the result of one NEON halving-add intrinsic and checks that it is still selected:
+; - uhadd  (llvm.aarch64.neon.uhadd.v8i16)
+; - urhadd (llvm.aarch64.neon.urhadd.v8i16)
+; - shadd  (llvm.aarch64.neon.shadd.v8i16)
+; - srhadd (llvm.aarch64.neon.srhadd.v8i16)
+
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
+
+;===---------------------------------------------------------------------===;
+; uhadd_freeze: both inputs masked with 15, uhadd result frozen, then masked with 31; uhadd must still be emitted
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @uhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: uhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    movi v2.8h, #31
+; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %lhs = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %rhs = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %havg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %havg.fr = freeze <8 x i16> %havg
+  %res = and <8 x i16> %havg.fr, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  ret <8 x i16> %res
+}
+
+;===---------------------------------------------------------------------===;
+; urhadd_freeze: both inputs masked with 15, urhadd result frozen, then masked with 31; urhadd must still be emitted
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @urhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: urhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    movi v2.8h, #31
+; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %lhs = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %rhs = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %ravg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %ravg.fr = freeze <8 x i16> %ravg
+  %res = and <8 x i16> %ravg.fr, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  ret <8 x i16> %res
+}
+
+;===---------------------------------------------------------------------===;
+; shadd_freeze: both inputs masked with -16, shadd result frozen, then masked with 63; shadd must still be emitted
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @shadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic v0.8h, #15
+; CHECK-NEXT:    bic v1.8h, #15
+; CHECK-NEXT:    movi v2.8h, #63
+; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %lhs = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %rhs = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %havg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %havg.fr = freeze <8 x i16> %havg
+  %res = and <8 x i16> %havg.fr, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+  ret <8 x i16> %res
+}
+
+;===---------------------------------------------------------------------===;
+; srhadd_freeze: both inputs masked with -16, srhadd result frozen, then masked with 63; srhadd must still be emitted
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @srhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic v0.8h, #15
+; CHECK-NEXT:    bic v1.8h, #15
+; CHECK-NEXT:    movi v2.8h, #63
+; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %lhs = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %rhs = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %ravg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %ravg.fr = freeze <8 x i16> %ravg
+  %res = and <8 x i16> %ravg.fr, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+  ret <8 x i16> %res
+}