From a7716111ed933725f72f669f92b428325f47654f Mon Sep 17 00:00:00 2001 From: Pankaj kumar divedi Date: Mon, 28 Apr 2025 17:08:43 +0530 Subject: [PATCH 1/2] adds target hook to support more inst uniformity --- .../llvm/Analysis/TargetTransformInfo.h | 8 ++++++ .../llvm/Analysis/TargetTransformInfoImpl.h | 5 ++++ llvm/lib/Analysis/TargetTransformInfo.cpp | 5 ++++ llvm/lib/Analysis/UniformityAnalysis.cpp | 13 ++++++++++ .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 21 ++++++++++++++++ .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 ++ .../AMDGPU/uniform_intrinsic.ll | 25 +++++++++++++++++++ 7 files changed, 79 insertions(+) create mode 100644 llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 022530dc846ea..9af5006ce9c6d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -23,6 +23,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Uniformity.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/FMF.h" #include "llvm/IR/InstrTypes.h" @@ -1916,6 +1917,13 @@ class TargetTransformInfo { const Function &F, SmallVectorImpl> &LB) const; + /// Target can implement more complex patterns for getting Uniformity of an + /// instruction.Currently Uniformity analysis catagorises instructions with a + /// fixed set of InstructionUniformity values: Default, AlwaysUniform and + /// NeverUniform. 
+ std::optional + getInstructionUniformity(const Instruction &I) const; + private: std::unique_ptr TTIImpl; }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 990252b1e5743..5bee462575181 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1147,6 +1147,11 @@ class TargetTransformInfoImplBase { const Function &F, SmallVectorImpl> &LB) const {} + virtual std::optional + getInstructionUniformity(const Instruction &I) const { + return std::nullopt; + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8548afea72964..50157a7714bf7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds( return TTIImpl->collectKernelLaunchBounds(F, LB); } +std::optional +TargetTransformInfo::getInstructionUniformity(const Instruction &I) const { + return TTIImpl->getInstructionUniformity(I); +} + TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp index 2101fdfacfc8f..2fc6f523139a7 100644 --- a/llvm/lib/Analysis/UniformityAnalysis.cpp +++ b/llvm/lib/Analysis/UniformityAnalysis.cpp @@ -35,7 +35,20 @@ template <> void llvm::GenericUniformityAnalysisImpl::initialize() { markDivergent(I); else if (TTI->isAlwaysUniform(&I)) addUniformOverride(I); + else if (auto Uniformity = TTI->getInstructionUniformity(I)) { + switch (*Uniformity) { + case InstructionUniformity::AlwaysUniform: + addUniformOverride(I); + break; + case 
InstructionUniformity::NeverUniform: + markDivergent(I); + break; + case InstructionUniformity::Default: + break; + } + } } + for (auto &Arg : F.args()) { if (TTI->isSourceOfDivergence(&Arg)) { markDivergent(&Arg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 204d3df546bbf..5c59847dfeb62 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1422,3 +1422,24 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first}); LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } + +std::optional +GCNTTIImpl::getInstructionUniformity(const Instruction &I) const { + if (const auto *II = dyn_cast(&I)) { + // We can define the custom rules for the intrinsics uniformity, depending + // on argument. + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_permlane64: + // If either operand is uniform, the result is uniform. 
+ for (unsigned Arg_i = 0, NumArg = II->arg_size(); Arg_i < NumArg; + Arg_i++) { + if (!isSourceOfDivergence(II->getArgOperand(Arg_i))) + return InstructionUniformity::AlwaysUniform; + } + return InstructionUniformity::Default; + default: + break; + } + } + return std::nullopt; +} \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index f6f7bd4bfcf5b..bea0b024d745b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -290,6 +290,8 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + std::optional + getInstructionUniformity(const Instruction &I) const override; }; } // end namespace llvm diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll new file mode 100644 index 0000000000000..4bb89516b2e81 --- /dev/null +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll @@ -0,0 +1,25 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s + +; CHECK: ALL VALUES UNIFORM +define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) { + %v = call i32 @llvm.amdgcn.permlane64(i32 7) + store i32 %v, ptr addrspace(1) %out + ret void +} + +; CHECK: ALL VALUES UNIFORM +define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) { + %v = call i32 @llvm.amdgcn.permlane64(i32 %src) + store i32 %v, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %tid) +define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid) + %out_ptr = 
getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} From 8cca1b2fa5fbfe12eb29ca75770e0315d5d55940 Mon Sep 17 00:00:00 2001 From: Pankaj kumar divedi Date: Tue, 6 May 2025 18:21:50 +0530 Subject: [PATCH 2/2] [WIP] currently users of a divergent value are marked as divergent, instead target could customize it --- llvm/include/llvm/ADT/GenericUniformityImpl.h | 6 ++ .../llvm/Analysis/TargetTransformInfo.h | 5 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 5 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 6 +- llvm/lib/Analysis/UniformityAnalysis.cpp | 70 ++++++++++++------- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 17 ++--- .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 5 +- 7 files changed, 70 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h index d10355fff1bea..92661eeb02604 100644 --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -51,6 +51,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Uniformity.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "uniformity" @@ -406,6 +407,11 @@ template class GenericUniformityAnalysisImpl { void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *); + /// @brief Uniformity of an instruction's operands. + /// @param I instruction. + /// @return the InstructionUniformity of each inspected operand. + llvm::SmallVector + getOperandUniformities(const Instruction &I) const; protected: /// \brief Value/block pair representing a single phi input. 
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 9af5006ce9c6d..81bc8d291c2b2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1921,8 +1921,9 @@ class TargetTransformInfo { /// instruction.Currently Uniformity analysis catagorises instructions with a /// fixed set of InstructionUniformity values: Default, AlwaysUniform and /// NeverUniform. - std::optional - getInstructionUniformity(const Instruction &I) const; + std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const; private: std::unique_ptr TTIImpl; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 5bee462575181..6412aa56a4ab0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1147,8 +1147,9 @@ class TargetTransformInfoImplBase { const Function &F, SmallVectorImpl> &LB) const {} - virtual std::optional - getInstructionUniformity(const Instruction &I) const { + virtual std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { return std::nullopt; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 50157a7714bf7..b9e85a15c4315 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1477,8 +1477,10 @@ void TargetTransformInfo::collectKernelLaunchBounds( } std::optional -TargetTransformInfo::getInstructionUniformity(const Instruction &I) const { - return TTIImpl->getInstructionUniformity(I); +TargetTransformInfo::getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { + return TTIImpl->getInstructionUniformity(I, OperandUniformities); } 
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default; diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp index 2fc6f523139a7..0228fcd62f6df 100644 --- a/llvm/lib/Analysis/UniformityAnalysis.cpp +++ b/llvm/lib/Analysis/UniformityAnalysis.cpp @@ -29,24 +29,41 @@ bool llvm::GenericUniformityAnalysisImpl::markDefsDivergent( return markDivergent(cast(&Instr)); } +template <> +bool llvm::GenericUniformityAnalysisImpl::isDivergentUse( + const Use &U) const { + const auto *V = U.get(); + if (isDivergent(V)) + return true; + if (const auto *DefInstr = dyn_cast(V)) { + const auto *UseInstr = cast(U.getUser()); + return isTemporalDivergent(*UseInstr->getParent(), *DefInstr); + } + return false; +} + +template <> +llvm::SmallVector +llvm::GenericUniformityAnalysisImpl::getOperandUniformities( + const Instruction &I) const { + SmallVector OperandUniformities; + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + if (!isa(I.getOperand(i)) && !isa(I.getOperand(i))) + continue; + const Use &U = I.getOperandUse(i); + OperandUniformities.push_back(isDivergentUse(U) + ? 
InstructionUniformity::NeverUniform + : InstructionUniformity::AlwaysUniform); + } + return OperandUniformities; +} + template <> void llvm::GenericUniformityAnalysisImpl::initialize() { for (auto &I : instructions(F)) { if (TTI->isSourceOfDivergence(&I)) markDivergent(I); else if (TTI->isAlwaysUniform(&I)) addUniformOverride(I); - else if (auto Uniformity = TTI->getInstructionUniformity(I)) { - switch (*Uniformity) { - case InstructionUniformity::AlwaysUniform: - addUniformOverride(I); - break; - case InstructionUniformity::NeverUniform: - markDivergent(I); - break; - case InstructionUniformity::Default: - break; - } - } } for (auto &Arg : F.args()) { @@ -60,9 +77,23 @@ template <> void llvm::GenericUniformityAnalysisImpl::pushUsers( const Value *V) { for (const auto *User : V->users()) { - if (const auto *UserInstr = dyn_cast(User)) { + const auto *UserInstr = dyn_cast(User); + if (!UserInstr) + continue; + + if (!TTI) { markDivergent(*UserInstr); + continue; } + + auto Uniformity = TTI->getInstructionUniformity( + *UserInstr, getOperandUniformities(*UserInstr)); + if (!Uniformity || *Uniformity == InstructionUniformity::Default) + markDivergent(*UserInstr); // fallback: conservative + else if (*Uniformity == InstructionUniformity::NeverUniform) + markDivergent(*UserInstr); + else if (*Uniformity == InstructionUniformity::AlwaysUniform) + addUniformOverride(*UserInstr); } } @@ -101,19 +132,6 @@ void llvm::GenericUniformityAnalysisImpl< } } -template <> -bool llvm::GenericUniformityAnalysisImpl::isDivergentUse( - const Use &U) const { - const auto *V = U.get(); - if (isDivergent(V)) - return true; - if (const auto *DefInstr = dyn_cast(V)) { - const auto *UseInstr = cast(U.getUser()); - return isTemporalDivergent(*UseInstr->getParent(), *DefInstr); - } - return false; -} - // This ensures explicit instantiation of // GenericUniformityAnalysisImpl::ImplDeleter::operator() template class llvm::GenericUniformityInfo; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 5c59847dfeb62..57c8fdcf085ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1423,19 +1423,16 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } -std::optional -GCNTTIImpl::getInstructionUniformity(const Instruction &I) const { +std::optional GCNTTIImpl::getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { if (const auto *II = dyn_cast(&I)) { - // We can define the custom rules for the intrinsics uniformity, depending - // on argument. switch (II->getIntrinsicID()) { case Intrinsic::amdgcn_permlane64: - // If either operand is uniform, the result is uniform. - for (unsigned Arg_i = 0, NumArg = II->arg_size(); Arg_i < NumArg; - Arg_i++) { - if (!isSourceOfDivergence(II->getArgOperand(Arg_i))) - return InstructionUniformity::AlwaysUniform; - } + if (llvm::any_of(OperandUniformities, [](InstructionUniformity U) { + return U == InstructionUniformity::AlwaysUniform; + })) + return InstructionUniformity::AlwaysUniform; return InstructionUniformity::Default; default: break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index bea0b024d745b..af47cf4b8f068 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -290,8 +290,9 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; - std::optional - getInstructionUniformity(const Instruction &I) const override; + std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const override; }; } // end namespace llvm