From 858c7b9c04123c96fffec247d63e2ee68312ab23 Mon Sep 17 00:00:00 2001 From: hanhanW Date: Mon, 12 Aug 2024 11:34:47 -0700 Subject: [PATCH 1/2] move createXXXPass where it is defined (i.e., LLVMGPU/). Signed-off-by: hanhanW --- compiler/src/iree/compiler/Codegen/Common/Passes.h | 5 ----- compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.h b/compiler/src/iree/compiler/Codegen/Common/Passes.h index b6db41b616f1..e9ca9d81f1be 100644 --- a/compiler/src/iree/compiler/Codegen/Common/Passes.h +++ b/compiler/src/iree/compiler/Codegen/Common/Passes.h @@ -88,11 +88,6 @@ createTileAndDistributeToWorkgroupsPass( int32_t maxWorkgroupParallelDims, linalg::DistributionMethod distributionMethod); -// TODO(hanchung): Move it where it is defined (i.e., Codegen/LLVMGPU). -// Extract address computations (including the ones with GPU instructions) into -// their own separate instructions. -std::unique_ptr createExtractAddressComputationGPUPass(); - //----------------------------------------------------------------------------// // CodeGen Common Patterns //----------------------------------------------------------------------------// diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h index fb8427502278..527317cfb253 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h @@ -130,6 +130,10 @@ createLLVMGPUCastTypeToFitMMAPass(); std::unique_ptr> createLLVMGPUDistribute(); +// Extract address computations (including the ones with GPU instructions) into +// their own separate instructions. +std::unique_ptr createExtractAddressComputationGPUPass(); + /// Create pass selecting the lowering strategy for LLVMGPU. 
std::unique_ptr> createLLVMGPUSelectLoweringStrategyPass(); From 354ab2ee13a7c9ad0eb108e66d4bfbe7ec2aef49 Mon Sep 17 00:00:00 2001 From: hanhanW Date: Tue, 13 Aug 2024 16:32:12 -0700 Subject: [PATCH 2/2] [LLVMGPU] Switch to new pass generation tablegen definitions. This is mostly an NFC change. The revision applies a few cleanups: - Switch a couple of passes to follow `create.*Pass` naming convention. Signed-off-by: hanhanW --- .../LLVMGPU/AMDGPUChainedMatmulPass.cpp | 15 +-- .../iree/compiler/Codegen/LLVMGPU/BUILD.bazel | 2 - .../compiler/Codegen/LLVMGPU/CMakeLists.txt | 2 - .../Codegen/LLVMGPU/ConvertToLLVM.cpp | 13 +- .../Codegen/LLVMGPU/ConvertToNVVM.cpp | 15 ++- .../Codegen/LLVMGPU/ConvertToROCDL.cpp | 15 ++- .../ExtractAddressComputationGPUPass.cpp | 15 +-- .../LLVMGPUCastAddressSpaceFunction.cpp | 15 +-- .../LLVMGPU/LLVMGPUCastTypeToFitMMA.cpp | 14 +- .../LLVMGPU/LLVMGPUConfigureVectorLayouts.cpp | 15 +-- .../LLVMGPU/LLVMGPULowerExecutableTarget.cpp | 20 ++- .../LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp | 10 +- .../Codegen/LLVMGPU/LLVMGPUPrefetching.cpp | 13 +- .../LLVMGPU/LLVMGPUPromoteMatmulToFitMMA.cpp | 11 +- .../LLVMGPU/LLVMGPUSelectLoweringStrategy.cpp | 20 ++- .../LLVMGPUTensorCoreVectorization.cpp | 15 ++- .../Codegen/LLVMGPU/LLVMGPUTensorPad.cpp | 14 +- .../LLVMGPU/LLVMGPUTileAndDistribute.cpp | 15 ++- .../LLVMGPU/LLVMGPUVectorDistribute.cpp | 15 +-- .../Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp | 15 +-- .../Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp | 13 +- .../compiler/Codegen/LLVMGPU/PassDetail.h | 26 ---- .../iree/compiler/Codegen/LLVMGPU/Passes.cpp | 25 ++-- .../iree/compiler/Codegen/LLVMGPU/Passes.h | 121 +++--------------- .../iree/compiler/Codegen/LLVMGPU/Passes.td | 82 ++++++------ .../LLVMGPU/ROCDLLowerExecutableTarget.cpp | 16 +-- .../Codegen/LLVMGPU/ROCDLPassDetail.h | 21 --- .../compiler/Codegen/LLVMGPU/ROCDLPasses.h | 16 +-- .../compiler/Codegen/LLVMGPU/ROCDLPasses.td | 8 +- .../LLVMGPU/ROCDLSelectLoweringStrategy.cpp | 15 +-- 30 files 
changed, 224 insertions(+), 388 deletions(-) delete mode 100644 compiler/src/iree/compiler/Codegen/LLVMGPU/PassDetail.h delete mode 100644 compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/AMDGPUChainedMatmulPass.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/AMDGPUChainedMatmulPass.cpp index d22bd206b053..6a1646e5c4ac 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/AMDGPUChainedMatmulPass.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/AMDGPUChainedMatmulPass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" + #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/VectorOpUtils.h" #include "mlir/Analysis/SliceAnalysis.h" @@ -13,6 +13,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_AMDGPUPREPAREFORCHAINEDMATMULPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + using VectorValue = TypedValue; namespace { @@ -59,8 +62,8 @@ namespace { /// C = A @ B --> C.T = B.T @ A.T /// is only defined on standard "@" function, it may be a different /// transformation for other indexing maps. 
-struct AMDGPUPrepareForChainedMatmulPass - : public AMDGPUPrepareForChainedMatmulBase< +struct AMDGPUPrepareForChainedMatmulPass final + : impl::AMDGPUPrepareForChainedMatmulPassBase< AMDGPUPrepareForChainedMatmulPass> { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); @@ -255,10 +258,4 @@ struct AMDGPUPrepareForChainedMatmulPass }; } // namespace - -std::unique_ptr> -createAMDGPUPrepareForChainedMatmulPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel index c57e1f585186..9ef45c757d63 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel @@ -32,7 +32,6 @@ iree_gentbl_cc_library( iree_compiler_cc_library( name = "PassHeaders", hdrs = [ - "PassDetail.h", "Passes.h", "Passes.h.inc", ], @@ -69,7 +68,6 @@ iree_gentbl_cc_library( iree_compiler_cc_library( name = "ROCDLPassHeaders", hdrs = [ - "ROCDLPassDetail.h", "ROCDLPasses.h", "ROCDLPasses.h.inc", ], diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt index 30e722e39307..a5d3b0844462 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt @@ -23,7 +23,6 @@ iree_cc_library( NAME PassHeaders HDRS - "PassDetail.h" "Passes.h" "Passes.h.inc" DEPS @@ -52,7 +51,6 @@ iree_cc_library( NAME ROCDLPassHeaders HDRS - "ROCDLPassDetail.h" "ROCDLPasses.h" "ROCDLPasses.h.inc" DEPS diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp index 821623ddf609..a1112454fcb9 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.cpp @@ -6,7 +6,6 @@ #include 
"iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "iree/compiler/Codegen/Utils/Utils.h" @@ -25,6 +24,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_TESTLLVMGPUSCALARIZEMATHOPPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + void ConvertToDynamicSharedMemory(ModuleOp moduleOp) { SymbolTableCollection symbolTableCollection; // Collect all the adressOfOps to static shared memory globals. @@ -183,8 +185,9 @@ struct ConvertSharedMemAllocOp : public OpRewritePattern { /// Pass to test in dialect transformation used to legalize the IR before /// convertToNVVM/ConvertToROCDL. -class TestLLVMGPULegalizeOpPass - : public TestLLVMGPUScalarizeMathOpBase { +class TestLLVMGPULegalizeOpPass final + : public impl::TestLLVMGPUScalarizeMathOpPassBase< + TestLLVMGPULegalizeOpPass> { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } @@ -542,10 +545,6 @@ void populateLowerHALInterfaceOp(RewritePatternSet &patterns) { patterns.getContext()); } -std::unique_ptr> createTestLLVMGPULegalizePass() { - return std::make_unique(); -} - static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) { return IntegerAttr::get(IntegerType::get(ctx, 64), space); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp index edcb2bb59cfb..b1ac58b1b62f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp @@ -8,7 +8,6 @@ #include "iree/compiler/Codegen/Common/Transforms.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" 
#include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" @@ -24,6 +23,7 @@ #include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h" #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h" #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" @@ -35,6 +35,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_CONVERTTONVVMPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { /// A pass that replaces all occurrences of GPU device operations with their @@ -42,7 +45,10 @@ namespace { /// /// This pass only handles device code and is not meant to be run on GPU host /// code. -struct ConvertToNVVMPass : public ConvertToNVVMBase { +struct ConvertToNVVMPass final + : impl::ConvertToNVVMPassBase { + using impl::ConvertToNVVMPassBase::ConvertToNVVMPassBase; + void getDependentDialects(DialectRegistry ®istry) const override { registry .insert { }; } // namespace - -std::unique_ptr> createConvertToNVVMPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp index d66156ac4d24..215a4bb8a237 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp @@ -8,7 +8,6 @@ #include "iree/compiler/Codegen/Common/Transforms.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/LLVMGPU/ConvertToLLVM.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h" @@ -40,6 +39,9 @@ namespace 
mlir::iree_compiler { +#define GEN_PASS_DEF_CONVERTTOROCDLPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + static llvm::cl::opt clROCMIndexingBits("iree-rocm-index-bits", llvm::cl::desc("Set the bit width of indices in ROCm."), @@ -75,7 +77,11 @@ static void populateConvertGPUToAMDGPUPatterns(RewritePatternSet &patterns) { /// /// This pass only handles device code and is not meant to be run on GPU host /// code. -struct ConvertToROCDLPass : public ConvertToROCDLBase { +struct ConvertToROCDLPass final + : impl::ConvertToROCDLPassBase { + using impl::ConvertToROCDLPassBase< + ConvertToROCDLPass>::ConvertToROCDLPassBase; + void getDependentDialects(DialectRegistry ®istry) const override { registry .insert { LDBG("After converting to dynamic shared memory\n" << m); } }; - -std::unique_ptr> createConvertToROCDLPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp index 6966f4127522..359aaff24d00 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ExtractAddressComputationGPUPass.cpp @@ -5,20 +5,22 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "iree/compiler/Codegen/Common/ExtractAddressComputation.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "llvm/Support/Debug.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #define DEBUG_TYPE "extract-address-computation-gpu" -using namespace mlir; - namespace mlir::iree_compiler { +#define 
GEN_PASS_DEF_EXTRACTADDRESSCOMPUTATIONGPUPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + //===----------------------------------------------------------------------===// // Helper functions for the `load base[off0...]` // => `load (subview base[off0...])[0...]` pattern. @@ -80,8 +82,8 @@ populateExtractAddressComputationGPUPatterns(RewritePatternSet &patterns) { // Pass registration //===----------------------------------------------------------------------===// namespace { -struct ExtractAddressComputationGPUPass - : public ExtractAddressComputationGPUBase< +struct ExtractAddressComputationGPUPass final + : impl::ExtractAddressComputationGPUPassBase< ExtractAddressComputationGPUPass> { void runOnOperation() override; }; @@ -96,7 +98,4 @@ void ExtractAddressComputationGPUPass::runOnOperation() { } } -std::unique_ptr createExtractAddressComputationGPUPass() { - return std::make_unique(); -} } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp index 624361fadec8..aad0618e54b4 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastAddressSpaceFunction.cpp @@ -4,7 +4,6 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" @@ -12,15 +11,19 @@ #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h" #include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Pass/Pass.h" #define DEBUG_TYPE "iree-llvmgpu-cast-address-space-function" namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUCASTADDRESSSPACEFUNCTIONPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { -struct LLVMGPUCastAddressSpaceFunctionPass - : public LLVMGPUCastAddressSpaceFunctionBase< +struct LLVMGPUCastAddressSpaceFunctionPass final + : impl::LLVMGPUCastAddressSpaceFunctionPassBase< LLVMGPUCastAddressSpaceFunctionPass> { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); @@ -75,10 +78,4 @@ struct LLVMGPUCastAddressSpaceFunctionPass }; } // namespace - -std::unique_ptr> -createLLVMGPUCastAddressSpaceFunction() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastTypeToFitMMA.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastTypeToFitMMA.cpp index 621430b7e064..013745ef072e 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastTypeToFitMMA.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCastTypeToFitMMA.cpp @@ -6,7 +6,6 @@ #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUInterfaces.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/VectorOpUtils.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -21,6 +20,9 @@ namespace 
mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUCASTTYPETOFITMMAPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { struct UpcastContractOutput final : OpRewritePattern { @@ -72,9 +74,8 @@ struct UpcastContractOutput final : OpRewritePattern { } }; -struct LLVMGPUCastTypeToFitMMAPass - : public LLVMGPUCastTypeToFitMMABase { -public: +struct LLVMGPUCastTypeToFitMMAPass final + : impl::LLVMGPUCastTypeToFitMMAPassBase { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); registry.insert(); @@ -114,9 +115,4 @@ struct LLVMGPUCastTypeToFitMMAPass } }; } // namespace -std::unique_ptr> -createLLVMGPUCastTypeToFitMMAPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureVectorLayouts.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureVectorLayouts.cpp index dee5e89b086b..b37d25af33e6 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureVectorLayouts.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUConfigureVectorLayouts.cpp @@ -9,7 +9,6 @@ #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" #include "iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtDialect.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "llvm/ADT/SetVector.h" @@ -27,6 +26,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUCONFIGUREVECTORLAYOUTSPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { // Sets an anchoring layout for the given contraction op. 
Looks for a @@ -279,10 +281,9 @@ LogicalResult setTransferReadAnchor(ArrayRef workgroupSize, return success(); } -struct LLVMGPUConfigureVectorLayoutsPass - : public LLVMGPUConfigureVectorLayoutsBase< +struct LLVMGPUConfigureVectorLayoutsPass final + : impl::LLVMGPUConfigureVectorLayoutsPassBase< LLVMGPUConfigureVectorLayoutsPass> { -public: void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); registry.insert(); @@ -360,10 +361,4 @@ struct LLVMGPUConfigureVectorLayoutsPass } }; } // namespace - -std::unique_ptr> -createLLVMGPUConfigureVectorLayouts() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp index 2b72c8ab40b5..dc96c92b0c50 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp @@ -10,7 +10,6 @@ #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtDialect.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" @@ -39,6 +38,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPULOWEREXECUTABLETARGETPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { /// Lowers an hal.executable.variant operation to scalar/native-vector /// code. Invokes different compilation pipeline to @@ -46,10 +48,13 @@ namespace { /// - then convert to NVVM/ROCDL dialect. /// This should be merged with the equivalent pass in LinalgToLLVM. Fo /// simplicity it is currently a separate pass. 
-class LLVMGPULowerExecutableTargetPass - : public LLVMGPULowerExecutableTargetBase< +class LLVMGPULowerExecutableTargetPass final + : public impl::LLVMGPULowerExecutableTargetPassBase< LLVMGPULowerExecutableTargetPass> { public: + using impl::LLVMGPULowerExecutableTargetPassBase< + LLVMGPULowerExecutableTargetPass>::LLVMGPULowerExecutableTargetPassBase; + void getDependentDialects(DialectRegistry ®istry) const override { // clang-format off registry @@ -69,10 +74,6 @@ class LLVMGPULowerExecutableTargetPass // clang-format on } - LLVMGPULowerExecutableTargetPass() = default; - LLVMGPULowerExecutableTargetPass( - const LLVMGPULowerExecutableTargetPass &pass) {} - void runOnOperation() override; }; @@ -217,9 +218,4 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() { } } -std::unique_ptr> -createLLVMGPULowerExecutableTargetPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp index 8aaf2731a41a..a3db9be2049f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPackSharedMemoryAlloc.cpp @@ -4,19 +4,19 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include - -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h" #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUPACKSHAREDMEMORYALLOCPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { -struct LLVMGPUPackSharedMemoryAllocPass - : public LLVMGPUPackSharedMemoryAllocBase< +struct LLVMGPUPackSharedMemoryAllocPass final + : impl::LLVMGPUPackSharedMemoryAllocPassBase< LLVMGPUPackSharedMemoryAllocPass> { public: void getDependentDialects(DialectRegistry ®istry) const override { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPrefetching.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPrefetching.cpp index 3f904be7752a..e131252c9307 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPrefetching.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPrefetching.cpp @@ -4,7 +4,6 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h" #include "llvm/ADT/SmallVector.h" @@ -15,10 +14,14 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUPREFETCHSHAREDMEMORYPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { struct LLVMGPUPrefetchSharedMemoryPass final - : LLVMGPUPrefetchSharedMemoryBase { + : impl::LLVMGPUPrefetchSharedMemoryPassBase< + LLVMGPUPrefetchSharedMemoryPass> { void runOnOperation() override { FunctionOpInterface funcOp = getOperation(); IRRewriter rewriter(funcOp.getContext()); @@ -37,10 +40,4 @@ struct LLVMGPUPrefetchSharedMemoryPass final }; } // namespace - -std::unique_ptr> -createLLVMGPUPrefetchSharedMemoryPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPromoteMatmulToFitMMA.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPromoteMatmulToFitMMA.cpp index 5e61991070df..26b96d7ad91b 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPromoteMatmulToFitMMA.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUPromoteMatmulToFitMMA.cpp @@ -4,7 +4,6 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" @@ -16,14 +15,18 @@ #define DEBUG_TYPE "iree-llvmgpu-promote-matmul-to-fit-mma" namespace mlir::iree_compiler { -#define GEN_PASS_DECL_LLVMGPUPROMOTEMATMULTOFITMMA + +#define GEN_PASS_DEF_LLVMGPUPROMOTEMATMULTOFITMMAPASS #include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { -class LLVMGPUPromoteMatmulToFitMMAPass - : public LLVMGPUPromoteMatmulToFitMMABase< +class LLVMGPUPromoteMatmulToFitMMAPass final + : public impl::LLVMGPUPromoteMatmulToFitMMAPassBase< LLVMGPUPromoteMatmulToFitMMAPass> { public: + using impl::LLVMGPUPromoteMatmulToFitMMAPassBase< + LLVMGPUPromoteMatmulToFitMMAPass>::LLVMGPUPromoteMatmulToFitMMAPassBase; explicit LLVMGPUPromoteMatmulToFitMMAPass( const LLVMGPUMatmulPadOption &option) { this->targetDimensions.setValue(option); diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUSelectLoweringStrategy.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUSelectLoweringStrategy.cpp index b41a5deef11a..a6d630717bb6 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUSelectLoweringStrategy.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUSelectLoweringStrategy.cpp @@ -7,7 +7,6 @@ #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" @@ -28,13 +27,19 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUSELECTLOWERINGSTRATEGYPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + 
namespace { /// Selects a lowering strategy for taking a hal.executable.variant operation /// to scalar/native-vector code. -class LLVMGPUSelectLoweringStrategyPass - : public LLVMGPUSelectLoweringStrategyBase< +class LLVMGPUSelectLoweringStrategyPass final + : public impl::LLVMGPUSelectLoweringStrategyPassBase< LLVMGPUSelectLoweringStrategyPass> { public: + using impl::LLVMGPUSelectLoweringStrategyPassBase< + LLVMGPUSelectLoweringStrategyPass>::LLVMGPUSelectLoweringStrategyPassBase; + void getDependentDialects(DialectRegistry ®istry) const override { // TODO(qedawkins): Once TransformStrategies is deprecated, drop the // unnecessary dialect registrations. @@ -56,10 +61,6 @@ class LLVMGPUSelectLoweringStrategyPass // clang-format on } - LLVMGPUSelectLoweringStrategyPass() = default; - LLVMGPUSelectLoweringStrategyPass( - const LLVMGPUSelectLoweringStrategyPass &pass) {} - void runOnOperation() override; }; } // namespace @@ -116,9 +117,4 @@ void LLVMGPUSelectLoweringStrategyPass::runOnOperation() { } } -std::unique_ptr> -createLLVMGPUSelectLoweringStrategyPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp index 3ed9b705bbd7..aa72280e5fcc 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorCoreVectorization.cpp @@ -6,7 +6,6 @@ #include "iree/compiler/Codegen/Common/GPU/GPUPatterns.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" @@ -26,6 +25,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUTENSORCOREVECTORIZATIONPASS +#include 
"iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + //====---------------------------------------------------------------------===// // Patterns for vectorization //====---------------------------------------------------------------------===// @@ -66,11 +68,16 @@ static void populateVectorUnrollPatterns(RewritePatternSet &patterns, } namespace { -struct LLVMGPUTensorCoreVectorizationPass - : public LLVMGPUTensorCoreVectorizationBase< +class LLVMGPUTensorCoreVectorizationPass final + : public impl::LLVMGPUTensorCoreVectorizationPassBase< LLVMGPUTensorCoreVectorizationPass> { - LLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType tensorCoreType) +public: + using impl::LLVMGPUTensorCoreVectorizationPassBase< + LLVMGPUTensorCoreVectorizationPass>:: + LLVMGPUTensorCoreVectorizationPassBase; + explicit LLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType tensorCoreType) : tensorCoreType(tensorCoreType) {} + void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp index cc49c958b14c..1a4ffe4964d4 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp @@ -4,7 +4,6 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "iree/compiler/Codegen/Utils/LinalgOpInfo.h" @@ -21,6 +20,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUTENSORPADPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { static FailureOr> @@ -108,8 +110,8 @@ static bool hasTwoOrThreeLoopsInfo(linalg::LinalgOp linalgOp) { linalgOp.getNumParallelLoops() <= 3; } -struct LLVMGPUTensorPadPass - : public LLVMGPUTensorPadBase { +struct LLVMGPUTensorPadPass final + : impl::LLVMGPUTensorPadPassBase { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } @@ -166,10 +168,4 @@ struct LLVMGPUTensorPadPass } }; } // namespace - -std::unique_ptr> -createLLVMGPUTensorPadPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp index bcbe15a4145c..ac3cec65fa55 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp @@ -7,7 +7,6 @@ #include "iree/compiler/Codegen/Common/GPU/GPUPatterns.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" @@ -15,6 +14,7 @@ #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include 
"mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/SCF/Transforms/Patterns.h" @@ -25,6 +25,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUTILEANDDISTRIBUTEPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + /// Tiles to workgroup level. Workgroup tiling is done at the flow level but we /// may have extra tiling for the reduction dimension. Therefore we tile again /// without distributing. @@ -192,15 +195,19 @@ static LogicalResult tileToInvocation(mlir::FunctionOpInterface funcOp, } namespace { -struct LLVMGPUTileAndDistributePass - : public LLVMGPUTileAndDistributeBase { +class LLVMGPUTileAndDistributePass final + : public impl::LLVMGPUTileAndDistributePassBase< + LLVMGPUTileAndDistributePass> { private: // Distribute the workloads to warp if true otherwise distribute to threads. bool distributeToWarp = false; public: + using impl::LLVMGPUTileAndDistributePassBase< + LLVMGPUTileAndDistributePass>::LLVMGPUTileAndDistributePassBase; LLVMGPUTileAndDistributePass(bool distributeToWarp) : distributeToWarp(distributeToWarp) {} + void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } @@ -304,7 +311,7 @@ struct LLVMGPUTileAndDistributePass } // namespace std::unique_ptr> -createLLVMGPUTileAndDistribute(bool distributeToWarp) { +createLLVMGPUTileAndDistributePass(bool distributeToWarp) { return std::make_unique(distributeToWarp); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorDistribute.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorDistribute.cpp index e4a9a81708ca..466d7bd1bf80 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorDistribute.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorDistribute.cpp @@ -8,7 +8,6 @@ #include "iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.h" #include 
"iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "iree/compiler/Codegen/Dialect/VectorExt/IR/VectorExtDialect.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" @@ -24,6 +23,9 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUVECTORDISTRIBUTEPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + namespace { class ContractionVectorLayoutOptions : public VectorLayoutOptions { @@ -44,9 +46,8 @@ class ContractionVectorLayoutOptions : public VectorLayoutOptions { RewritePatternSet patterns; }; -struct LLVMGPUVectorDistributePass - : public LLVMGPUVectorDistributeBase { -public: +struct LLVMGPUVectorDistributePass final + : impl::LLVMGPUVectorDistributePassBase { void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); registry.insert(); @@ -116,10 +117,4 @@ struct LLVMGPUVectorDistributePass } }; } // namespace - -std::unique_ptr> -createLLVMGPUVectorDistribute() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp index 689393c2b42f..bcc2d00c69bd 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorLowering.cpp @@ -4,9 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/MemRef/Transforms/Transforms.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" @@ -16,13 +16,16 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_LLVMGPUVECTORLOWERINGPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + //====---------------------------------------------------------------------===// // Patterns for late vector op lowering. //====---------------------------------------------------------------------===// namespace { -struct LLVMGPUVectorLoweringPass - : public LLVMGPUVectorLoweringBase { +struct LLVMGPUVectorLoweringPass final + : impl::LLVMGPUVectorLoweringPassBase { void getDependentDialects(DialectRegistry &registry) const override { registry.insert(); registry.insert(); @@ -70,10 +73,4 @@ struct LLVMGPUVectorLoweringPass } }; } // namespace - -std::unique_ptr> -createLLVMGPUVectorLoweringPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp index 4941ee3e8a24..ca631379867c 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUVectorToGPU.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "iree/compiler/Codegen/Common/GPU/GPUPatterns.h" -#include "iree/compiler/Codegen/LLVMGPU/PassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" #include "iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" @@ -21,6 +20,9 @@ namespace mlir::iree_compiler { +#define
GEN_PASS_DEF_LLVMGPUVECTORTOGPUPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + static void swizzleSharedMemory(mlir::FunctionOpInterface funcOp) { SmallVector shmAllocOps; funcOp->walk([&](memref::AllocOp allocOp) { @@ -38,10 +40,13 @@ static void swizzleSharedMemory(mlir::FunctionOpInterface funcOp) { } namespace { -struct LLVMGPUVectorToGPUPass - : public LLVMGPUVectorToGPUBase { +struct LLVMGPUVectorToGPUPass final + : impl::LLVMGPUVectorToGPUPassBase { + using impl::LLVMGPUVectorToGPUPassBase< + LLVMGPUVectorToGPUPass>::LLVMGPUVectorToGPUPassBase; LLVMGPUVectorToGPUPass(GPUTensorCoreType tensorCoreType) : tensorCoreType(tensorCoreType) {} + void getDependentDialects(DialectRegistry &registry) const override { registry.insert(); @@ -103,7 +108,7 @@ struct LLVMGPUVectorToGPUPass } // namespace std::unique_ptr> -createLLVMGPUVectorToGPU(GPUTensorCoreType tensorCoreType) { +createLLVMGPUVectorToGPUPass(GPUTensorCoreType tensorCoreType) { return std::make_unique(tensorCoreType); } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/PassDetail.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/PassDetail.h deleted file mode 100644 index e042deb407f0..000000000000 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/PassDetail.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2023 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_COMPILER_CODEGEN_LLVMGPU_PASS_DETAIL_H_ -#define IREE_COMPILER_CODEGEN_LLVMGPU_PASS_DETAIL_H_ - -#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" -#include "iree/compiler/Codegen/LLVMGPU/Passes.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" -#include "mlir/Interfaces/FunctionInterfaces.h" -#include "mlir/Pass/Pass.h" - -namespace mlir::iree_compiler { - -#define GEN_PASS_CLASSES -#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" - -} // namespace mlir::iree_compiler - -#endif // IREE_COMPILER_CODEGEN_LLVMGPU_PASS_DETAIL_H_ diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index 8b74d1b4a3d4..294e3b81dd57 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -497,7 +497,7 @@ void addGPUMatmulTensorCorePassPipeline(OpPassManager &funcPassManager, // Distribute linalg onto warps within the workgroup. 
funcPassManager.addPass( - createLLVMGPUTileAndDistribute(/*distributeToWarp=*/true)); + createLLVMGPUTileAndDistributePass(/*distributeToWarp=*/true)); funcPassManager.addPass(createRemoveSingleIterationLoopPass()); if (pipelineDepth > 1) { funcPassManager.addPass(createGPUMultiBufferingPass( @@ -518,7 +518,8 @@ void addGPUMatmulTensorCorePassPipeline(OpPassManager &funcPassManager, funcPassManager.addPass(createCSEPass()); // Linalg -> vector - funcPassManager.addPass(createLLVMGPUTensorCoreVectorizationPass()); + funcPassManager.addPass( + createLLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType::WMMA)); funcPassManager.addPass(memref::createFoldMemRefAliasOpsPass()); funcPassManager.addPass(createCSEPass()); funcPassManager.addPass(createOptimizeVectorTransferPass()); @@ -537,7 +538,8 @@ void addGPUMatmulTensorCorePassPipeline(OpPassManager &funcPassManager, funcPassManager.addPass(memref::createFoldMemRefAliasOpsPass()); funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); - funcPassManager.addPass(createLLVMGPUVectorToGPU()); + funcPassManager.addPass( + createLLVMGPUVectorToGPUPass(GPUTensorCoreType::WMMA)); funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); @@ -551,7 +553,7 @@ void addGPUMatmulTensorCorePassPipeline(OpPassManager &funcPassManager, llvm::to_underlying(PipeliningSchedulingStrategy::loadGlobalStage0); funcPassManager.addPass(createGPUPipeliningPass(pipelieningOptions)); // Optimize shared memory usage. - funcPassManager.addPass(createLLVMGPUPackSharedMemoryAlloc()); + funcPassManager.addPass(createLLVMGPUPackSharedMemoryAllocPass()); } //===---------------------------------------------------------------------===// @@ -565,7 +567,7 @@ void addGPUMatmulTensorCoreMmaSyncPassPipeline( // Distribute linalg onto warps within the workgroup. 
funcPassManager.addPass( - createLLVMGPUTileAndDistribute(/*distributeToWarp=*/true)); + createLLVMGPUTileAndDistributePass(/*distributeToWarp=*/true)); funcPassManager.addPass(createRemoveSingleIterationLoopPass()); if (pipelineDepth > 1) { funcPassManager.addPass(createGPUMultiBufferingPass( @@ -604,7 +606,7 @@ void addGPUMatmulTensorCoreMmaSyncPassPipeline( funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); funcPassManager.addPass( - createLLVMGPUVectorToGPU(GPUTensorCoreType::MMA_SYNC)); + createLLVMGPUVectorToGPUPass(GPUTensorCoreType::MMA_SYNC)); funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); @@ -618,7 +620,7 @@ void addGPUMatmulTensorCoreMmaSyncPassPipeline( llvm::to_underlying(PipeliningSchedulingStrategy::nvidiaTensorCore); funcPassManager.addPass(createGPUPipeliningPass(pipelieningOptions)); // Optimize shared memory usage. - funcPassManager.addPass(createLLVMGPUPackSharedMemoryAlloc()); + funcPassManager.addPass(createLLVMGPUPackSharedMemoryAllocPass()); } //===---------------------------------------------------------------------===// @@ -801,8 +803,8 @@ void addGPUVectorDistributePassPipeline(OpPassManager &funcPassManager, funcPassManager.addPass(createAMDGPUPrepareForChainedMatmulPass()); // Vector SIMD -> Vector SIMT - funcPassManager.addPass(createLLVMGPUConfigureVectorLayouts()); - funcPassManager.addPass(createLLVMGPUVectorDistribute()); + funcPassManager.addPass(createLLVMGPUConfigureVectorLayoutsPass()); + funcPassManager.addPass(createLLVMGPUVectorDistributePass()); funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); @@ -889,7 +891,8 @@ void addGPUSimpleDistributePassPipeline(OpPassManager &funcPassManager) { tileAndBufferize(funcPassManager); // Distribute linalg onto threads within the workgroup. 
- funcPassManager.addPass(createLLVMGPUTileAndDistribute()); + funcPassManager.addPass( + createLLVMGPUTileAndDistributePass(/*distributeToWarp=*/false)); funcPassManager.addPass(createCanonicalizerPass()); funcPassManager.addPass(createCSEPass()); @@ -1019,7 +1022,7 @@ static void addLowerToLLVMGPUPasses(OpPassManager &modulePassManager, // Strip out the debug info for the kernel. modulePassManager.addPass(createStripDebugInfoPass()); // Cast address spaces of all function arguments to generic. - modulePassManager.addPass(createLLVMGPUCastAddressSpaceFunction()); + modulePassManager.addPass(createLLVMGPUCastAddressSpaceFunctionPass()); if (forROCDL) { // convert to ROCDL. diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h index 527317cfb253..2804a32f5533 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.h @@ -42,9 +42,9 @@ struct LLVMGPUPipelineOptions { llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const LLVMGPUPipelineOptions &options); -//===----------------------------------------------------------------------===// -// Passes -//===----------------------------------------------------------------------===// +//----------------------------------------------------------------------------// +// LLVMGPU backend Pass Pipelines. +//----------------------------------------------------------------------------// /// Lowering using SIMT CUDA core operations. void addGPUMatmulSimtPassPipeline(OpPassManager &funcPassManager, @@ -112,126 +112,43 @@ void buildLLVMGPUCodegenConfigurationPassPipeline( void buildLLVMGPUCodegenPassPipeline(OpPassManager &variantPassManagery, bool useROCM); -/// Performs the final conversion to NNVM+LLVM dialect. -std::unique_ptr> createConvertToNVVMPass(); - -/// Performs the final conversion to ROCDL+LLVM dialect. 
-std::unique_ptr> createConvertToROCDLPass(); - -/// Cast address space to generic in CallOp and FuncOp -std::unique_ptr> -createLLVMGPUCastAddressSpaceFunction(); - -/// Perform type extension/truncation over vector.contract types to target GPU -/// MMA intrinsics. -std::unique_ptr> -createLLVMGPUCastTypeToFitMMAPass(); - -std::unique_ptr> -createLLVMGPUDistribute(); - -// Extract address computations (including the ones with GPU instructions) into -// their own separate instructions. -std::unique_ptr createExtractAddressComputationGPUPass(); - -/// Create pass selecting the lowering strategy for LLVMGPU. -std::unique_ptr> -createLLVMGPUSelectLoweringStrategyPass(); - -/// Create pass calling the dynamic pipeline for LLVMGPU. -std::unique_ptr> -createLLVMGPULowerExecutableTargetPass(); - -// Pass to pack shared memory allocations in order to reduce shared memory -// usage. -std::unique_ptr> -createLLVMGPUPackSharedMemoryAlloc(); +/// Lowering calling vectorization patterns. +LogicalResult +verifyGPUMatmulPipeline(Operation *op, + IREE::Codegen::LoweringConfigAttr loweringConfig, + IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize); -std::unique_ptr> -createLLVMGPUPrefetchSharedMemoryPass(); +//------------------------------------------------------------------------------ +// Wrappers that do not use tablegen options. +//------------------------------------------------------------------------------ -/// Pass to pad operations on tensors in top-down order. enum class LLVMGPUMatmulPadOption { ParallelDims, ReductionDims }; std::unique_ptr> -createLLVMGPUPromoteMatmulToFitMMAPass( - LLVMGPUMatmulPadOption option = LLVMGPUMatmulPadOption::ParallelDims); - -// Pass to set layouts for vector distribution.
-std::unique_ptr> -createLLVMGPUConfigureVectorLayouts(); +createLLVMGPUPromoteMatmulToFitMMAPass(LLVMGPUMatmulPadOption option); enum class GPUTensorCoreType { WMMA = 0, MMA_SYNC = 1, }; -/// Convert Linalg ops to Vector and prepare converstion to GPU MMA ops. -std::unique_ptr> -createLLVMGPUTensorCoreVectorizationPass( - GPUTensorCoreType tensorCoreType = GPUTensorCoreType::WMMA); - -//. Pass to pad out tensors up to static dimensions. std::unique_ptr> -createLLVMGPUTensorPadPass(); -/// Perform tiling and distribution to threads. +createLLVMGPUTensorCoreVectorizationPass(GPUTensorCoreType tensorCoreType); std::unique_ptr> -createLLVMGPUTileAndDistribute(bool distributeToWarp = false); +createLLVMGPUVectorToGPUPass(GPUTensorCoreType tensorCoreType); -// Pass to distribute vectorized functions. std::unique_ptr> -createLLVMGPUVectorDistribute(); - -/// Lower vector ops before convertion to LLVM. -std::unique_ptr> -createLLVMGPUVectorLoweringPass(); - -/// Converts vector ops to gpu dialect. -std::unique_ptr> createLLVMGPUVectorToGPU( - GPUTensorCoreType tensorCoreType = GPUTensorCoreType::WMMA); - -/// Lowering calling vectorization patterns. -LogicalResult -verifyGPUMatmulPipeline(Operation *op, - IREE::Codegen::LoweringConfigAttr loweringConfig, - IREE::Codegen::TranslationInfoAttr translationInfo, - ArrayRef workgroupSize); - -/// Given a chain of matmuls with some or no operations -/// in between, like -/// -/// d = matmul_transpose_b(a, b) + c -/// ... -/// e = matmul_transpose_b(d, f) + g -/// -/// this pattern transforms the above IR to -/// -/// c.t = transpose c -/// d = matmul_transpose_b(b, a) + c.t -/// d.t = transpose d -/// ... 
-/// g.t = transpose g -/// e = matmul_transpose_b(f, d.t) + g.t -/// e.t = transpose e -/// -/// On CDNA architectures, where the layouts of the RHS and result -/// are the same and transposed from the LHS layout, this type -/// of transformation can avoid trips to shared memory/shuffle instructions -/// on operators like Flash Attention. -std::unique_ptr> -createAMDGPUPrepareForChainedMatmulPass(); +createLLVMGPUTileAndDistributePass(bool distributeToWarp); //----------------------------------------------------------------------------// // Register LLVMGPU Passes //----------------------------------------------------------------------------// -void registerCodegenLLVMGPUPasses(); +#define GEN_PASS_DECL +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" // IWYU pragma: keep -//------------------------------------------------------------------------------ -// Test passes -//------------------------------------------------------------------------------ - -std::unique_ptr> createTestLLVMGPULegalizePass(); +void registerCodegenLLVMGPUPasses(); } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td index 8ea7da497989..33df5d26beba 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td @@ -13,81 +13,93 @@ include "mlir/Pass/PassBase.td" // LLVMGPU Passes (keep alphabetical) //------------------------------------------------------------------------------ -def AMDGPUPrepareForChainedMatmul : +def AMDGPUPrepareForChainedMatmulPass : InterfacePass<"iree-amdgpu-prepare-chained-matmul", "mlir::FunctionOpInterface"> { let summary = "Pass to swap operands and transpose accumulator and result"; - let constructor = "mlir::iree_compiler::createAMDGPUPrepareForChainedMatmulPass()"; + let description = [{ + Given a chain of matmuls with some or no operations + in between, like + + d = matmul_transpose_b(a, b) + 
c + ... + e = matmul_transpose_b(d, f) + g + + this pattern transforms the above IR to + + c.t = transpose c + d = matmul_transpose_b(b, a) + c.t + d.t = transpose d + ... + g.t = transpose g + e = matmul_transpose_b(f, d.t) + g.t + e.t = transpose e + + On CDNA architectures, where the layouts of the RHS and result + are the same and transposed from the LHS layout, this type + of transformation can avoid trips to shared memory/shuffle instructions + on operators like Flash Attention. + }]; } // TODO: Bring the argument in line with the names used elsewhere. -def ConvertToNVVM : +def ConvertToNVVMPass : Pass<"iree-convert-to-nvvm", "ModuleOp"> { let summary = "Perform final conversion from builtin/GPU/HAL/standard dialect to LLVM " "and NVVM dialects"; - let constructor = "mlir::iree_compiler::createConvertToNVVMPass()"; } // TODO: Bring the argument in line with the names used elsewhere. -def ConvertToROCDL : +def ConvertToROCDLPass : Pass<"iree-convert-to-rocdl", "ModuleOp"> { let summary = "Perform final conversion from builtin/GPU/HAL/standard dialect to LLVM " "and ROCDL dialects"; - let constructor = "mlir::iree_compiler::createConvertToROCDLPass()"; } -def ExtractAddressComputationGPU: Pass<"extract-address-computation-gpu"> { +def ExtractAddressComputationGPUPass: Pass<"extract-address-computation-gpu"> { let summary = "Extract address computations from memory accesses"; let description = [{ This pass is similar to `extract-address-computation` except it also supports memory accesses that are specific to GPUs. 
}]; - let constructor = "mlir::iree_compiler::createExtractAddressComputationGPUPass()"; let dependentDialects = [ "memref::MemRefDialect", "nvgpu::NVGPUDialect", "affine::AffineDialect" ]; } -def LLVMGPUCastAddressSpaceFunction : +def LLVMGPUCastAddressSpaceFunctionPass : Pass<"iree-llvmgpu-cast-address-space-function", "ModuleOp"> { let summary = "Cast address space to generic in CallOp and FuncOp"; - let constructor = "mlir::iree_compiler::createLLVMGPUCastAddressSpaceFunction()"; } -def LLVMGPUCastTypeToFitMMA : InterfacePass<"iree-llvmgpu-cast-type-to-fit-mma", - "mlir::FunctionOpInterface"> { +def LLVMGPUCastTypeToFitMMAPass : InterfacePass<"iree-llvmgpu-cast-type-to-fit-mma", + "mlir::FunctionOpInterface"> { let summary = "Perform type extension/truncation over vector.contract types " "to target GPU MMA intrinsics"; - let constructor = "mlir::iree_compiler::createLLVMGPUCastTypeToFitMMAPass()"; } -def LLVMGPUConfigureVectorLayouts : +def LLVMGPUConfigureVectorLayoutsPass : InterfacePass<"iree-llvmgpu-configure-vector-layouts", "mlir::FunctionOpInterface"> { let summary = "Pass to set layouts for vector distribution"; - let constructor = "mlir::iree_compiler::createLLVMGPUConfigureVectorLayouts()"; } -def LLVMGPULowerExecutableTarget : +def LLVMGPULowerExecutableTargetPass : InterfacePass<"iree-llvmgpu-lower-executable-target", "mlir::FunctionOpInterface"> { let summary = "Perform lowering of executable target using one of the IREE::HAL::DispatchLoweringPassPipeline"; - let constructor = "mlir::iree_compiler::createLLVMGPULowerExecutableTargetPass()"; } -def LLVMGPUPackSharedMemoryAlloc : +def LLVMGPUPackSharedMemoryAllocPass : InterfacePass<"iree-llvmgpu-pack-shared-memory-alloc", "mlir::FunctionOpInterface"> { let summary = "Pass pack shared memory allocation in order to reduce memory usage."; - let constructor = "mlir::iree_compiler::createLLVMGPUPackSharedMemoryAlloc()"; } -def LLVMGPUPrefetchSharedMemory : +def LLVMGPUPrefetchSharedMemoryPass : 
InterfacePass<"iree-llvmgpu-prefetch-shared-memory", "mlir::FunctionOpInterface"> { let summary = "Rotate scf.for loops to prefetch shared memory with distance 1"; - let constructor = "mlir::iree_compiler::createLLVMGPUPrefetchSharedMemoryPass()"; } -def LLVMGPUPromoteMatmulToFitMMA : +def LLVMGPUPromoteMatmulToFitMMAPass : InterfacePass<"iree-llvmgpu-promote-matmul-to-fit-mma", "mlir::FunctionOpInterface"> { let summary = "Pass to promote contraction ops to fit mma shapes"; - let constructor = "mlir::iree_compiler::createLLVMGPUPromoteMatmulToFitMMAPass()"; let options = [ Option<"targetDimensions", "target-dimensions", "mlir::iree_compiler::LLVMGPUMatmulPadOption", /*default=*/"mlir::iree_compiler::LLVMGPUMatmulPadOption::ParallelDims", @@ -103,56 +115,48 @@ def LLVMGPUPromoteMatmulToFitMMA : ]; } -def LLVMGPUSelectLoweringStrategy : +def LLVMGPUSelectLoweringStrategyPass : Pass<"iree-llvmgpu-select-lowering-strategy", "ModuleOp"> { let summary = "Select a IREE::HAL::DispatchLoweringPassPipeline for lowering the target variant"; - let constructor = "mlir::iree_compiler::createLLVMGPUSelectLoweringStrategyPass()"; } -def LLVMGPUTensorCoreVectorization : +def LLVMGPUTensorCoreVectorizationPass : InterfacePass<"iree-llvmgpu-tensorcore-vectorization", "mlir::FunctionOpInterface"> { let summary = "Pass to convert linalg into Vector and transform it to a form that can be lowered to GPU MMA ops"; - let constructor = "mlir::iree_compiler::createLLVMGPUTensorCoreVectorizationPass()"; } -def LLVMGPUTensorPad : +def LLVMGPUTensorPadPass : InterfacePass<"iree-llvmgpu-tensor-pad", "mlir::FunctionOpInterface"> { let summary = "Pass to pad out tensors up to static dimensions."; - let constructor = "mlir::iree_compiler::createLLVMGPUTensorPadPass()"; } -def LLVMGPUTileAndDistribute : +def LLVMGPUTileAndDistributePass : InterfacePass<"iree-llvmgpu-tile-and-distribute", "mlir::FunctionOpInterface"> { let summary = "Pass to tile and distribute linalg ops within a workgroup."; - let 
constructor = "mlir::iree_compiler::createLLVMGPUTileAndDistribute()"; } -def LLVMGPUVectorDistribute : +def LLVMGPUVectorDistributePass : InterfacePass<"iree-llvmgpu-vector-distribute", "mlir::FunctionOpInterface"> { let summary = "Pass to distribute vectorized functions."; - let constructor = "mlir::iree_compiler::createLLVMGPUVectorDistribute()"; } -def LLVMGPUVectorLowering : +def LLVMGPUVectorLoweringPass : InterfacePass<"iree-llvmgpu-vector-lowering", "mlir::FunctionOpInterface"> { let summary = "Pass to lower Vector ops before conversion to LLVM."; - let constructor = "mlir::iree_compiler::createLLVMGPUVectorLoweringPass()"; } -def LLVMGPUVectorToGPU : +def LLVMGPUVectorToGPUPass : InterfacePass<"iree-llvmgpu-vector-to-gpu", "mlir::FunctionOpInterface"> { let summary = "Pass to convert vector to gpu."; - let constructor = "mlir::iree_compiler::createLLVMGPUVectorToGPU()"; } //------------------------------------------------------------------------------ // Test Passes //------------------------------------------------------------------------------ -def TestLLVMGPUScalarizeMathOp : +def TestLLVMGPUScalarizeMathOpPass : Pass<"iree-test-llvmgpu-legalize-ops", "ModuleOp"> { let summary = "Test pass for several legalization patterns."; - let constructor = "mlir::iree_compiler::createTestLLVMGPULegalizePass()"; } #endif // IREE_CODEGEN_LLVMGPU_PASSES diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLLowerExecutableTarget.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLLowerExecutableTarget.cpp index b16c4ddd9412..e9dfc6a5e9ac 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLLowerExecutableTarget.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLLowerExecutableTarget.cpp @@ -7,8 +7,6 @@ #include "iree/compiler/Codegen/Common/PassUtils.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/LLVMGPU/Passes.h" -#include "iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h" -#include
"iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -24,13 +22,17 @@ namespace mlir::iree_compiler { +#define GEN_PASS_DEF_ROCDLLOWEREXECUTABLETARGETPASS +#include "iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h.inc" + namespace { using CodeGenPipeline = IREE::Codegen::DispatchLoweringPassPipeline; /// Lowers an IREE hal.executable.variant operation using a suitable pass /// pipeline. -class ROCDLLowerExecutableTargetPass - : public ROCDLLowerExecutableTargetBase { +class ROCDLLowerExecutableTargetPass final + : public impl::ROCDLLowerExecutableTargetPassBase< + ROCDLLowerExecutableTargetPass> { public: void getDependentDialects(DialectRegistry &registry) const override { registry @@ -82,10 +84,4 @@ class ROCDLLowerExecutableTargetPass } }; } // namespace - -std::unique_ptr> -createROCDLLowerExecutableTargetPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h deleted file mode 100644 index 067cfead96ed..000000000000 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#ifndef IREE_COMPILER_CODEGEN_LLVMGPU_ROCDLPASSDETAIL_H_ -#define IREE_COMPILER_CODEGEN_LLVMGPU_ROCDLPASSDETAIL_H_ - -#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/Pass/Pass.h" - -namespace mlir::iree_compiler { - -#define GEN_PASS_CLASSES -#include "iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h.inc" - -} // namespace mlir::iree_compiler - -#endif // IREE_COMPILER_CODEGEN_LLVMGPU_ROCDLPASSDETAIL_H_ diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h index 122eebf71290..696a6c0fab19 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h @@ -12,23 +12,13 @@ namespace mlir::iree_compiler { -//===----------------------------------------------------------------------===// -// Passes -//===----------------------------------------------------------------------===// - -/// Creates a pass that calls a dynamic pipeline to progressively lower Linalg -/// with tensor semantics to ROCDL. -std::unique_ptr> -createROCDLLowerExecutableTargetPass(); - -/// Creates a pass to select the lowering strategy for converting to ROCDL. 
-std::unique_ptr> -createROCDLSelectLoweringStrategyPass(); - //===----------------------------------------------------------------------===// // Pass Registration //===----------------------------------------------------------------------===// +#define GEN_PASS_DECL +#include "iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h.inc" // IWYU pragma: keep + // void registerCodegenROCDLPasses(); } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.td index 625ad1ada90b..bf91b6ebd084 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.td +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLPasses.td @@ -13,20 +13,16 @@ include "mlir/Pass/PassBase.td" // ROCDL Passes (keep alphabetical) //===----------------------------------------------------------------------===// -def ROCDLLowerExecutableTarget : InterfacePass< +def ROCDLLowerExecutableTargetPass : InterfacePass< "iree-rocdl-lower-executable-target", "mlir::FunctionOpInterface"> { let summary = "Lower an IREE hal.executable.variant op using a suitable " "pass pipeline"; - let constructor = - "mlir::iree_compiler::createROCDLLowerExecutableTargetPass()"; } -def ROCDLSelectLoweringStrategy : +def ROCDLSelectLoweringStrategyPass : Pass<"iree-rocdl-select-lowering-strategy", "ModuleOp"> { let summary = "Select a suitable lowering strategy for an IREE " "hal.executable.variant op"; - let constructor = - "mlir::iree_compiler::createROCDLSelectLoweringStrategyPass()"; } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLSelectLoweringStrategy.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLSelectLoweringStrategy.cpp index 8d291ffc5d58..65c855aa2e34 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLSelectLoweringStrategy.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ROCDLSelectLoweringStrategy.cpp @@ -7,17 +7,20 @@ #include
"iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/LLVMGPU/ROCDLKernelConfig.h" -#include "iree/compiler/Codegen/LLVMGPU/ROCDLPassDetail.h" #include "iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Pass/Pass.h" namespace mlir::iree_compiler { +#define GEN_PASS_DEF_ROCDLSELECTLOWERINGSTRATEGYPASS +#include "iree/compiler/Codegen/LLVMGPU/ROCDLPasses.h.inc" + namespace { /// Selects a strategy for lowering an IREE hal.executable.variant to ROCDL. -class ROCDLSelectLoweringStrategyPass - : public ROCDLSelectLoweringStrategyBase { +class ROCDLSelectLoweringStrategyPass final + : public impl::ROCDLSelectLoweringStrategyPassBase< + ROCDLSelectLoweringStrategyPass> { public: void getDependentDialects(DialectRegistry &registry) const override { registry @@ -36,10 +39,4 @@ class ROCDLSelectLoweringStrategyPass } }; } // namespace - -std::unique_ptr> -createROCDLSelectLoweringStrategyPass() { - return std::make_unique(); -} - } // namespace mlir::iree_compiler